commit 5af9fddd54
Robert Morris 2022-01-10 15:19:31 -05:00
66 changed files with 77148 additions and 0 deletions

.check-build Executable file

@@ -0,0 +1,127 @@
#!/usr/bin/env bash
set -eu
REFERENCE_FILES=(
# lab 1
src/mrapps/crash.go
src/mrapps/indexer.go
src/mrapps/mtiming.go
src/mrapps/nocrash.go
src/mrapps/rtiming.go
src/mrapps/wc.go
src/main/mrsequential.go
src/main/mrcoordinator.go
src/main/mrworker.go
# lab 2
src/raft/persister.go
src/raft/test_test.go
src/raft/config.go
src/labrpc/labrpc.go
# lab 3
src/kvraft/test_test.go
src/kvraft/config.go
# lab 4a
src/shardctrler/test_test.go
src/shardctrler/config.go
# lab 4b
src/shardkv/test_test.go
src/shardkv/config.go
)
main() {
upstream="$1"
labnum="$2"
# make sure we have a reference copy of the lab in FETCH_HEAD
git fetch "$upstream" 2>/dev/null || die "unable to git fetch $upstream"
# copy existing directory
tmpdir="$(mktemp -d)"
find src -type s -delete # cp can't copy sockets
cp -r src "$tmpdir"
orig="$PWD"
cd "$tmpdir"
# check out reference files
for f in "${REFERENCE_FILES[@]}"; do
mkdir -p "$(dirname "$f")"
git --git-dir="$orig/.git" show "FETCH_HEAD:$f" > "$f"
done
case $labnum in
"lab1") check_lab1;;
"lab2a"|"lab2b"|"lab2c"|"lab2d") check_lab2;;
"lab3a"|"lab3b") check_lab3;;
"lab4a") check_lab4a;;
"lab4b") check_lab4b;;
*) die "unknown lab: $labnum";;
esac
cd
rm -rf "$tmpdir"
}
check_lab1() {
check_cmd cd src/mrapps
check_cmd go build -buildmode=plugin wc.go
check_cmd go build -buildmode=plugin indexer.go
check_cmd go build -buildmode=plugin mtiming.go
check_cmd go build -buildmode=plugin rtiming.go
check_cmd go build -buildmode=plugin crash.go
check_cmd go build -buildmode=plugin nocrash.go
check_cmd cd ../main
check_cmd go build mrcoordinator.go
check_cmd go build mrworker.go
check_cmd go build mrsequential.go
}
check_lab2() {
check_cmd cd src/raft
check_cmd go test -c
}
check_lab3() {
check_cmd cd src/kvraft
check_cmd go test -c
}
check_lab4a() {
check_cmd cd src/shardctrler
check_cmd go test -c
}
check_lab4b() {
check_cmd cd src/shardkv
check_cmd go test -c
# also check other labs/parts
cd "$tmpdir"
check_lab4a
cd "$tmpdir"
check_lab3
cd "$tmpdir"
check_lab2
}
check_cmd() {
if ! "$@" >/dev/null 2>&1; then
echo "We tried building your source code with testing-related files reverted to original versions, and the build failed. This copy of your code is preserved in $tmpdir for debugging purposes. Please make sure the code you are trying to hand in does not make changes to test code." >&2
echo >&2
echo "The build failed while trying to run the following command:" >&2
echo >&2
echo "$ $@" >&2
echo " (cwd: ${PWD#$tmpdir/})" >&2
exit 1
fi
}
die() {
echo "$1" >&2
exit 1
}
main "$@"

.gitignore vendored Normal file

@@ -0,0 +1,4 @@
pkg/
api.key
.api.key.trimmed
*-handin.tar.gz

Makefile Normal file

@@ -0,0 +1,45 @@
# This is the Makefile helping you submit the labs.
# Just create 6.824/api.key with your API key in it,
# and submit your lab with the following command:
# $ make [lab1|lab2a|lab2b|lab2c|lab2d|lab3a|lab3b|lab4a|lab4b]
LABS=" lab1 lab2a lab2b lab2c lab2d lab3a lab3b lab4a lab4b "
%: check-%
@echo "Preparing $@-handin.tar.gz"
@if echo $(LABS) | grep -q " $@ " ; then \
echo "Tarring up your submission..." ; \
COPYFILE_DISABLE=1 tar cvzf $@-handin.tar.gz \
"--exclude=src/main/pg-*.txt" \
"--exclude=src/main/diskvd" \
"--exclude=src/mapreduce/824-mrinput-*.txt" \
"--exclude=src/main/mr-*" \
"--exclude=mrtmp.*" \
"--exclude=src/main/diff.out" \
"--exclude=src/main/mrmaster" \
"--exclude=src/main/mrsequential" \
"--exclude=src/main/mrworker" \
"--exclude=*.so" \
Makefile src; \
if ! test -e api.key ; then \
echo "Missing $(PWD)/api.key. Please create the file with your key in it or submit the $@-handin.tar.gz via the web interface."; \
else \
echo "Are you sure you want to submit $@? Enter 'yes' to continue:"; \
read line; \
if test "$$line" != "yes" ; then echo "Giving up submission"; exit; fi; \
if test `stat -c "%s" "$@-handin.tar.gz" 2>/dev/null || stat -f "%z" "$@-handin.tar.gz"` -ge 20971520 ; then echo "File exceeds 20MB."; exit; fi; \
cat api.key | tr -d '\n' > .api.key.trimmed ; \
curl --silent --fail --show-error -F file=@$@-handin.tar.gz -F "key=<.api.key.trimmed" \
https://6824.scripts.mit.edu/2022/handin.py/upload > /dev/null || { \
echo ; \
echo "Submit seems to have failed."; \
echo "Please upload the tarball manually on the submission website."; } \
fi; \
else \
echo "Bad target $@. Usage: make [$(LABS)]"; \
fi
.PHONY: check-%
check-%:
@echo "Checking that your submission builds correctly..."
@./.check-build git://g.csail.mit.edu/6.824-golabs-2022 $(patsubst check-%,%,$@)

src/.gitignore vendored Normal file

@@ -0,0 +1,12 @@
*.*/
main/mr-tmp/
mrtmp.*
824-mrinput-*.txt
/main/diff.out
/mapreduce/x.txt
/pbservice/x.txt
/kvpaxos/x.txt
*.so
/main/mrcoordinator
/main/mrsequential
/main/mrworker

src/go.mod Normal file

@@ -0,0 +1,3 @@
module 6.824
go 1.15

src/go.sum Normal file (empty)

src/kvraft/client.go Normal file

@@ -0,0 +1,64 @@
package kvraft
import "6.824/labrpc"
import "crypto/rand"
import "math/big"
type Clerk struct {
servers []*labrpc.ClientEnd
// You will have to modify this struct.
}
func nrand() int64 {
max := big.NewInt(int64(1) << 62)
bigx, _ := rand.Int(rand.Reader, max)
x := bigx.Int64()
return x
}
func MakeClerk(servers []*labrpc.ClientEnd) *Clerk {
ck := new(Clerk)
ck.servers = servers
// You'll have to add code here.
return ck
}
//
// fetch the current value for a key.
// returns "" if the key does not exist.
// keeps trying forever in the face of all other errors.
//
// you can send an RPC with code like this:
// ok := ck.servers[i].Call("KVServer.Get", &args, &reply)
//
// the types of args and reply (including whether they are pointers)
// must match the declared types of the RPC handler function's
// arguments. and reply must be passed as a pointer.
//
func (ck *Clerk) Get(key string) string {
// You will have to modify this function.
return ""
}
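// Editor-added sketch, not part of the handout: one way a Clerk might cycle
// through servers until a Get RPC succeeds, using only the Call() pattern
// described above and the fields already declared in common.go. The name
// getSketch and the retry policy are illustrative assumptions; a real
// solution will likely remember the last known leader and add request metadata.
func (ck *Clerk) getSketch(key string) string {
	args := GetArgs{Key: key}
	for i := 0; ; i = (i + 1) % len(ck.servers) {
		var reply GetReply
		ok := ck.servers[i].Call("KVServer.Get", &args, &reply)
		if ok && reply.Err == OK {
			return reply.Value
		}
		if ok && reply.Err == ErrNoKey {
			return ""
		}
		// dropped RPC, timeout, or ErrWrongLeader: try the next server.
	}
}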
//
// shared by Put and Append.
//
// you can send an RPC with code like this:
// ok := ck.servers[i].Call("KVServer.PutAppend", &args, &reply)
//
// the types of args and reply (including whether they are pointers)
// must match the declared types of the RPC handler function's
// arguments. and reply must be passed as a pointer.
//
func (ck *Clerk) PutAppend(key string, value string, op string) {
// You will have to modify this function.
}
func (ck *Clerk) Put(key string, value string) {
ck.PutAppend(key, value, "Put")
}
func (ck *Clerk) Append(key string, value string) {
ck.PutAppend(key, value, "Append")
}

src/kvraft/common.go Normal file

@@ -0,0 +1,33 @@
package kvraft
const (
OK = "OK"
ErrNoKey = "ErrNoKey"
ErrWrongLeader = "ErrWrongLeader"
)
type Err string
// Put or Append
type PutAppendArgs struct {
Key string
Value string
Op string // "Put" or "Append"
// You'll have to add definitions here.
// Field names must start with capital letters,
// otherwise RPC will break.
}
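// Editor-added note, not part of the handout: any fields you add must be
// exported so labgob/gob can encode them. A hypothetical illustration:
//
//	type PutAppendArgs struct {
//		Key      string
//		Value    string
//		Op       string
//		ClientId int64 // exported: encoded and sent over RPC
//		seq      int   // unexported: gob drops it silently; labgob prints a warning
//	}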
type PutAppendReply struct {
Err Err
}
type GetArgs struct {
Key string
// You'll have to add definitions here.
}
type GetReply struct {
Err Err
Value string
}

src/kvraft/config.go Normal file

@@ -0,0 +1,425 @@
package kvraft
import "6.824/labrpc"
import "testing"
import "os"
// import "log"
import crand "crypto/rand"
import "math/big"
import "math/rand"
import "encoding/base64"
import "sync"
import "runtime"
import "6.824/raft"
import "fmt"
import "time"
import "sync/atomic"
func randstring(n int) string {
b := make([]byte, 2*n)
crand.Read(b)
s := base64.URLEncoding.EncodeToString(b)
return s[0:n]
}
func makeSeed() int64 {
max := big.NewInt(int64(1) << 62)
bigx, _ := crand.Int(crand.Reader, max)
x := bigx.Int64()
return x
}
// Randomize server handles
func random_handles(kvh []*labrpc.ClientEnd) []*labrpc.ClientEnd {
sa := make([]*labrpc.ClientEnd, len(kvh))
copy(sa, kvh)
for i := range sa {
j := rand.Intn(i + 1)
sa[i], sa[j] = sa[j], sa[i]
}
return sa
}
type config struct {
mu sync.Mutex
t *testing.T
net *labrpc.Network
n int
kvservers []*KVServer
saved []*raft.Persister
endnames [][]string // names of each server's sending ClientEnds
clerks map[*Clerk][]string
nextClientId int
maxraftstate int
start time.Time // time at which make_config() was called
// begin()/end() statistics
t0 time.Time // time at which test_test.go called cfg.begin()
rpcs0 int // rpcTotal() at start of test
ops int32 // number of clerk get/put/append method calls
}
func (cfg *config) checkTimeout() {
// enforce a two minute real-time limit on each test
if !cfg.t.Failed() && time.Since(cfg.start) > 120*time.Second {
cfg.t.Fatal("test took longer than 120 seconds")
}
}
func (cfg *config) cleanup() {
cfg.mu.Lock()
defer cfg.mu.Unlock()
for i := 0; i < len(cfg.kvservers); i++ {
if cfg.kvservers[i] != nil {
cfg.kvservers[i].Kill()
}
}
cfg.net.Cleanup()
cfg.checkTimeout()
}
// Maximum log size across all servers
func (cfg *config) LogSize() int {
logsize := 0
for i := 0; i < cfg.n; i++ {
n := cfg.saved[i].RaftStateSize()
if n > logsize {
logsize = n
}
}
return logsize
}
// Maximum snapshot size across all servers
func (cfg *config) SnapshotSize() int {
snapshotsize := 0
for i := 0; i < cfg.n; i++ {
n := cfg.saved[i].SnapshotSize()
if n > snapshotsize {
snapshotsize = n
}
}
return snapshotsize
}
// attach server i to servers listed in to
// caller must hold cfg.mu
func (cfg *config) connectUnlocked(i int, to []int) {
// log.Printf("connect peer %d to %v\n", i, to)
// outgoing socket files
for j := 0; j < len(to); j++ {
endname := cfg.endnames[i][to[j]]
cfg.net.Enable(endname, true)
}
// incoming socket files
for j := 0; j < len(to); j++ {
endname := cfg.endnames[to[j]][i]
cfg.net.Enable(endname, true)
}
}
func (cfg *config) connect(i int, to []int) {
cfg.mu.Lock()
defer cfg.mu.Unlock()
cfg.connectUnlocked(i, to)
}
// detach server i from the servers listed in from
// caller must hold cfg.mu
func (cfg *config) disconnectUnlocked(i int, from []int) {
// log.Printf("disconnect peer %d from %v\n", i, from)
// outgoing socket files
for j := 0; j < len(from); j++ {
if cfg.endnames[i] != nil {
endname := cfg.endnames[i][from[j]]
cfg.net.Enable(endname, false)
}
}
// incoming socket files
for j := 0; j < len(from); j++ {
if cfg.endnames[from[j]] != nil {
endname := cfg.endnames[from[j]][i]
cfg.net.Enable(endname, false)
}
}
}
func (cfg *config) disconnect(i int, from []int) {
cfg.mu.Lock()
defer cfg.mu.Unlock()
cfg.disconnectUnlocked(i, from)
}
func (cfg *config) All() []int {
all := make([]int, cfg.n)
for i := 0; i < cfg.n; i++ {
all[i] = i
}
return all
}
func (cfg *config) ConnectAll() {
cfg.mu.Lock()
defer cfg.mu.Unlock()
for i := 0; i < cfg.n; i++ {
cfg.connectUnlocked(i, cfg.All())
}
}
// Sets up 2 partitions with connectivity between servers in each partition.
func (cfg *config) partition(p1 []int, p2 []int) {
cfg.mu.Lock()
defer cfg.mu.Unlock()
// log.Printf("partition servers into: %v %v\n", p1, p2)
for i := 0; i < len(p1); i++ {
cfg.disconnectUnlocked(p1[i], p2)
cfg.connectUnlocked(p1[i], p1)
}
for i := 0; i < len(p2); i++ {
cfg.disconnectUnlocked(p2[i], p1)
cfg.connectUnlocked(p2[i], p2)
}
}
// Create a clerk with clerk specific server names.
// Give it connections to all of the servers, but for
// now enable only connections to servers in to[].
func (cfg *config) makeClient(to []int) *Clerk {
cfg.mu.Lock()
defer cfg.mu.Unlock()
// a fresh set of ClientEnds.
ends := make([]*labrpc.ClientEnd, cfg.n)
endnames := make([]string, cfg.n)
for j := 0; j < cfg.n; j++ {
endnames[j] = randstring(20)
ends[j] = cfg.net.MakeEnd(endnames[j])
cfg.net.Connect(endnames[j], j)
}
ck := MakeClerk(random_handles(ends))
cfg.clerks[ck] = endnames
cfg.nextClientId++
cfg.ConnectClientUnlocked(ck, to)
return ck
}
func (cfg *config) deleteClient(ck *Clerk) {
cfg.mu.Lock()
defer cfg.mu.Unlock()
v := cfg.clerks[ck]
for i := 0; i < len(v); i++ {
os.Remove(v[i])
}
delete(cfg.clerks, ck)
}
// caller should hold cfg.mu
func (cfg *config) ConnectClientUnlocked(ck *Clerk, to []int) {
// log.Printf("ConnectClient %v to %v\n", ck, to)
endnames := cfg.clerks[ck]
for j := 0; j < len(to); j++ {
s := endnames[to[j]]
cfg.net.Enable(s, true)
}
}
func (cfg *config) ConnectClient(ck *Clerk, to []int) {
cfg.mu.Lock()
defer cfg.mu.Unlock()
cfg.ConnectClientUnlocked(ck, to)
}
// caller should hold cfg.mu
func (cfg *config) DisconnectClientUnlocked(ck *Clerk, from []int) {
// log.Printf("DisconnectClient %v from %v\n", ck, from)
endnames := cfg.clerks[ck]
for j := 0; j < len(from); j++ {
s := endnames[from[j]]
cfg.net.Enable(s, false)
}
}
func (cfg *config) DisconnectClient(ck *Clerk, from []int) {
cfg.mu.Lock()
defer cfg.mu.Unlock()
cfg.DisconnectClientUnlocked(ck, from)
}
// Shutdown a server by isolating it
func (cfg *config) ShutdownServer(i int) {
cfg.mu.Lock()
defer cfg.mu.Unlock()
cfg.disconnectUnlocked(i, cfg.All())
// disable client connections to the server.
// it's important to do this before creating
// the new Persister in saved[i], to avoid
// the possibility of the server returning a
// positive reply to an Append but persisting
// the result in the superseded Persister.
cfg.net.DeleteServer(i)
// a fresh persister, in case old instance
// continues to update the Persister.
// but copy old persister's content so that we always
// pass Make() the last persisted state.
if cfg.saved[i] != nil {
cfg.saved[i] = cfg.saved[i].Copy()
}
kv := cfg.kvservers[i]
if kv != nil {
cfg.mu.Unlock()
kv.Kill()
cfg.mu.Lock()
cfg.kvservers[i] = nil
}
}
// To restart a server, first call ShutdownServer()
func (cfg *config) StartServer(i int) {
cfg.mu.Lock()
// a fresh set of outgoing ClientEnd names.
cfg.endnames[i] = make([]string, cfg.n)
for j := 0; j < cfg.n; j++ {
cfg.endnames[i][j] = randstring(20)
}
// a fresh set of ClientEnds.
ends := make([]*labrpc.ClientEnd, cfg.n)
for j := 0; j < cfg.n; j++ {
ends[j] = cfg.net.MakeEnd(cfg.endnames[i][j])
cfg.net.Connect(cfg.endnames[i][j], j)
}
// a fresh persister, so old instance doesn't overwrite
// new instance's persisted state.
// give the fresh persister a copy of the old persister's
// state, so that the spec is that we pass StartKVServer()
// the last persisted state.
if cfg.saved[i] != nil {
cfg.saved[i] = cfg.saved[i].Copy()
} else {
cfg.saved[i] = raft.MakePersister()
}
cfg.mu.Unlock()
cfg.kvservers[i] = StartKVServer(ends, i, cfg.saved[i], cfg.maxraftstate)
kvsvc := labrpc.MakeService(cfg.kvservers[i])
rfsvc := labrpc.MakeService(cfg.kvservers[i].rf)
srv := labrpc.MakeServer()
srv.AddService(kvsvc)
srv.AddService(rfsvc)
cfg.net.AddServer(i, srv)
}
func (cfg *config) Leader() (bool, int) {
cfg.mu.Lock()
defer cfg.mu.Unlock()
for i := 0; i < cfg.n; i++ {
_, is_leader := cfg.kvservers[i].rf.GetState()
if is_leader {
return true, i
}
}
return false, 0
}
// Partition servers into 2 groups and put current leader in minority
func (cfg *config) make_partition() ([]int, []int) {
_, l := cfg.Leader()
p1 := make([]int, cfg.n/2+1)
p2 := make([]int, cfg.n/2)
j := 0
for i := 0; i < cfg.n; i++ {
if i != l {
if j < len(p1) {
p1[j] = i
} else {
p2[j-len(p1)] = i
}
j++
}
}
p2[len(p2)-1] = l
return p1, p2
}
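// Editor-added worked example, not part of the handout: with cfg.n = 5 and
// current leader l = 3, the loop above fills p1 = {0, 1, 2} and p2 = {4, _},
// and the final assignment places the leader last, so p2 = {4, 3} -- the
// leader ends up in the minority partition.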
var ncpu_once sync.Once
func make_config(t *testing.T, n int, unreliable bool, maxraftstate int) *config {
ncpu_once.Do(func() {
if runtime.NumCPU() < 2 {
fmt.Printf("warning: only one CPU, which may conceal locking bugs\n")
}
rand.Seed(makeSeed())
})
runtime.GOMAXPROCS(4)
cfg := &config{}
cfg.t = t
cfg.net = labrpc.MakeNetwork()
cfg.n = n
cfg.kvservers = make([]*KVServer, cfg.n)
cfg.saved = make([]*raft.Persister, cfg.n)
cfg.endnames = make([][]string, cfg.n)
cfg.clerks = make(map[*Clerk][]string)
cfg.nextClientId = cfg.n + 1000 // client ids start 1000 above the highest serverid
cfg.maxraftstate = maxraftstate
cfg.start = time.Now()
// create a full set of KV servers.
for i := 0; i < cfg.n; i++ {
cfg.StartServer(i)
}
cfg.ConnectAll()
cfg.net.Reliable(!unreliable)
return cfg
}
func (cfg *config) rpcTotal() int {
return cfg.net.GetTotalCount()
}
// start a Test.
// print the Test message.
// e.g. cfg.begin("Test (2B): RPC counts aren't too high")
func (cfg *config) begin(description string) {
fmt.Printf("%s ...\n", description)
cfg.t0 = time.Now()
cfg.rpcs0 = cfg.rpcTotal()
atomic.StoreInt32(&cfg.ops, 0)
}
func (cfg *config) op() {
atomic.AddInt32(&cfg.ops, 1)
}
// end a Test -- the fact that we got here means there
// was no failure.
// print the Passed message,
// and some performance numbers.
func (cfg *config) end() {
cfg.checkTimeout()
if cfg.t.Failed() == false {
t := time.Since(cfg.t0).Seconds() // real time
npeers := cfg.n // number of Raft peers
nrpc := cfg.rpcTotal() - cfg.rpcs0 // number of RPC sends
ops := atomic.LoadInt32(&cfg.ops) // number of clerk get/put/append calls
fmt.Printf(" ... Passed --")
fmt.Printf(" %4.1f %d %5d %4d\n", t, npeers, nrpc, ops)
}
}

src/kvraft/server.go Normal file

@@ -0,0 +1,101 @@
package kvraft
import (
"6.824/labgob"
"6.824/labrpc"
"6.824/raft"
"log"
"sync"
"sync/atomic"
)
const Debug = false
func DPrintf(format string, a ...interface{}) (n int, err error) {
if Debug {
log.Printf(format, a...)
}
return
}
type Op struct {
// Your definitions here.
// Field names must start with capital letters,
// otherwise RPC will break.
}
type KVServer struct {
mu sync.Mutex
me int
rf *raft.Raft
applyCh chan raft.ApplyMsg
dead int32 // set by Kill()
maxraftstate int // snapshot if log grows this big
// Your definitions here.
}
func (kv *KVServer) Get(args *GetArgs, reply *GetReply) {
// Your code here.
}
func (kv *KVServer) PutAppend(args *PutAppendArgs, reply *PutAppendReply) {
// Your code here.
}
//
// the tester calls Kill() when a KVServer instance won't
// be needed again. for your convenience, we supply
// code to set rf.dead (without needing a lock),
// and a killed() method to test rf.dead in
// long-running loops. you can also add your own
// code to Kill(). you're not required to do anything
// about this, but it may be convenient (for example)
// to suppress debug output from a Kill()ed instance.
//
func (kv *KVServer) Kill() {
atomic.StoreInt32(&kv.dead, 1)
kv.rf.Kill()
// Your code here, if desired.
}
func (kv *KVServer) killed() bool {
z := atomic.LoadInt32(&kv.dead)
return z == 1
}
//
// servers[] contains the ports of the set of
// servers that will cooperate via Raft to
// form the fault-tolerant key/value service.
// me is the index of the current server in servers[].
// the k/v server should store snapshots through the underlying Raft
// implementation, which should call persister.SaveStateAndSnapshot() to
// atomically save the Raft state along with the snapshot.
// the k/v server should snapshot when Raft's saved state exceeds maxraftstate bytes,
// in order to allow Raft to garbage-collect its log. if maxraftstate is -1,
// you don't need to snapshot.
// StartKVServer() must return quickly, so it should start goroutines
// for any long-running work.
//
func StartKVServer(servers []*labrpc.ClientEnd, me int, persister *raft.Persister, maxraftstate int) *KVServer {
// call labgob.Register on structures you want
// Go's RPC library to marshall/unmarshall.
labgob.Register(Op{})
kv := new(KVServer)
kv.me = me
kv.maxraftstate = maxraftstate
// You may need initialization code here.
kv.applyCh = make(chan raft.ApplyMsg)
kv.rf = raft.Make(servers, me, persister, kv.applyCh)
// You may need initialization code here.
return kv
}
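// Editor-added sketch, not part of the handout: StartKVServer() must return
// quickly, so long-running work belongs in a goroutine such as the one below,
// which drains applyCh until the server is killed. It assumes raft.ApplyMsg
// exposes CommandValid and Command as in the course's Raft lab; how a command
// updates the key/value state is left entirely to your design.
func (kv *KVServer) applyLoopSketch() {
	for !kv.killed() {
		msg := <-kv.applyCh
		if msg.CommandValid {
			kv.mu.Lock()
			// apply msg.Command (an Op) to the in-memory key/value state here,
			// then wake any RPC handler waiting on this log index.
			kv.mu.Unlock()
		}
	}
}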

src/kvraft/test_test.go Normal file

@@ -0,0 +1,716 @@
package kvraft
import "6.824/porcupine"
import "6.824/models"
import "testing"
import "strconv"
import "time"
import "math/rand"
import "strings"
import "sync"
import "sync/atomic"
import "fmt"
import "io/ioutil"
// The tester generously allows solutions to complete elections in one second
// (much more than the paper's range of timeouts).
const electionTimeout = 1 * time.Second
const linearizabilityCheckTimeout = 1 * time.Second
type OpLog struct {
operations []porcupine.Operation
sync.Mutex
}
func (log *OpLog) Append(op porcupine.Operation) {
log.Lock()
defer log.Unlock()
log.operations = append(log.operations, op)
}
func (log *OpLog) Read() []porcupine.Operation {
log.Lock()
defer log.Unlock()
ops := make([]porcupine.Operation, len(log.operations))
copy(ops, log.operations)
return ops
}
// get/put/putappend that keep counts
func Get(cfg *config, ck *Clerk, key string, log *OpLog, cli int) string {
start := time.Now().UnixNano()
v := ck.Get(key)
end := time.Now().UnixNano()
cfg.op()
if log != nil {
log.Append(porcupine.Operation{
Input: models.KvInput{Op: 0, Key: key},
Output: models.KvOutput{Value: v},
Call: start,
Return: end,
ClientId: cli,
})
}
return v
}
func Put(cfg *config, ck *Clerk, key string, value string, log *OpLog, cli int) {
start := time.Now().UnixNano()
ck.Put(key, value)
end := time.Now().UnixNano()
cfg.op()
if log != nil {
log.Append(porcupine.Operation{
Input: models.KvInput{Op: 1, Key: key, Value: value},
Output: models.KvOutput{},
Call: start,
Return: end,
ClientId: cli,
})
}
}
func Append(cfg *config, ck *Clerk, key string, value string, log *OpLog, cli int) {
start := time.Now().UnixNano()
ck.Append(key, value)
end := time.Now().UnixNano()
cfg.op()
if log != nil {
log.Append(porcupine.Operation{
Input: models.KvInput{Op: 2, Key: key, Value: value},
Output: models.KvOutput{},
Call: start,
Return: end,
ClientId: cli,
})
}
}
func check(cfg *config, t *testing.T, ck *Clerk, key string, value string) {
v := Get(cfg, ck, key, nil, -1)
if v != value {
t.Fatalf("Get(%v): expected:\n%v\nreceived:\n%v", key, value, v)
}
}
// a client runs the function f and then signals it is done
func run_client(t *testing.T, cfg *config, me int, ca chan bool, fn func(me int, ck *Clerk, t *testing.T)) {
ok := false
defer func() { ca <- ok }()
ck := cfg.makeClient(cfg.All())
fn(me, ck, t)
ok = true
cfg.deleteClient(ck)
}
// spawn ncli clients and wait until they are all done
func spawn_clients_and_wait(t *testing.T, cfg *config, ncli int, fn func(me int, ck *Clerk, t *testing.T)) {
ca := make([]chan bool, ncli)
for cli := 0; cli < ncli; cli++ {
ca[cli] = make(chan bool)
go run_client(t, cfg, cli, ca[cli], fn)
}
// log.Printf("spawn_clients_and_wait: waiting for clients")
for cli := 0; cli < ncli; cli++ {
ok := <-ca[cli]
// log.Printf("spawn_clients_and_wait: client %d is done\n", cli)
if ok == false {
t.Fatalf("failure")
}
}
}
// predict effect of Append(k, val) if old value is prev.
func NextValue(prev string, val string) string {
return prev + val
}
// check that for a specific client all known appends are present in a value,
// and in order
func checkClntAppends(t *testing.T, clnt int, v string, count int) {
lastoff := -1
for j := 0; j < count; j++ {
wanted := "x " + strconv.Itoa(clnt) + " " + strconv.Itoa(j) + " y"
off := strings.Index(v, wanted)
if off < 0 {
t.Fatalf("%v missing element %v in Append result %v", clnt, wanted, v)
}
off1 := strings.LastIndex(v, wanted)
if off1 != off {
t.Fatalf("duplicate element %v in Append result", wanted)
}
if off <= lastoff {
t.Fatalf("wrong order for element %v in Append result", wanted)
}
lastoff = off
}
}
// check that all known appends are present in a value,
// and are in order for each concurrent client.
func checkConcurrentAppends(t *testing.T, v string, counts []int) {
nclients := len(counts)
for i := 0; i < nclients; i++ {
lastoff := -1
for j := 0; j < counts[i]; j++ {
wanted := "x " + strconv.Itoa(i) + " " + strconv.Itoa(j) + " y"
off := strings.Index(v, wanted)
if off < 0 {
t.Fatalf("%v missing element %v in Append result %v", i, wanted, v)
}
off1 := strings.LastIndex(v, wanted)
if off1 != off {
t.Fatalf("duplicate element %v in Append result", wanted)
}
if off <= lastoff {
t.Fatalf("wrong order for element %v in Append result", wanted)
}
lastoff = off
}
}
}
// repartition the servers periodically
func partitioner(t *testing.T, cfg *config, ch chan bool, done *int32) {
defer func() { ch <- true }()
for atomic.LoadInt32(done) == 0 {
a := make([]int, cfg.n)
for i := 0; i < cfg.n; i++ {
a[i] = (rand.Int() % 2)
}
pa := make([][]int, 2)
for i := 0; i < 2; i++ {
pa[i] = make([]int, 0)
for j := 0; j < cfg.n; j++ {
if a[j] == i {
pa[i] = append(pa[i], j)
}
}
}
cfg.partition(pa[0], pa[1])
time.Sleep(electionTimeout + time.Duration(rand.Int63()%200)*time.Millisecond)
}
}
// Basic test is as follows: one or more clients submitting Append/Get
// operations to set of servers for some period of time. After the period is
// over, test checks that all appended values are present and in order for a
// particular key. If unreliable is set, RPCs may fail. If crash is set, the
// servers crash after the period is over and restart. If partitions is set,
// the test repartitions the network concurrently with the clients and servers. If
// maxraftstate is a positive number, the size of the state for Raft (i.e., log
// size) shouldn't exceed 8*maxraftstate. If maxraftstate is negative,
// snapshots shouldn't be used.
func GenericTest(t *testing.T, part string, nclients int, nservers int, unreliable bool, crash bool, partitions bool, maxraftstate int, randomkeys bool) {
title := "Test: "
if unreliable {
// the network drops RPC requests and replies.
title = title + "unreliable net, "
}
if crash {
// peers re-start, and thus persistence must work.
title = title + "restarts, "
}
if partitions {
// the network may partition
title = title + "partitions, "
}
if maxraftstate != -1 {
title = title + "snapshots, "
}
if randomkeys {
title = title + "random keys, "
}
if nclients > 1 {
title = title + "many clients"
} else {
title = title + "one client"
}
title = title + " (" + part + ")" // 3A or 3B
cfg := make_config(t, nservers, unreliable, maxraftstate)
defer cfg.cleanup()
cfg.begin(title)
opLog := &OpLog{}
ck := cfg.makeClient(cfg.All())
done_partitioner := int32(0)
done_clients := int32(0)
ch_partitioner := make(chan bool)
clnts := make([]chan int, nclients)
for i := 0; i < nclients; i++ {
clnts[i] = make(chan int)
}
for i := 0; i < 3; i++ {
// log.Printf("Iteration %v\n", i)
atomic.StoreInt32(&done_clients, 0)
atomic.StoreInt32(&done_partitioner, 0)
go spawn_clients_and_wait(t, cfg, nclients, func(cli int, myck *Clerk, t *testing.T) {
j := 0
defer func() {
clnts[cli] <- j
}()
last := "" // only used when not randomkeys
if !randomkeys {
Put(cfg, myck, strconv.Itoa(cli), last, opLog, cli)
}
for atomic.LoadInt32(&done_clients) == 0 {
var key string
if randomkeys {
key = strconv.Itoa(rand.Intn(nclients))
} else {
key = strconv.Itoa(cli)
}
nv := "x " + strconv.Itoa(cli) + " " + strconv.Itoa(j) + " y"
if (rand.Int() % 1000) < 500 {
// log.Printf("%d: client new append %v\n", cli, nv)
Append(cfg, myck, key, nv, opLog, cli)
if !randomkeys {
last = NextValue(last, nv)
}
j++
} else if randomkeys && (rand.Int()%1000) < 100 {
// we only do this when using random keys, because it would break the
// check done after Get() operations
Put(cfg, myck, key, nv, opLog, cli)
j++
} else {
// log.Printf("%d: client new get %v\n", cli, key)
v := Get(cfg, myck, key, opLog, cli)
// the following check only makes sense when we're not using random keys
if !randomkeys && v != last {
t.Fatalf("get wrong value, key %v, wanted:\n%v\n, got\n%v\n", key, last, v)
}
}
}
})
if partitions {
// Allow the clients to perform some operations without interruption
time.Sleep(1 * time.Second)
go partitioner(t, cfg, ch_partitioner, &done_partitioner)
}
time.Sleep(5 * time.Second)
atomic.StoreInt32(&done_clients, 1) // tell clients to quit
atomic.StoreInt32(&done_partitioner, 1) // tell partitioner to quit
if partitions {
// log.Printf("wait for partitioner\n")
<-ch_partitioner
// reconnect network and submit a request. A client may
// have submitted a request in a minority. That request
// won't return until that server discovers a new term
// has started.
cfg.ConnectAll()
// wait for a while so that we have a new term
time.Sleep(electionTimeout)
}
if crash {
// log.Printf("shutdown servers\n")
for i := 0; i < nservers; i++ {
cfg.ShutdownServer(i)
}
// Wait for a while for servers to shutdown, since
// shutdown isn't a real crash and isn't instantaneous
time.Sleep(electionTimeout)
// log.Printf("restart servers\n")
// crash and re-start all
for i := 0; i < nservers; i++ {
cfg.StartServer(i)
}
cfg.ConnectAll()
}
// log.Printf("wait for clients\n")
for i := 0; i < nclients; i++ {
// log.Printf("read from clients %d\n", i)
j := <-clnts[i]
// if j < 10 {
// log.Printf("Warning: client %d managed to perform only %d put operations in 1 sec?\n", i, j)
// }
key := strconv.Itoa(i)
// log.Printf("Check %v for client %d\n", j, i)
v := Get(cfg, ck, key, opLog, 0)
if !randomkeys {
checkClntAppends(t, i, v, j)
}
}
if maxraftstate > 0 {
// Check maximum after the servers have processed all client
// requests and had time to checkpoint.
sz := cfg.LogSize()
if sz > 8*maxraftstate {
t.Fatalf("logs were not trimmed (%v > 8*%v)", sz, maxraftstate)
}
}
if maxraftstate < 0 {
// Check that snapshots are not used
ssz := cfg.SnapshotSize()
if ssz > 0 {
t.Fatalf("snapshot too large (%v), should not be used when maxraftstate = %d", ssz, maxraftstate)
}
}
}
res, info := porcupine.CheckOperationsVerbose(models.KvModel, opLog.Read(), linearizabilityCheckTimeout)
if res == porcupine.Illegal {
file, err := ioutil.TempFile("", "*.html")
if err != nil {
fmt.Printf("info: failed to create temp file for visualization")
} else {
err = porcupine.Visualize(models.KvModel, info, file)
if err != nil {
fmt.Printf("info: failed to write history visualization to %s\n", file.Name())
} else {
fmt.Printf("info: wrote history visualization to %s\n", file.Name())
}
}
t.Fatal("history is not linearizable")
} else if res == porcupine.Unknown {
fmt.Println("info: linearizability check timed out, assuming history is ok")
}
cfg.end()
}
// Check that ops are committed fast enough, better than 1 per heartbeat interval
func GenericTestSpeed(t *testing.T, part string, maxraftstate int) {
const nservers = 3
const numOps = 1000
cfg := make_config(t, nservers, false, maxraftstate)
defer cfg.cleanup()
ck := cfg.makeClient(cfg.All())
cfg.begin(fmt.Sprintf("Test: ops complete fast enough (%s)", part))
// wait until first op completes, so we know a leader is elected
// and KV servers are ready to process client requests
ck.Get("x")
start := time.Now()
for i := 0; i < numOps; i++ {
ck.Append("x", "x 0 "+strconv.Itoa(i)+" y")
}
dur := time.Since(start)
v := ck.Get("x")
checkClntAppends(t, 0, v, numOps)
// heartbeat interval should be ~ 100 ms; require at least 3 ops per
// heartbeat interval, i.e. at most ~33 ms per op (~33 s total for numOps=1000)
const heartbeatInterval = 100 * time.Millisecond
const opsPerInterval = 3
const timePerOp = heartbeatInterval / opsPerInterval
if dur > numOps*timePerOp {
t.Fatalf("Operations completed too slowly %v/op > %v/op\n", dur/numOps, timePerOp)
}
cfg.end()
}
func TestBasic3A(t *testing.T) {
// Test: one client (3A) ...
GenericTest(t, "3A", 1, 5, false, false, false, -1, false)
}
func TestSpeed3A(t *testing.T) {
GenericTestSpeed(t, "3A", -1)
}
func TestConcurrent3A(t *testing.T) {
// Test: many clients (3A) ...
GenericTest(t, "3A", 5, 5, false, false, false, -1, false)
}
func TestUnreliable3A(t *testing.T) {
// Test: unreliable net, many clients (3A) ...
GenericTest(t, "3A", 5, 5, true, false, false, -1, false)
}
func TestUnreliableOneKey3A(t *testing.T) {
const nservers = 3
cfg := make_config(t, nservers, true, -1)
defer cfg.cleanup()
ck := cfg.makeClient(cfg.All())
cfg.begin("Test: concurrent append to same key, unreliable (3A)")
Put(cfg, ck, "k", "", nil, -1)
const nclient = 5
const upto = 10
spawn_clients_and_wait(t, cfg, nclient, func(me int, myck *Clerk, t *testing.T) {
n := 0
for n < upto {
Append(cfg, myck, "k", "x "+strconv.Itoa(me)+" "+strconv.Itoa(n)+" y", nil, -1)
n++
}
})
var counts []int
for i := 0; i < nclient; i++ {
counts = append(counts, upto)
}
vx := Get(cfg, ck, "k", nil, -1)
checkConcurrentAppends(t, vx, counts)
cfg.end()
}
// Submit a request in the minority partition and check that the request
// doesn't go through until the partition heals. The leader in the original
// network ends up in the minority partition.
func TestOnePartition3A(t *testing.T) {
const nservers = 5
cfg := make_config(t, nservers, false, -1)
defer cfg.cleanup()
ck := cfg.makeClient(cfg.All())
Put(cfg, ck, "1", "13", nil, -1)
cfg.begin("Test: progress in majority (3A)")
p1, p2 := cfg.make_partition()
cfg.partition(p1, p2)
ckp1 := cfg.makeClient(p1) // connect ckp1 to p1
ckp2a := cfg.makeClient(p2) // connect ckp2a to p2
ckp2b := cfg.makeClient(p2) // connect ckp2b to p2
Put(cfg, ckp1, "1", "14", nil, -1)
check(cfg, t, ckp1, "1", "14")
cfg.end()
done0 := make(chan bool)
done1 := make(chan bool)
cfg.begin("Test: no progress in minority (3A)")
go func() {
Put(cfg, ckp2a, "1", "15", nil, -1)
done0 <- true
}()
go func() {
Get(cfg, ckp2b, "1", nil, -1) // different clerk in p2
done1 <- true
}()
select {
case <-done0:
t.Fatalf("Put in minority completed")
case <-done1:
t.Fatalf("Get in minority completed")
case <-time.After(time.Second):
}
check(cfg, t, ckp1, "1", "14")
Put(cfg, ckp1, "1", "16", nil, -1)
check(cfg, t, ckp1, "1", "16")
cfg.end()
cfg.begin("Test: completion after heal (3A)")
cfg.ConnectAll()
cfg.ConnectClient(ckp2a, cfg.All())
cfg.ConnectClient(ckp2b, cfg.All())
time.Sleep(electionTimeout)
select {
case <-done0:
case <-time.After(30 * 100 * time.Millisecond):
t.Fatalf("Put did not complete")
}
select {
case <-done1:
case <-time.After(30 * 100 * time.Millisecond):
t.Fatalf("Get did not complete")
default:
}
check(cfg, t, ck, "1", "15")
cfg.end()
}
func TestManyPartitionsOneClient3A(t *testing.T) {
// Test: partitions, one client (3A) ...
GenericTest(t, "3A", 1, 5, false, false, true, -1, false)
}
func TestManyPartitionsManyClients3A(t *testing.T) {
// Test: partitions, many clients (3A) ...
GenericTest(t, "3A", 5, 5, false, false, true, -1, false)
}
func TestPersistOneClient3A(t *testing.T) {
// Test: restarts, one client (3A) ...
GenericTest(t, "3A", 1, 5, false, true, false, -1, false)
}
func TestPersistConcurrent3A(t *testing.T) {
// Test: restarts, many clients (3A) ...
GenericTest(t, "3A", 5, 5, false, true, false, -1, false)
}
func TestPersistConcurrentUnreliable3A(t *testing.T) {
// Test: unreliable net, restarts, many clients (3A) ...
GenericTest(t, "3A", 5, 5, true, true, false, -1, false)
}
func TestPersistPartition3A(t *testing.T) {
// Test: restarts, partitions, many clients (3A) ...
GenericTest(t, "3A", 5, 5, false, true, true, -1, false)
}
func TestPersistPartitionUnreliable3A(t *testing.T) {
// Test: unreliable net, restarts, partitions, many clients (3A) ...
GenericTest(t, "3A", 5, 5, true, true, true, -1, false)
}
func TestPersistPartitionUnreliableLinearizable3A(t *testing.T) {
// Test: unreliable net, restarts, partitions, random keys, many clients (3A) ...
GenericTest(t, "3A", 15, 7, true, true, true, -1, true)
}
//
// if one server falls behind, then rejoins, does it
// recover by using the InstallSnapshot RPC?
// also checks that majority discards committed log entries
// even if minority doesn't respond.
//
func TestSnapshotRPC3B(t *testing.T) {
const nservers = 3
maxraftstate := 1000
cfg := make_config(t, nservers, false, maxraftstate)
defer cfg.cleanup()
ck := cfg.makeClient(cfg.All())
cfg.begin("Test: InstallSnapshot RPC (3B)")
Put(cfg, ck, "a", "A", nil, -1)
check(cfg, t, ck, "a", "A")
// a bunch of puts into the majority partition.
cfg.partition([]int{0, 1}, []int{2})
{
ck1 := cfg.makeClient([]int{0, 1})
for i := 0; i < 50; i++ {
Put(cfg, ck1, strconv.Itoa(i), strconv.Itoa(i), nil, -1)
}
time.Sleep(electionTimeout)
Put(cfg, ck1, "b", "B", nil, -1)
}
// check that the majority partition has thrown away
// most of its log entries.
sz := cfg.LogSize()
if sz > 8*maxraftstate {
t.Fatalf("logs were not trimmed (%v > 8*%v)", sz, maxraftstate)
}
// now make group that requires participation of
// lagging server, so that it has to catch up.
cfg.partition([]int{0, 2}, []int{1})
{
ck1 := cfg.makeClient([]int{0, 2})
Put(cfg, ck1, "c", "C", nil, -1)
Put(cfg, ck1, "d", "D", nil, -1)
check(cfg, t, ck1, "a", "A")
check(cfg, t, ck1, "b", "B")
check(cfg, t, ck1, "1", "1")
check(cfg, t, ck1, "49", "49")
}
// now everybody
cfg.partition([]int{0, 1, 2}, []int{})
Put(cfg, ck, "e", "E", nil, -1)
check(cfg, t, ck, "c", "C")
check(cfg, t, ck, "e", "E")
check(cfg, t, ck, "1", "1")
cfg.end()
}
// are the snapshots not too huge? 500 bytes is a generous bound for the
// operations we're doing here.
func TestSnapshotSize3B(t *testing.T) {
const nservers = 3
maxraftstate := 1000
maxsnapshotstate := 500
cfg := make_config(t, nservers, false, maxraftstate)
defer cfg.cleanup()
ck := cfg.makeClient(cfg.All())
cfg.begin("Test: snapshot size is reasonable (3B)")
for i := 0; i < 200; i++ {
Put(cfg, ck, "x", "0", nil, -1)
check(cfg, t, ck, "x", "0")
Put(cfg, ck, "x", "1", nil, -1)
check(cfg, t, ck, "x", "1")
}
// check that servers have thrown away most of their log entries
sz := cfg.LogSize()
if sz > 8*maxraftstate {
t.Fatalf("logs were not trimmed (%v > 8*%v)", sz, maxraftstate)
}
// check that the snapshots are not unreasonably large
ssz := cfg.SnapshotSize()
if ssz > maxsnapshotstate {
t.Fatalf("snapshot too large (%v > %v)", ssz, maxsnapshotstate)
}
cfg.end()
}
func TestSpeed3B(t *testing.T) {
GenericTestSpeed(t, "3B", 1000)
}
func TestSnapshotRecover3B(t *testing.T) {
// Test: restarts, snapshots, one client (3B) ...
GenericTest(t, "3B", 1, 5, false, true, false, 1000, false)
}
func TestSnapshotRecoverManyClients3B(t *testing.T) {
// Test: restarts, snapshots, many clients (3B) ...
GenericTest(t, "3B", 20, 5, false, true, false, 1000, false)
}
func TestSnapshotUnreliable3B(t *testing.T) {
// Test: unreliable net, snapshots, many clients (3B) ...
GenericTest(t, "3B", 5, 5, true, false, false, 1000, false)
}
func TestSnapshotUnreliableRecover3B(t *testing.T) {
// Test: unreliable net, restarts, snapshots, many clients (3B) ...
GenericTest(t, "3B", 5, 5, true, true, false, 1000, false)
}
func TestSnapshotUnreliableRecoverConcurrentPartition3B(t *testing.T) {
// Test: unreliable net, restarts, partitions, snapshots, many clients (3B) ...
GenericTest(t, "3B", 5, 5, true, true, true, 1000, false)
}
func TestSnapshotUnreliableRecoverConcurrentPartitionLinearizable3B(t *testing.T) {
// Test: unreliable net, restarts, partitions, snapshots, random keys, many clients (3B) ...
GenericTest(t, "3B", 15, 7, true, true, true, 1000, true)
}

src/labgob/labgob.go Normal file

@@ -0,0 +1,177 @@
package labgob
//
// trying to send non-capitalized fields over RPC produces a range of
// misbehavior, including both mysterious incorrect computation and
// outright crashes. so this wrapper around Go's encoding/gob warns
// about non-capitalized field names.
//
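// for example (editor-added illustration, not part of the handout):
//
//	type Args struct {
//		Term  int // exported: encoded normally
//		votes int // unexported: gob drops it silently; labgob warns instead
//	}
//	labgob.Register(Args{}) // prints the "labgob error: lower-case field ..." warning
//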
import "encoding/gob"
import "io"
import "reflect"
import "fmt"
import "sync"
import "unicode"
import "unicode/utf8"
var mu sync.Mutex
var errorCount int // for TestCapital
var checked map[reflect.Type]bool
type LabEncoder struct {
gob *gob.Encoder
}
func NewEncoder(w io.Writer) *LabEncoder {
enc := &LabEncoder{}
enc.gob = gob.NewEncoder(w)
return enc
}
func (enc *LabEncoder) Encode(e interface{}) error {
checkValue(e)
return enc.gob.Encode(e)
}
func (enc *LabEncoder) EncodeValue(value reflect.Value) error {
checkValue(value.Interface())
return enc.gob.EncodeValue(value)
}
type LabDecoder struct {
gob *gob.Decoder
}
func NewDecoder(r io.Reader) *LabDecoder {
dec := &LabDecoder{}
dec.gob = gob.NewDecoder(r)
return dec
}
func (dec *LabDecoder) Decode(e interface{}) error {
checkValue(e)
checkDefault(e)
return dec.gob.Decode(e)
}
func Register(value interface{}) {
checkValue(value)
gob.Register(value)
}
func RegisterName(name string, value interface{}) {
checkValue(value)
gob.RegisterName(name, value)
}
func checkValue(value interface{}) {
checkType(reflect.TypeOf(value))
}
func checkType(t reflect.Type) {
k := t.Kind()
mu.Lock()
// only complain once, and avoid recursion.
if checked == nil {
checked = map[reflect.Type]bool{}
}
if checked[t] {
mu.Unlock()
return
}
checked[t] = true
mu.Unlock()
switch k {
case reflect.Struct:
for i := 0; i < t.NumField(); i++ {
f := t.Field(i)
rune, _ := utf8.DecodeRuneInString(f.Name)
if unicode.IsUpper(rune) == false {
// ta da
fmt.Printf("labgob error: lower-case field %v of %v in RPC or persist/snapshot will break your Raft\n",
f.Name, t.Name())
mu.Lock()
errorCount += 1
mu.Unlock()
}
checkType(f.Type)
}
return
case reflect.Slice, reflect.Array, reflect.Ptr:
checkType(t.Elem())
return
case reflect.Map:
checkType(t.Elem())
checkType(t.Key())
return
default:
return
}
}
//
// warn if the value contains non-default values,
// as it would if one sent an RPC but the reply
// struct was already modified. if the RPC reply
// contains default values, GOB won't overwrite
// the non-default value.
//
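// for example (editor-added, not part of the handout), this caller-side
// pattern is what typically triggers the warning:
//
//	var reply SomeReply                       // zero-valued: safe to decode into
//	end.Call("Service.Method", &args, &reply) // fine the first time
//	end.Call("Service.Method", &args, &reply) // risky: reply now holds old data,
//	                                          // and gob keeps stale fields that
//	                                          // arrive as defaults
//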
func checkDefault(value interface{}) {
if value == nil {
return
}
checkDefault1(reflect.ValueOf(value), 1, "")
}
func checkDefault1(value reflect.Value, depth int, name string) {
if depth > 3 {
return
}
t := value.Type()
k := t.Kind()
switch k {
case reflect.Struct:
for i := 0; i < t.NumField(); i++ {
vv := value.Field(i)
name1 := t.Field(i).Name
if name != "" {
name1 = name + "." + name1
}
checkDefault1(vv, depth+1, name1)
}
return
case reflect.Ptr:
if value.IsNil() {
return
}
checkDefault1(value.Elem(), depth+1, name)
return
case reflect.Bool,
reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64,
reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64,
reflect.Uintptr, reflect.Float32, reflect.Float64,
reflect.String:
if reflect.DeepEqual(reflect.Zero(t).Interface(), value.Interface()) == false {
mu.Lock()
if errorCount < 1 {
what := name
if what == "" {
what = t.Name()
}
// this warning typically arises if code re-uses the same RPC reply
// variable for multiple RPC calls, or if code restores persisted
// state into a variable that already has non-default values.
fmt.Printf("labgob warning: Decoding into a non-default variable/field %v may not work\n",
what)
}
errorCount += 1
mu.Unlock()
}
return
}
}

src/labgob/test_test.go Normal file

@@ -0,0 +1,172 @@
package labgob
import "testing"
import "bytes"
type T1 struct {
T1int0 int
T1int1 int
T1string0 string
T1string1 string
}
type T2 struct {
T2slice []T1
T2map map[int]*T1
T2t3 interface{}
}
type T3 struct {
T3int999 int
}
//
// test that we didn't break GOB.
//
func TestGOB(t *testing.T) {
e0 := errorCount
w := new(bytes.Buffer)
Register(T3{})
{
x0 := 0
x1 := 1
t1 := T1{}
t1.T1int1 = 1
t1.T1string1 = "6.824"
t2 := T2{}
t2.T2slice = []T1{T1{}, t1}
t2.T2map = map[int]*T1{}
t2.T2map[99] = &T1{1, 2, "x", "y"}
t2.T2t3 = T3{999}
e := NewEncoder(w)
e.Encode(x0)
e.Encode(x1)
e.Encode(t1)
e.Encode(t2)
}
data := w.Bytes()
{
var x0 int
var x1 int
var t1 T1
var t2 T2
r := bytes.NewBuffer(data)
d := NewDecoder(r)
if d.Decode(&x0) != nil ||
d.Decode(&x1) != nil ||
d.Decode(&t1) != nil ||
d.Decode(&t2) != nil {
t.Fatalf("Decode failed")
}
if x0 != 0 {
t.Fatalf("wrong x0 %v\n", x0)
}
if x1 != 1 {
t.Fatalf("wrong x1 %v\n", x1)
}
if t1.T1int0 != 0 {
t.Fatalf("wrong t1.T1int0 %v\n", t1.T1int0)
}
if t1.T1int1 != 1 {
t.Fatalf("wrong t1.T1int1 %v\n", t1.T1int1)
}
if t1.T1string0 != "" {
t.Fatalf("wrong t1.T1string0 %v\n", t1.T1string0)
}
if t1.T1string1 != "6.824" {
t.Fatalf("wrong t1.T1string1 %v\n", t1.T1string1)
}
if len(t2.T2slice) != 2 {
t.Fatalf("wrong t2.T2slice len %v\n", len(t2.T2slice))
}
if t2.T2slice[1].T1int1 != 1 {
t.Fatalf("wrong slice value\n")
}
if len(t2.T2map) != 1 {
t.Fatalf("wrong t2.T2map len %v\n", len(t2.T2map))
}
if t2.T2map[99].T1string1 != "y" {
t.Fatalf("wrong map value\n")
}
t3 := (t2.T2t3).(T3)
if t3.T3int999 != 999 {
t.Fatalf("wrong t2.T2t3.T3int999\n")
}
}
if errorCount != e0 {
t.Fatalf("there were errors, but should not have been")
}
}
type T4 struct {
Yes int
no int
}
//
// make sure we check capitalization
// labgob prints one warning during this test.
//
func TestCapital(t *testing.T) {
e0 := errorCount
v := []map[*T4]int{}
w := new(bytes.Buffer)
e := NewEncoder(w)
e.Encode(v)
data := w.Bytes()
var v1 []map[T4]int
r := bytes.NewBuffer(data)
d := NewDecoder(r)
d.Decode(&v1)
if errorCount != e0+1 {
t.Fatalf("failed to warn about lower-case field")
}
}
//
// check that we warn when someone sends a default value over
// RPC but the target into which we're decoding holds a non-default
// value, which GOB seems not to overwrite as you'd expect.
//
// labgob does not print a warning.
//
func TestDefault(t *testing.T) {
e0 := errorCount
type DD struct {
X int
}
// send a default value...
dd1 := DD{}
w := new(bytes.Buffer)
e := NewEncoder(w)
e.Encode(dd1)
data := w.Bytes()
// and receive it into memory that already
// holds non-default values.
reply := DD{99}
r := bytes.NewBuffer(data)
d := NewDecoder(r)
d.Decode(&reply)
if errorCount != e0+1 {
t.Fatalf("failed to warn about decoding into non-default value")
}
}

src/labrpc/labrpc.go Normal file

@@ -0,0 +1,513 @@
package labrpc
//
// channel-based RPC, for 824 labs.
//
// simulates a network that can lose requests, lose replies,
// delay messages, and entirely disconnect particular hosts.
//
// we will use the original labrpc.go to test your code for grading.
// so, while you can modify this code to help you debug, please
// test against the original before submitting.
//
// adapted from Go net/rpc/server.go.
//
// sends labgob-encoded values to ensure that RPCs
// don't include references to program objects.
//
// net := MakeNetwork() -- holds network, clients, servers.
// end := net.MakeEnd(endname) -- create a client end-point, to talk to one server.
// net.AddServer(servername, server) -- adds a named server to network.
// net.DeleteServer(servername) -- eliminate the named server.
// net.Connect(endname, servername) -- connect a client to a server.
// net.Enable(endname, enabled) -- enable/disable a client.
// net.Reliable(bool) -- false means drop/delay messages
//
// end.Call("Raft.AppendEntries", &args, &reply) -- send an RPC, wait for reply.
// the "Raft" is the name of the server struct to be called.
// the "AppendEntries" is the name of the method to be called.
// Call() returns true to indicate that the server executed the request
// and the reply is valid.
// Call() returns false if the network lost the request or reply
// or the server is down.
// It is OK to have multiple Call()s in progress at the same time on the
// same ClientEnd.
// Concurrent calls to Call() may be delivered to the server out of order,
// since the network may re-order messages.
// Call() is guaranteed to return (perhaps after a delay) *except* if the
// handler function on the server side does not return.
// the server RPC handler function must declare its args and reply arguments
// as pointers, so that their types exactly match the types of the arguments
// to Call().
//
// srv := MakeServer()
// srv.AddService(svc) -- a server can have multiple services, e.g. Raft and k/v
// pass srv to net.AddServer()
//
// svc := MakeService(receiverObject) -- obj's methods will handle RPCs
// much like Go's rpc.Register()
// pass svc to srv.AddService()
//
import "6.824/labgob"
import "bytes"
import "reflect"
import "sync"
import "log"
import "strings"
import "math/rand"
import "time"
import "sync/atomic"
type reqMsg struct {
endname interface{} // name of sending ClientEnd
svcMeth string // e.g. "Raft.AppendEntries"
argsType reflect.Type
args []byte
replyCh chan replyMsg
}
type replyMsg struct {
ok bool
reply []byte
}
type ClientEnd struct {
endname interface{} // this end-point's name
ch chan reqMsg // copy of Network.endCh
done chan struct{} // closed when Network is cleaned up
}
// send an RPC, wait for the reply.
// the return value indicates success; false means that
// no reply was received from the server.
func (e *ClientEnd) Call(svcMeth string, args interface{}, reply interface{}) bool {
req := reqMsg{}
req.endname = e.endname
req.svcMeth = svcMeth
req.argsType = reflect.TypeOf(args)
req.replyCh = make(chan replyMsg)
qb := new(bytes.Buffer)
qe := labgob.NewEncoder(qb)
if err := qe.Encode(args); err != nil {
panic(err)
}
req.args = qb.Bytes()
//
// send the request.
//
select {
case e.ch <- req:
// the request has been sent.
case <-e.done:
// entire Network has been destroyed.
return false
}
//
// wait for the reply.
//
rep := <-req.replyCh
if rep.ok {
rb := bytes.NewBuffer(rep.reply)
rd := labgob.NewDecoder(rb)
if err := rd.Decode(reply); err != nil {
log.Fatalf("ClientEnd.Call(): decode reply: %v\n", err)
}
return true
} else {
return false
}
}
type Network struct {
mu sync.Mutex
reliable bool
longDelays bool // pause a long time on send on disabled connection
longReordering bool // sometimes delay replies a long time
ends map[interface{}]*ClientEnd // ends, by name
enabled map[interface{}]bool // by end name
servers map[interface{}]*Server // servers, by name
connections map[interface{}]interface{} // endname -> servername
endCh chan reqMsg
done chan struct{} // closed when Network is cleaned up
count int32 // total RPC count, for statistics
bytes int64 // total bytes sent, for statistics
}
func MakeNetwork() *Network {
rn := &Network{}
rn.reliable = true
rn.ends = map[interface{}]*ClientEnd{}
rn.enabled = map[interface{}]bool{}
rn.servers = map[interface{}]*Server{}
rn.connections = map[interface{}](interface{}){}
rn.endCh = make(chan reqMsg)
rn.done = make(chan struct{})
// single goroutine to handle all ClientEnd.Call()s
go func() {
for {
select {
case xreq := <-rn.endCh:
atomic.AddInt32(&rn.count, 1)
atomic.AddInt64(&rn.bytes, int64(len(xreq.args)))
go rn.processReq(xreq)
case <-rn.done:
return
}
}
}()
return rn
}
func (rn *Network) Cleanup() {
close(rn.done)
}
func (rn *Network) Reliable(yes bool) {
rn.mu.Lock()
defer rn.mu.Unlock()
rn.reliable = yes
}
func (rn *Network) LongReordering(yes bool) {
rn.mu.Lock()
defer rn.mu.Unlock()
rn.longReordering = yes
}
func (rn *Network) LongDelays(yes bool) {
rn.mu.Lock()
defer rn.mu.Unlock()
rn.longDelays = yes
}
func (rn *Network) readEndnameInfo(endname interface{}) (enabled bool,
servername interface{}, server *Server, reliable bool, longreordering bool,
) {
rn.mu.Lock()
defer rn.mu.Unlock()
enabled = rn.enabled[endname]
servername = rn.connections[endname]
if servername != nil {
server = rn.servers[servername]
}
reliable = rn.reliable
longreordering = rn.longReordering
return
}
func (rn *Network) isServerDead(endname interface{}, servername interface{}, server *Server) bool {
rn.mu.Lock()
defer rn.mu.Unlock()
if rn.enabled[endname] == false || rn.servers[servername] != server {
return true
}
return false
}
func (rn *Network) processReq(req reqMsg) {
enabled, servername, server, reliable, longreordering := rn.readEndnameInfo(req.endname)
if enabled && servername != nil && server != nil {
if reliable == false {
// short delay
ms := (rand.Int() % 27)
time.Sleep(time.Duration(ms) * time.Millisecond)
}
if reliable == false && (rand.Int()%1000) < 100 {
// drop the request, return as if timeout
req.replyCh <- replyMsg{false, nil}
return
}
// execute the request (call the RPC handler).
// in a separate thread so that we can periodically check
// if the server has been killed and the RPC should get a
// failure reply.
ech := make(chan replyMsg)
go func() {
r := server.dispatch(req)
ech <- r
}()
// wait for handler to return,
// but stop waiting if DeleteServer() has been called,
// and return an error.
var reply replyMsg
replyOK := false
serverDead := false
for replyOK == false && serverDead == false {
select {
case reply = <-ech:
replyOK = true
case <-time.After(100 * time.Millisecond):
serverDead = rn.isServerDead(req.endname, servername, server)
if serverDead {
go func() {
<-ech // drain channel to let the goroutine created earlier terminate
}()
}
}
}
// do not reply if DeleteServer() has been called, i.e.
// the server has been killed. this is needed to avoid
// situation in which a client gets a positive reply
// to an Append, but the server persisted the update
// into the old Persister. config.go is careful to call
// DeleteServer() before superseding the Persister.
serverDead = rn.isServerDead(req.endname, servername, server)
if replyOK == false || serverDead == true {
// server was killed while we were waiting; return error.
req.replyCh <- replyMsg{false, nil}
} else if reliable == false && (rand.Int()%1000) < 100 {
// drop the reply, return as if timeout
req.replyCh <- replyMsg{false, nil}
} else if longreordering == true && rand.Intn(900) < 600 {
// delay the response for a while
ms := 200 + rand.Intn(1+rand.Intn(2000))
// Russ points out that this timer arrangement will decrease
// the number of goroutines, so that the race
// detector is less likely to get upset.
time.AfterFunc(time.Duration(ms)*time.Millisecond, func() {
atomic.AddInt64(&rn.bytes, int64(len(reply.reply)))
req.replyCh <- reply
})
} else {
atomic.AddInt64(&rn.bytes, int64(len(reply.reply)))
req.replyCh <- reply
}
} else {
// simulate no reply and eventual timeout.
ms := 0
if rn.longDelays {
// let Raft tests check that leader doesn't send
// RPCs synchronously.
ms = (rand.Int() % 7000)
} else {
// many kv tests require the client to try each
// server in fairly rapid succession.
ms = (rand.Int() % 100)
}
time.AfterFunc(time.Duration(ms)*time.Millisecond, func() {
req.replyCh <- replyMsg{false, nil}
})
}
}
// create a client end-point.
// start the thread that listens and delivers.
func (rn *Network) MakeEnd(endname interface{}) *ClientEnd {
rn.mu.Lock()
defer rn.mu.Unlock()
if _, ok := rn.ends[endname]; ok {
log.Fatalf("MakeEnd: %v already exists\n", endname)
}
e := &ClientEnd{}
e.endname = endname
e.ch = rn.endCh
e.done = rn.done
rn.ends[endname] = e
rn.enabled[endname] = false
rn.connections[endname] = nil
return e
}
func (rn *Network) AddServer(servername interface{}, rs *Server) {
rn.mu.Lock()
defer rn.mu.Unlock()
rn.servers[servername] = rs
}
func (rn *Network) DeleteServer(servername interface{}) {
rn.mu.Lock()
defer rn.mu.Unlock()
rn.servers[servername] = nil
}
// connect a ClientEnd to a server.
// a ClientEnd can only be connected once in its lifetime.
func (rn *Network) Connect(endname interface{}, servername interface{}) {
rn.mu.Lock()
defer rn.mu.Unlock()
rn.connections[endname] = servername
}
// enable/disable a ClientEnd.
func (rn *Network) Enable(endname interface{}, enabled bool) {
rn.mu.Lock()
defer rn.mu.Unlock()
rn.enabled[endname] = enabled
}
// get a server's count of incoming RPCs.
func (rn *Network) GetCount(servername interface{}) int {
rn.mu.Lock()
defer rn.mu.Unlock()
svr := rn.servers[servername]
return svr.GetCount()
}
func (rn *Network) GetTotalCount() int {
x := atomic.LoadInt32(&rn.count)
return int(x)
}
func (rn *Network) GetTotalBytes() int64 {
x := atomic.LoadInt64(&rn.bytes)
return x
}
//
// a server is a collection of services, all sharing
// the same rpc dispatcher. so that e.g. both a Raft
// and a k/v server can listen to the same rpc endpoint.
//
type Server struct {
mu sync.Mutex
services map[string]*Service
count int // incoming RPCs
}
func MakeServer() *Server {
rs := &Server{}
rs.services = map[string]*Service{}
return rs
}
func (rs *Server) AddService(svc *Service) {
rs.mu.Lock()
defer rs.mu.Unlock()
rs.services[svc.name] = svc
}
func (rs *Server) dispatch(req reqMsg) replyMsg {
rs.mu.Lock()
rs.count += 1
// split Raft.AppendEntries into service and method
dot := strings.LastIndex(req.svcMeth, ".")
serviceName := req.svcMeth[:dot]
methodName := req.svcMeth[dot+1:]
service, ok := rs.services[serviceName]
rs.mu.Unlock()
if ok {
return service.dispatch(methodName, req)
} else {
choices := []string{}
for k := range rs.services {
choices = append(choices, k)
}
log.Fatalf("labrpc.Server.dispatch(): unknown service %v in %v.%v; expecting one of %v\n",
serviceName, serviceName, methodName, choices)
return replyMsg{false, nil}
}
}
func (rs *Server) GetCount() int {
rs.mu.Lock()
defer rs.mu.Unlock()
return rs.count
}
// an object with methods that can be called via RPC.
// a single server may have more than one Service.
type Service struct {
name string
rcvr reflect.Value
typ reflect.Type
methods map[string]reflect.Method
}
func MakeService(rcvr interface{}) *Service {
svc := &Service{}
svc.typ = reflect.TypeOf(rcvr)
svc.rcvr = reflect.ValueOf(rcvr)
svc.name = reflect.Indirect(svc.rcvr).Type().Name()
svc.methods = map[string]reflect.Method{}
for m := 0; m < svc.typ.NumMethod(); m++ {
method := svc.typ.Method(m)
mtype := method.Type
mname := method.Name
//fmt.Printf("%v pp %v ni %v 1k %v 2k %v no %v\n",
// mname, method.PkgPath, mtype.NumIn(), mtype.In(1).Kind(), mtype.In(2).Kind(), mtype.NumOut())
if method.PkgPath != "" || // capitalized?
mtype.NumIn() != 3 ||
//mtype.In(1).Kind() != reflect.Ptr ||
mtype.In(2).Kind() != reflect.Ptr ||
mtype.NumOut() != 0 {
// the method is not suitable for a handler
//fmt.Printf("bad method: %v\n", mname)
} else {
// the method looks like a handler
svc.methods[mname] = method
}
}
return svc
}
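// For reference, a method survives the checks above only if it looks like a
// handler: exported name, exactly two arguments after the receiver, a pointer
// reply, and no return values. A hypothetical example (KVServer, GetArgs and
// GetReply are illustrative names, not labrpc types):
//
//	func (kv *KVServer) Get(args *GetArgs, reply *GetReply)
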
func (svc *Service) dispatch(methname string, req reqMsg) replyMsg {
if method, ok := svc.methods[methname]; ok {
// prepare space into which to read the argument.
// the Value's type will be a pointer to req.argsType.
args := reflect.New(req.argsType)
// decode the argument.
ab := bytes.NewBuffer(req.args)
ad := labgob.NewDecoder(ab)
ad.Decode(args.Interface())
// allocate space for the reply.
replyType := method.Type.In(2)
replyType = replyType.Elem()
replyv := reflect.New(replyType)
// call the method.
function := method.Func
function.Call([]reflect.Value{svc.rcvr, args.Elem(), replyv})
// encode the reply.
rb := new(bytes.Buffer)
re := labgob.NewEncoder(rb)
re.EncodeValue(replyv)
return replyMsg{true, rb.Bytes()}
} else {
choices := []string{}
for k := range svc.methods {
choices = append(choices, k)
}
log.Fatalf("labrpc.Service.dispatch(): unknown method %v in %v; expecting one of %v\n",
methname, req.svcMeth, choices)
return replyMsg{false, nil}
}
}
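// End-to-end sketch of one RPC through this package: ClientEnd.Call encodes
// the arguments with labgob and sends a reqMsg whose svcMeth is
// "Service.Method"; Server.dispatch splits that name on the last "." to pick
// the Service; Service.dispatch (above) decodes the arguments, invokes the
// handler via reflection, and ships back the labgob-encoded reply.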

597
src/labrpc/test_test.go Normal file
View File

@ -0,0 +1,597 @@
package labrpc
import "testing"
import "strconv"
import "sync"
import "runtime"
import "time"
import "fmt"
type JunkArgs struct {
X int
}
type JunkReply struct {
X string
}
type JunkServer struct {
mu sync.Mutex
log1 []string
log2 []int
}
func (js *JunkServer) Handler1(args string, reply *int) {
js.mu.Lock()
defer js.mu.Unlock()
js.log1 = append(js.log1, args)
*reply, _ = strconv.Atoi(args)
}
func (js *JunkServer) Handler2(args int, reply *string) {
js.mu.Lock()
defer js.mu.Unlock()
js.log2 = append(js.log2, args)
*reply = "handler2-" + strconv.Itoa(args)
}
func (js *JunkServer) Handler3(args int, reply *int) {
js.mu.Lock()
defer js.mu.Unlock()
time.Sleep(20 * time.Second)
*reply = -args
}
// args is a pointer
func (js *JunkServer) Handler4(args *JunkArgs, reply *JunkReply) {
reply.X = "pointer"
}
// args is not a pointer
func (js *JunkServer) Handler5(args JunkArgs, reply *JunkReply) {
reply.X = "no pointer"
}
func (js *JunkServer) Handler6(args string, reply *int) {
js.mu.Lock()
defer js.mu.Unlock()
*reply = len(args)
}
func (js *JunkServer) Handler7(args int, reply *string) {
js.mu.Lock()
defer js.mu.Unlock()
*reply = ""
for i := 0; i < args; i++ {
*reply = *reply + "y"
}
}
func TestBasic(t *testing.T) {
runtime.GOMAXPROCS(4)
rn := MakeNetwork()
defer rn.Cleanup()
e := rn.MakeEnd("end1-99")
js := &JunkServer{}
svc := MakeService(js)
rs := MakeServer()
rs.AddService(svc)
rn.AddServer("server99", rs)
rn.Connect("end1-99", "server99")
rn.Enable("end1-99", true)
{
reply := ""
e.Call("JunkServer.Handler2", 111, &reply)
if reply != "handler2-111" {
t.Fatalf("wrong reply from Handler2")
}
}
{
reply := 0
e.Call("JunkServer.Handler1", "9099", &reply)
if reply != 9099 {
t.Fatalf("wrong reply from Handler1")
}
}
}
func TestTypes(t *testing.T) {
runtime.GOMAXPROCS(4)
rn := MakeNetwork()
defer rn.Cleanup()
e := rn.MakeEnd("end1-99")
js := &JunkServer{}
svc := MakeService(js)
rs := MakeServer()
rs.AddService(svc)
rn.AddServer("server99", rs)
rn.Connect("end1-99", "server99")
rn.Enable("end1-99", true)
{
var args JunkArgs
var reply JunkReply
// args must match type (pointer or not) of handler.
e.Call("JunkServer.Handler4", &args, &reply)
if reply.X != "pointer" {
t.Fatalf("wrong reply from Handler4")
}
}
{
var args JunkArgs
var reply JunkReply
// args must match type (pointer or not) of handler.
e.Call("JunkServer.Handler5", args, &reply)
if reply.X != "no pointer" {
t.Fatalf("wrong reply from Handler5")
}
}
}
//
// does net.Enable(endname, false) really disconnect a client?
//
func TestDisconnect(t *testing.T) {
runtime.GOMAXPROCS(4)
rn := MakeNetwork()
defer rn.Cleanup()
e := rn.MakeEnd("end1-99")
js := &JunkServer{}
svc := MakeService(js)
rs := MakeServer()
rs.AddService(svc)
rn.AddServer("server99", rs)
rn.Connect("end1-99", "server99")
{
reply := ""
e.Call("JunkServer.Handler2", 111, &reply)
if reply != "" {
t.Fatalf("unexpected reply from Handler2")
}
}
rn.Enable("end1-99", true)
{
reply := 0
e.Call("JunkServer.Handler1", "9099", &reply)
if reply != 9099 {
t.Fatalf("wrong reply from Handler1")
}
}
}
//
// test net.GetCount()
//
func TestCounts(t *testing.T) {
runtime.GOMAXPROCS(4)
rn := MakeNetwork()
defer rn.Cleanup()
e := rn.MakeEnd("end1-99")
js := &JunkServer{}
svc := MakeService(js)
rs := MakeServer()
rs.AddService(svc)
rn.AddServer(99, rs)
rn.Connect("end1-99", 99)
rn.Enable("end1-99", true)
for i := 0; i < 17; i++ {
reply := ""
e.Call("JunkServer.Handler2", i, &reply)
wanted := "handler2-" + strconv.Itoa(i)
if reply != wanted {
t.Fatalf("wrong reply %v from Handler1, expecting %v", reply, wanted)
}
}
n := rn.GetCount(99)
if n != 17 {
t.Fatalf("wrong GetCount() %v, expected 17\n", n)
}
}
//
// test net.GetTotalBytes()
//
func TestBytes(t *testing.T) {
runtime.GOMAXPROCS(4)
rn := MakeNetwork()
defer rn.Cleanup()
e := rn.MakeEnd("end1-99")
js := &JunkServer{}
svc := MakeService(js)
rs := MakeServer()
rs.AddService(svc)
rn.AddServer(99, rs)
rn.Connect("end1-99", 99)
rn.Enable("end1-99", true)
for i := 0; i < 17; i++ {
args := "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
args = args + args
args = args + args
reply := 0
e.Call("JunkServer.Handler6", args, &reply)
wanted := len(args)
if reply != wanted {
t.Fatalf("wrong reply %v from Handler6, expecting %v", reply, wanted)
}
}
n := rn.GetTotalBytes()
if n < 4828 || n > 6000 {
t.Fatalf("wrong GetTotalBytes() %v, expected about 5000\n", n)
}
for i := 0; i < 17; i++ {
args := 107
reply := ""
e.Call("JunkServer.Handler7", args, &reply)
wanted := args
if len(reply) != wanted {
t.Fatalf("wrong reply len=%v from Handler6, expecting %v", len(reply), wanted)
}
}
nn := rn.GetTotalBytes() - n
if nn < 1800 || nn > 2500 {
t.Fatalf("wrong GetTotalBytes() %v, expected about 2000\n", nn)
}
}
//
// test RPCs from concurrent ClientEnds
//
func TestConcurrentMany(t *testing.T) {
runtime.GOMAXPROCS(4)
rn := MakeNetwork()
defer rn.Cleanup()
js := &JunkServer{}
svc := MakeService(js)
rs := MakeServer()
rs.AddService(svc)
rn.AddServer(1000, rs)
ch := make(chan int)
nclients := 20
nrpcs := 10
for ii := 0; ii < nclients; ii++ {
go func(i int) {
n := 0
defer func() { ch <- n }()
e := rn.MakeEnd(i)
rn.Connect(i, 1000)
rn.Enable(i, true)
for j := 0; j < nrpcs; j++ {
arg := i*100 + j
reply := ""
e.Call("JunkServer.Handler2", arg, &reply)
wanted := "handler2-" + strconv.Itoa(arg)
if reply != wanted {
t.Fatalf("wrong reply %v from Handler1, expecting %v", reply, wanted)
}
n += 1
}
}(ii)
}
total := 0
for ii := 0; ii < nclients; ii++ {
x := <-ch
total += x
}
if total != nclients*nrpcs {
t.Fatalf("wrong number of RPCs completed, got %v, expected %v", total, nclients*nrpcs)
}
n := rn.GetCount(1000)
if n != total {
t.Fatalf("wrong GetCount() %v, expected %v\n", n, total)
}
}
//
// test unreliable
//
func TestUnreliable(t *testing.T) {
runtime.GOMAXPROCS(4)
rn := MakeNetwork()
defer rn.Cleanup()
rn.Reliable(false)
js := &JunkServer{}
svc := MakeService(js)
rs := MakeServer()
rs.AddService(svc)
rn.AddServer(1000, rs)
ch := make(chan int)
nclients := 300
for ii := 0; ii < nclients; ii++ {
go func(i int) {
n := 0
defer func() { ch <- n }()
e := rn.MakeEnd(i)
rn.Connect(i, 1000)
rn.Enable(i, true)
arg := i * 100
reply := ""
ok := e.Call("JunkServer.Handler2", arg, &reply)
if ok {
wanted := "handler2-" + strconv.Itoa(arg)
if reply != wanted {
t.Fatalf("wrong reply %v from Handler1, expecting %v", reply, wanted)
}
n += 1
}
}(ii)
}
total := 0
for ii := 0; ii < nclients; ii++ {
x := <-ch
total += x
}
if total == nclients || total == 0 {
t.Fatalf("all RPCs succeeded despite unreliable")
}
}
//
// test concurrent RPCs from a single ClientEnd
//
func TestConcurrentOne(t *testing.T) {
runtime.GOMAXPROCS(4)
rn := MakeNetwork()
defer rn.Cleanup()
js := &JunkServer{}
svc := MakeService(js)
rs := MakeServer()
rs.AddService(svc)
rn.AddServer(1000, rs)
e := rn.MakeEnd("c")
rn.Connect("c", 1000)
rn.Enable("c", true)
ch := make(chan int)
nrpcs := 20
for ii := 0; ii < nrpcs; ii++ {
go func(i int) {
n := 0
defer func() { ch <- n }()
arg := 100 + i
reply := ""
e.Call("JunkServer.Handler2", arg, &reply)
wanted := "handler2-" + strconv.Itoa(arg)
if reply != wanted {
t.Fatalf("wrong reply %v from Handler2, expecting %v", reply, wanted)
}
n += 1
}(ii)
}
total := 0
for ii := 0; ii < nrpcs; ii++ {
x := <-ch
total += x
}
if total != nrpcs {
t.Fatalf("wrong number of RPCs completed, got %v, expected %v", total, nrpcs)
}
js.mu.Lock()
defer js.mu.Unlock()
if len(js.log2) != nrpcs {
t.Fatalf("wrong number of RPCs delivered")
}
n := rn.GetCount(1000)
if n != total {
t.Fatalf("wrong GetCount() %v, expected %v\n", n, total)
}
}
//
// regression: an RPC that's delayed during Enabled=false
// should not delay subsequent RPCs (e.g. after Enabled=true).
//
func TestRegression1(t *testing.T) {
runtime.GOMAXPROCS(4)
rn := MakeNetwork()
defer rn.Cleanup()
js := &JunkServer{}
svc := MakeService(js)
rs := MakeServer()
rs.AddService(svc)
rn.AddServer(1000, rs)
e := rn.MakeEnd("c")
rn.Connect("c", 1000)
// start some RPCs while the ClientEnd is disabled.
// they'll be delayed.
rn.Enable("c", false)
ch := make(chan bool)
nrpcs := 20
for ii := 0; ii < nrpcs; ii++ {
go func(i int) {
ok := false
defer func() { ch <- ok }()
arg := 100 + i
reply := ""
// this call ought to return false.
e.Call("JunkServer.Handler2", arg, &reply)
ok = true
}(ii)
}
time.Sleep(100 * time.Millisecond)
// now enable the ClientEnd and check that an RPC completes quickly.
t0 := time.Now()
rn.Enable("c", true)
{
arg := 99
reply := ""
e.Call("JunkServer.Handler2", arg, &reply)
wanted := "handler2-" + strconv.Itoa(arg)
if reply != wanted {
t.Fatalf("wrong reply %v from Handler2, expecting %v", reply, wanted)
}
}
dur := time.Since(t0).Seconds()
if dur > 0.03 {
t.Fatalf("RPC took too long (%v) after Enable", dur)
}
for ii := 0; ii < nrpcs; ii++ {
<-ch
}
js.mu.Lock()
defer js.mu.Unlock()
if len(js.log2) != 1 {
t.Fatalf("wrong number (%v) of RPCs delivered, expected 1", len(js.log2))
}
n := rn.GetCount(1000)
if n != 1 {
t.Fatalf("wrong GetCount() %v, expected %v\n", n, 1)
}
}
//
// if an RPC is stuck in a server, and the server
// is killed with DeleteServer(), does the RPC
// get un-stuck?
//
func TestKilled(t *testing.T) {
runtime.GOMAXPROCS(4)
rn := MakeNetwork()
defer rn.Cleanup()
e := rn.MakeEnd("end1-99")
js := &JunkServer{}
svc := MakeService(js)
rs := MakeServer()
rs.AddService(svc)
rn.AddServer("server99", rs)
rn.Connect("end1-99", "server99")
rn.Enable("end1-99", true)
doneCh := make(chan bool)
go func() {
reply := 0
ok := e.Call("JunkServer.Handler3", 99, &reply)
doneCh <- ok
}()
time.Sleep(1000 * time.Millisecond)
select {
case <-doneCh:
t.Fatalf("Handler3 should not have returned yet")
case <-time.After(100 * time.Millisecond):
}
rn.DeleteServer("server99")
select {
case x := <-doneCh:
if x != false {
t.Fatalf("Handler3 returned successfully despite DeleteServer()")
}
case <-time.After(100 * time.Millisecond):
t.Fatalf("Handler3 should return after DeleteServer()")
}
}
func TestBenchmark(t *testing.T) {
runtime.GOMAXPROCS(4)
rn := MakeNetwork()
defer rn.Cleanup()
e := rn.MakeEnd("end1-99")
js := &JunkServer{}
svc := MakeService(js)
rs := MakeServer()
rs.AddService(svc)
rn.AddServer("server99", rs)
rn.Connect("end1-99", "server99")
rn.Enable("end1-99", true)
t0 := time.Now()
n := 100000
for iters := 0; iters < n; iters++ {
reply := ""
e.Call("JunkServer.Handler2", 111, &reply)
if reply != "handler2-111" {
t.Fatalf("wrong reply from Handler2")
}
}
fmt.Printf("%v for %v\n", time.Since(t0), n)
// march 2016, rtm laptop, 22 microseconds per RPC
}

74
src/main/diskvd.go Normal file
View File

@ -0,0 +1,74 @@
package main
//
// start a diskvd server. it's a member of some replica
// group, which has other members, and it needs to know
// how to talk to the members of the shardmaster service.
// used by ../diskv/test_test.go
//
// arguments:
// -g groupid
// -m masterport1 -m masterport2 ...
// -s replicaport1 -s replicaport2 ...
// -i my-index-in-server-port-list
// -u unreliable
// -d directory
// -r restart
import "time"
import "6.824/diskv"
import "os"
import "fmt"
import "strconv"
import "runtime"
func usage() {
fmt.Printf("Usage: diskvd -g gid -m master... -s server... -i my-index -d dir\n")
os.Exit(1)
}
func main() {
var gid int64 = -1 // my replica group ID
masters := []string{} // ports of shardmasters
replicas := []string{} // ports of servers in my replica group
me := -1 // my index in replicas[]
unreliable := false
dir := "" // store persistent data here
restart := false
for i := 1; i+1 < len(os.Args); i += 2 {
a0 := os.Args[i]
a1 := os.Args[i+1]
if a0 == "-g" {
gid, _ = strconv.ParseInt(a1, 10, 64)
} else if a0 == "-m" {
masters = append(masters, a1)
} else if a0 == "-s" {
replicas = append(replicas, a1)
} else if a0 == "-i" {
me, _ = strconv.Atoi(a1)
} else if a0 == "-u" {
unreliable, _ = strconv.ParseBool(a1)
} else if a0 == "-d" {
dir = a1
} else if a0 == "-r" {
restart, _ = strconv.ParseBool(a1)
} else {
usage()
}
}
if gid < 0 || me < 0 || len(masters) < 1 || me >= len(replicas) || dir == "" {
usage()
}
runtime.GOMAXPROCS(4)
srv := diskv.StartServer(gid, masters, replicas, me, dir, restart)
srv.Setunreliable(unreliable)
// for safety, force quit after 10 minutes.
time.Sleep(10 * 60 * time.Second)
mep, _ := os.FindProcess(os.Getpid())
mep.Kill()
}

31
src/main/lockc.go Normal file
View File

@ -0,0 +1,31 @@
package main
//
// see comments in lockd.go
//
import "6.824/lockservice"
import "os"
import "fmt"
func usage() {
fmt.Printf("Usage: lockc -l|-u primaryport backupport lockname\n")
os.Exit(1)
}
func main() {
if len(os.Args) == 5 {
ck := lockservice.MakeClerk(os.Args[2], os.Args[3])
var ok bool
if os.Args[1] == "-l" {
ok = ck.Lock(os.Args[4])
} else if os.Args[1] == "-u" {
ok = ck.Unlock(os.Args[4])
} else {
usage()
}
fmt.Printf("reply: %v\n", ok)
} else {
usage()
}
}

31
src/main/lockd.go Normal file
View File

@ -0,0 +1,31 @@
package main
// export GOPATH=~/6.824
// go build lockd.go
// go build lockc.go
// ./lockd -p a b &
// ./lockd -b a b &
// ./lockc -l a b lx
// ./lockc -u a b lx
//
// on Athena, use /tmp/myname-a and /tmp/myname-b
// instead of a and b.
import "time"
import "6.824/lockservice"
import "os"
import "fmt"
func main() {
if len(os.Args) == 4 && os.Args[1] == "-p" {
lockservice.StartServer(os.Args[2], os.Args[3], true)
} else if len(os.Args) == 4 && os.Args[1] == "-b" {
lockservice.StartServer(os.Args[2], os.Args[3], false)
} else {
fmt.Printf("Usage: lockd -p|-b primaryport backupport\n")
os.Exit(1)
}
for {
time.Sleep(100 * time.Second)
}
}

29
src/main/mrcoordinator.go Normal file
View File

@ -0,0 +1,29 @@
package main
//
// start the coordinator process, which is implemented
// in ../mr/coordinator.go
//
// go run mrcoordinator.go pg*.txt
//
// Please do not change this file.
//
import "6.824/mr"
import "time"
import "os"
import "fmt"
func main() {
if len(os.Args) < 2 {
fmt.Fprintf(os.Stderr, "Usage: mrcoordinator inputfiles...\n")
os.Exit(1)
}
m := mr.MakeCoordinator(os.Args[1:], 10)
for m.Done() == false {
time.Sleep(time.Second)
}
time.Sleep(time.Second)
}

110
src/main/mrsequential.go Normal file
View File

@ -0,0 +1,110 @@
package main
//
// simple sequential MapReduce.
//
// go run mrsequential.go wc.so pg*.txt
//
import "fmt"
import "6.824/mr"
import "plugin"
import "os"
import "log"
import "io/ioutil"
import "sort"
// for sorting by key.
type ByKey []mr.KeyValue
// for sorting by key.
func (a ByKey) Len() int { return len(a) }
func (a ByKey) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
func (a ByKey) Less(i, j int) bool { return a[i].Key < a[j].Key }
func main() {
if len(os.Args) < 3 {
fmt.Fprintf(os.Stderr, "Usage: mrsequential xxx.so inputfiles...\n")
os.Exit(1)
}
mapf, reducef := loadPlugin(os.Args[1])
//
// read each input file,
// pass it to Map,
// accumulate the intermediate Map output.
//
intermediate := []mr.KeyValue{}
for _, filename := range os.Args[2:] {
file, err := os.Open(filename)
if err != nil {
log.Fatalf("cannot open %v", filename)
}
content, err := ioutil.ReadAll(file)
if err != nil {
log.Fatalf("cannot read %v", filename)
}
file.Close()
kva := mapf(filename, string(content))
intermediate = append(intermediate, kva...)
}
//
// a big difference from real MapReduce is that all the
// intermediate data is in one place, intermediate[],
// rather than being partitioned into NxM buckets.
//
sort.Sort(ByKey(intermediate))
oname := "mr-out-0"
ofile, _ := os.Create(oname)
//
// call Reduce on each distinct key in intermediate[],
// and print the result to mr-out-0.
//
i := 0
for i < len(intermediate) {
j := i + 1
for j < len(intermediate) && intermediate[j].Key == intermediate[i].Key {
j++
}
values := []string{}
for k := i; k < j; k++ {
values = append(values, intermediate[k].Value)
}
output := reducef(intermediate[i].Key, values)
// this is the correct format for each line of Reduce output.
fmt.Fprintf(ofile, "%v %v\n", intermediate[i].Key, output)
i = j
}
ofile.Close()
}
//
// load the application Map and Reduce functions
// from a plugin file, e.g. ../mrapps/wc.so
//
func loadPlugin(filename string) (func(string, string) []mr.KeyValue, func(string, []string) string) {
p, err := plugin.Open(filename)
if err != nil {
log.Fatalf("cannot load plugin %v", filename)
}
xmapf, err := p.Lookup("Map")
if err != nil {
log.Fatalf("cannot find Map in %v", filename)
}
mapf := xmapf.(func(string, string) []mr.KeyValue)
xreducef, err := p.Lookup("Reduce")
if err != nil {
log.Fatalf("cannot find Reduce in %v", filename)
}
reducef := xreducef.(func(string, []string) string)
return mapf, reducef
}

51
src/main/mrworker.go Normal file
View File

@ -0,0 +1,51 @@
package main
//
// start a worker process, which is implemented
// in ../mr/worker.go. typically there will be
// multiple worker processes, talking to one coordinator.
//
// go run mrworker.go wc.so
//
// Please do not change this file.
//
import "6.824/mr"
import "plugin"
import "os"
import "fmt"
import "log"
func main() {
if len(os.Args) != 2 {
fmt.Fprintf(os.Stderr, "Usage: mrworker xxx.so\n")
os.Exit(1)
}
mapf, reducef := loadPlugin(os.Args[1])
mr.Worker(mapf, reducef)
}
//
// load the application Map and Reduce functions
// from a plugin file, e.g. ../mrapps/wc.so
//
func loadPlugin(filename string) (func(string, string) []mr.KeyValue, func(string, []string) string) {
p, err := plugin.Open(filename)
if err != nil {
log.Fatalf("cannot load plugin %v", filename)
}
xmapf, err := p.Lookup("Map")
if err != nil {
log.Fatalf("cannot find Map in %v", filename)
}
mapf := xmapf.(func(string, string) []mr.KeyValue)
xreducef, err := p.Lookup("Reduce")
if err != nil {
log.Fatalf("cannot find Reduce in %v", filename)
}
reducef := xreducef.(func(string, []string) string)
return mapf, reducef
}

44
src/main/pbc.go Normal file
View File

@ -0,0 +1,44 @@
package main
//
// pbservice client application
//
// export GOPATH=~/6.824
// go build viewd.go
// go build pbd.go
// go build pbc.go
// ./viewd /tmp/rtm-v &
// ./pbd /tmp/rtm-v /tmp/rtm-1 &
// ./pbd /tmp/rtm-v /tmp/rtm-2 &
// ./pbc /tmp/rtm-v key1 value1
// ./pbc /tmp/rtm-v key1
//
// change "rtm" to your user name.
// start the pbd programs in separate windows and kill
// and restart them to exercise fault tolerance.
//
import "6.824/pbservice"
import "os"
import "fmt"
func usage() {
fmt.Printf("Usage: pbc viewport key\n")
fmt.Printf(" pbc viewport key value\n")
os.Exit(1)
}
func main() {
if len(os.Args) == 3 {
// get
ck := pbservice.MakeClerk(os.Args[1], "")
v := ck.Get(os.Args[2])
fmt.Printf("%v\n", v)
} else if len(os.Args) == 4 {
// put
ck := pbservice.MakeClerk(os.Args[1], "")
ck.Put(os.Args[2], os.Args[3])
} else {
usage()
}
}

23
src/main/pbd.go Normal file
View File

@ -0,0 +1,23 @@
package main
//
// see directions in pbc.go
//
import "time"
import "6.824/pbservice"
import "os"
import "fmt"
func main() {
if len(os.Args) != 3 {
fmt.Printf("Usage: pbd viewport myport\n")
os.Exit(1)
}
pbservice.StartServer(os.Args[1], os.Args[2])
for {
time.Sleep(100 * time.Second)
}
}

3495
src/main/pg-being_ernest.txt Normal file

File diff suppressed because it is too large Load Diff

8904
src/main/pg-dorian_gray.txt Normal file

File diff suppressed because it is too large Load Diff

7653
src/main/pg-frankenstein.txt Normal file

File diff suppressed because it is too large Load Diff

9569
src/main/pg-grimm.txt Normal file

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

9206
src/main/pg-tom_sawyer.txt Normal file

File diff suppressed because it is too large Load Diff

23
src/main/test-mr-many.sh Normal file
View File

@ -0,0 +1,23 @@
#!/usr/bin/env bash
if [ $# -ne 1 ]; then
echo "Usage: $0 numTrials"
exit 1
fi
trap 'kill -INT -$pid; exit 1' INT
# Note: because the socketID is based on the current userID,
# ./test-mr.sh cannot be run in parallel
runs=$1
chmod +x test-mr.sh
for i in $(seq 1 $runs); do
timeout -k 2s 900s ./test-mr.sh &
pid=$!
if ! wait $pid; then
echo '***' FAILED TESTS IN TRIAL $i
exit 1
fi
done
echo '***' PASSED ALL $i TESTING TRIALS

278
src/main/test-mr.sh Normal file
View File

@ -0,0 +1,278 @@
#!/usr/bin/env bash
#
# basic map-reduce test
#
#RACE=
# comment this to run the tests without the Go race detector.
RACE=-race
# run the test in a fresh sub-directory.
rm -rf mr-tmp
mkdir mr-tmp || exit 1
cd mr-tmp || exit 1
rm -f mr-*
# make sure software is freshly built.
(cd ../../mrapps && go build $RACE -buildmode=plugin wc.go) || exit 1
(cd ../../mrapps && go build $RACE -buildmode=plugin indexer.go) || exit 1
(cd ../../mrapps && go build $RACE -buildmode=plugin mtiming.go) || exit 1
(cd ../../mrapps && go build $RACE -buildmode=plugin rtiming.go) || exit 1
(cd ../../mrapps && go build $RACE -buildmode=plugin jobcount.go) || exit 1
(cd ../../mrapps && go build $RACE -buildmode=plugin early_exit.go) || exit 1
(cd ../../mrapps && go build $RACE -buildmode=plugin crash.go) || exit 1
(cd ../../mrapps && go build $RACE -buildmode=plugin nocrash.go) || exit 1
(cd .. && go build $RACE mrcoordinator.go) || exit 1
(cd .. && go build $RACE mrworker.go) || exit 1
(cd .. && go build $RACE mrsequential.go) || exit 1
failed_any=0
#########################################################
# first word-count
# generate the correct output
../mrsequential ../../mrapps/wc.so ../pg*txt || exit 1
sort mr-out-0 > mr-correct-wc.txt
rm -f mr-out*
echo '***' Starting wc test.
timeout -k 2s 180s ../mrcoordinator ../pg*txt &
pid=$!
# give the coordinator time to create the sockets.
sleep 1
# start multiple workers.
timeout -k 2s 180s ../mrworker ../../mrapps/wc.so &
timeout -k 2s 180s ../mrworker ../../mrapps/wc.so &
timeout -k 2s 180s ../mrworker ../../mrapps/wc.so &
# wait for the coordinator to exit.
wait $pid
# since workers are required to exit when a job is completely finished,
# and not before, that means the job has finished.
sort mr-out* | grep . > mr-wc-all
if cmp mr-wc-all mr-correct-wc.txt
then
echo '---' wc test: PASS
else
echo '---' wc output is not the same as mr-correct-wc.txt
echo '---' wc test: FAIL
failed_any=1
fi
# wait for remaining workers and coordinator to exit.
wait
#########################################################
# now indexer
rm -f mr-*
# generate the correct output
../mrsequential ../../mrapps/indexer.so ../pg*txt || exit 1
sort mr-out-0 > mr-correct-indexer.txt
rm -f mr-out*
echo '***' Starting indexer test.
timeout -k 2s 180s ../mrcoordinator ../pg*txt &
sleep 1
# start multiple workers
timeout -k 2s 180s ../mrworker ../../mrapps/indexer.so &
timeout -k 2s 180s ../mrworker ../../mrapps/indexer.so
sort mr-out* | grep . > mr-indexer-all
if cmp mr-indexer-all mr-correct-indexer.txt
then
echo '---' indexer test: PASS
else
echo '---' indexer output is not the same as mr-correct-indexer.txt
echo '---' indexer test: FAIL
failed_any=1
fi
wait
#########################################################
echo '***' Starting map parallelism test.
rm -f mr-*
timeout -k 2s 180s ../mrcoordinator ../pg*txt &
sleep 1
timeout -k 2s 180s ../mrworker ../../mrapps/mtiming.so &
timeout -k 2s 180s ../mrworker ../../mrapps/mtiming.so
NT=`cat mr-out* | grep '^times-' | wc -l | sed 's/ //g'`
if [ "$NT" != "2" ]
then
echo '---' saw "$NT" workers rather than 2
echo '---' map parallelism test: FAIL
failed_any=1
fi
if cat mr-out* | grep '^parallel.* 2' > /dev/null
then
echo '---' map parallelism test: PASS
else
echo '---' map workers did not run in parallel
echo '---' map parallelism test: FAIL
failed_any=1
fi
wait
#########################################################
echo '***' Starting reduce parallelism test.
rm -f mr-*
timeout -k 2s 180s ../mrcoordinator ../pg*txt &
sleep 1
timeout -k 2s 180s ../mrworker ../../mrapps/rtiming.so &
timeout -k 2s 180s ../mrworker ../../mrapps/rtiming.so
NT=`cat mr-out* | grep '^[a-z] 2' | wc -l | sed 's/ //g'`
if [ "$NT" -lt "2" ]
then
echo '---' too few parallel reduces.
echo '---' reduce parallelism test: FAIL
failed_any=1
else
echo '---' reduce parallelism test: PASS
fi
wait
#########################################################
echo '***' Starting job count test.
rm -f mr-*
timeout -k 2s 180s ../mrcoordinator ../pg*txt &
sleep 1
timeout -k 2s 180s ../mrworker ../../mrapps/jobcount.so &
timeout -k 2s 180s ../mrworker ../../mrapps/jobcount.so
timeout -k 2s 180s ../mrworker ../../mrapps/jobcount.so &
timeout -k 2s 180s ../mrworker ../../mrapps/jobcount.so
NT=`cat mr-out* | awk '{print $2}'`
if [ "$NT" -ne "8" ]
then
echo '---' map jobs ran incorrect number of times "($NT != 8)"
echo '---' job count test: FAIL
failed_any=1
else
echo '---' job count test: PASS
fi
wait
#########################################################
# test whether any worker or coordinator exits before the
# task has completed (i.e., all output files have been finalized)
rm -f mr-*
echo '***' Starting early exit test.
timeout -k 2s 180s ../mrcoordinator ../pg*txt &
# give the coordinator time to create the sockets.
sleep 1
# start multiple workers.
timeout -k 2s 180s ../mrworker ../../mrapps/early_exit.so &
timeout -k 2s 180s ../mrworker ../../mrapps/early_exit.so &
timeout -k 2s 180s ../mrworker ../../mrapps/early_exit.so &
# wait for any of the coord or workers to exit
# `jobs` ensures that any completed old processes from other tests
# are not waited upon
jobs &> /dev/null
wait -n
# a process has exited. this means that the output should be finalized
# otherwise, either a worker or the coordinator exited early
sort mr-out* | grep . > mr-wc-all-initial
# wait for remaining workers and coordinator to exit.
wait
# compare initial and final outputs
sort mr-out* | grep . > mr-wc-all-final
if cmp mr-wc-all-final mr-wc-all-initial
then
echo '---' early exit test: PASS
else
echo '---' output changed after first worker exited
echo '---' early exit test: FAIL
failed_any=1
fi
rm -f mr-*
#########################################################
echo '***' Starting crash test.
# generate the correct output
../mrsequential ../../mrapps/nocrash.so ../pg*txt || exit 1
sort mr-out-0 > mr-correct-crash.txt
rm -f mr-out*
rm -f mr-done
(timeout -k 2s 180s ../mrcoordinator ../pg*txt ; touch mr-done ) &
sleep 1
# start multiple workers
timeout -k 2s 180s ../mrworker ../../mrapps/crash.so &
# mimic rpc.go's coordinatorSock()
SOCKNAME=/var/tmp/824-mr-`id -u`
( while [ -e $SOCKNAME -a ! -f mr-done ]
do
timeout -k 2s 180s ../mrworker ../../mrapps/crash.so
sleep 1
done ) &
( while [ -e $SOCKNAME -a ! -f mr-done ]
do
timeout -k 2s 180s ../mrworker ../../mrapps/crash.so
sleep 1
done ) &
while [ -e $SOCKNAME -a ! -f mr-done ]
do
timeout -k 2s 180s ../mrworker ../../mrapps/crash.so
sleep 1
done
wait
rm $SOCKNAME
sort mr-out* | grep . > mr-crash-all
if cmp mr-crash-all mr-correct-crash.txt
then
echo '---' crash test: PASS
else
echo '---' crash output is not the same as mr-correct-crash.txt
echo '---' crash test: FAIL
failed_any=1
fi
#########################################################
if [ $failed_any -eq 0 ]; then
echo '***' PASSED ALL TESTS
else
echo '***' FAILED SOME TESTS
exit 1
fi

23
src/main/viewd.go Normal file
View File

@ -0,0 +1,23 @@
package main
//
// see directions in pbc.go
//
import "time"
import "6.824/viewservice"
import "os"
import "fmt"
func main() {
if len(os.Args) != 2 {
fmt.Printf("Usage: viewd port\n")
os.Exit(1)
}
viewservice.StartServer(os.Args[1])
for {
time.Sleep(100 * time.Second)
}
}

69
src/models/kv.go Normal file
View File

@ -0,0 +1,69 @@
package models
import "6.824/porcupine"
import "fmt"
import "sort"
type KvInput struct {
Op uint8 // 0 => get, 1 => put, 2 => append
Key string
Value string
}
type KvOutput struct {
Value string
}
var KvModel = porcupine.Model{
Partition: func(history []porcupine.Operation) [][]porcupine.Operation {
m := make(map[string][]porcupine.Operation)
for _, v := range history {
key := v.Input.(KvInput).Key
m[key] = append(m[key], v)
}
keys := make([]string, 0, len(m))
for k := range m {
keys = append(keys, k)
}
sort.Strings(keys)
ret := make([][]porcupine.Operation, 0, len(keys))
for _, k := range keys {
ret = append(ret, m[k])
}
return ret
},
Init: func() interface{} {
// note: we are modeling a single key's value here;
// we're partitioning by key, so this is okay
return ""
},
Step: func(state, input, output interface{}) (bool, interface{}) {
inp := input.(KvInput)
out := output.(KvOutput)
st := state.(string)
if inp.Op == 0 {
// get
return out.Value == st, state
} else if inp.Op == 1 {
// put
return true, inp.Value
} else {
// append
return true, (st + inp.Value)
}
},
DescribeOperation: func(input, output interface{}) string {
inp := input.(KvInput)
out := output.(KvOutput)
switch inp.Op {
case 0:
return fmt.Sprintf("get('%s') -> '%s'", inp.Key, out.Value)
case 1:
return fmt.Sprintf("put('%s', '%s')", inp.Key, inp.Value)
case 2:
return fmt.Sprintf("append('%s', '%s')", inp.Key, inp.Value)
default:
return "<invalid>"
}
},
}
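// A minimal sketch (not used by the tests) of how this model plugs into the
// porcupine checker: a put that finishes before a get starts must be observed
// by that get. The history below is invented for illustration.
func exampleKvCheck() bool {
	history := []porcupine.Operation{
		// client 0: put("x", "y") during [0, 10]
		{ClientId: 0, Input: KvInput{Op: 1, Key: "x", Value: "y"}, Call: 0, Output: KvOutput{}, Return: 10},
		// client 1: get("x") -> "y" during [20, 30]
		{ClientId: 1, Input: KvInput{Op: 0, Key: "x"}, Call: 20, Output: KvOutput{Value: "y"}, Return: 30},
	}
	return porcupine.CheckOperations(KvModel, history) // true: the history is linearizable
}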

70
src/mr/coordinator.go Normal file
View File

@ -0,0 +1,70 @@
package mr
import "log"
import "net"
import "os"
import "net/rpc"
import "net/http"
type Coordinator struct {
// Your definitions here.
}
// Your code here -- RPC handlers for the worker to call.
//
// an example RPC handler.
//
// the RPC argument and reply types are defined in rpc.go.
//
func (c *Coordinator) Example(args *ExampleArgs, reply *ExampleReply) error {
reply.Y = args.X + 1
return nil
}
//
// start a thread that listens for RPCs from worker.go
//
func (c *Coordinator) server() {
rpc.Register(c)
rpc.HandleHTTP()
//l, e := net.Listen("tcp", ":1234")
sockname := coordinatorSock()
os.Remove(sockname)
l, e := net.Listen("unix", sockname)
if e != nil {
log.Fatal("listen error:", e)
}
go http.Serve(l, nil)
}
//
// main/mrcoordinator.go calls Done() periodically to find out
// if the entire job has finished.
//
func (c *Coordinator) Done() bool {
ret := false
// Your code here.
return ret
}
//
// create a Coordinator.
// main/mrcoordinator.go calls this function.
// nReduce is the number of reduce tasks to use.
//
func MakeCoordinator(files []string, nReduce int) *Coordinator {
c := Coordinator{}
// Your code here.
c.server()
return &c
}

36
src/mr/rpc.go Normal file
View File

@ -0,0 +1,36 @@
package mr
//
// RPC definitions.
//
// remember to capitalize all names.
//
import "os"
import "strconv"
//
// example to show how to declare the arguments
// and reply for an RPC.
//
type ExampleArgs struct {
X int
}
type ExampleReply struct {
Y int
}
// Add your RPC definitions here.
// Cook up a unique-ish UNIX-domain socket name
// in /var/tmp, for the coordinator.
// Can't use the current directory since
// Athena AFS doesn't support UNIX-domain sockets.
func coordinatorSock() string {
s := "/var/tmp/824-mr-"
s += strconv.Itoa(os.Getuid())
return s
}
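// For a user whose uid is 1000, coordinatorSock() returns
// "/var/tmp/824-mr-1000" (illustrative value; the uid varies per machine).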

85
src/mr/worker.go Normal file
View File

@ -0,0 +1,85 @@
package mr
import "fmt"
import "log"
import "net/rpc"
import "hash/fnv"
//
// Map functions return a slice of KeyValue.
//
type KeyValue struct {
Key string
Value string
}
//
// use ihash(key) % NReduce to choose the reduce
// task number for each KeyValue emitted by Map.
//
func ihash(key string) int {
h := fnv.New32a()
h.Write([]byte(key))
return int(h.Sum32() & 0x7fffffff)
}
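// For example, an intermediate KeyValue kv produced by Map belongs in reduce
// bucket ihash(kv.Key) % nReduce, where nReduce is the reduce-task count
// handed out by the coordinator (a sketch; nReduce is not defined here).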
//
// main/mrworker.go calls this function.
//
func Worker(mapf func(string, string) []KeyValue,
reducef func(string, []string) string) {
// Your worker implementation here.
// uncomment to send the Example RPC to the coordinator.
// CallExample()
}
//
// example function to show how to make an RPC call to the coordinator.
//
// the RPC argument and reply types are defined in rpc.go.
//
func CallExample() {
// declare an argument structure.
args := ExampleArgs{}
// fill in the argument(s).
args.X = 99
// declare a reply structure.
reply := ExampleReply{}
// send the RPC request, wait for the reply.
call("Coordinator.Example", &args, &reply)
// reply.Y should be 100.
fmt.Printf("reply.Y %v\n", reply.Y)
}
//
// send an RPC request to the coordinator, wait for the response.
// usually returns true.
// returns false if something goes wrong.
//
func call(rpcname string, args interface{}, reply interface{}) bool {
// c, err := rpc.DialHTTP("tcp", "127.0.0.1"+":1234")
sockname := coordinatorSock()
c, err := rpc.DialHTTP("unix", sockname)
if err != nil {
log.Fatal("dialing:", err)
}
defer c.Close()
err = c.Call(rpcname, args, reply)
if err == nil {
return true
}
fmt.Println(err)
return false
}

55
src/mrapps/crash.go Normal file
View File

@ -0,0 +1,55 @@
package main
//
// a MapReduce pseudo-application that sometimes crashes,
// and sometimes takes a long time,
// to test MapReduce's ability to recover.
//
// go build -buildmode=plugin crash.go
//
import "6.824/mr"
import crand "crypto/rand"
import "math/big"
import "strings"
import "os"
import "sort"
import "strconv"
import "time"
func maybeCrash() {
max := big.NewInt(1000)
rr, _ := crand.Int(crand.Reader, max)
if rr.Int64() < 330 {
// crash!
os.Exit(1)
} else if rr.Int64() < 660 {
// delay for a while.
maxms := big.NewInt(10 * 1000)
ms, _ := crand.Int(crand.Reader, maxms)
time.Sleep(time.Duration(ms.Int64()) * time.Millisecond)
}
}
func Map(filename string, contents string) []mr.KeyValue {
maybeCrash()
kva := []mr.KeyValue{}
kva = append(kva, mr.KeyValue{"a", filename})
kva = append(kva, mr.KeyValue{"b", strconv.Itoa(len(filename))})
kva = append(kva, mr.KeyValue{"c", strconv.Itoa(len(contents))})
kva = append(kva, mr.KeyValue{"d", "xyzzy"})
return kva
}
func Reduce(key string, values []string) string {
maybeCrash()
// sort values to ensure deterministic output.
vv := make([]string, len(values))
copy(vv, values)
sort.Strings(vv)
val := strings.Join(vv, " ")
return val
}

40
src/mrapps/early_exit.go Normal file
View File

@ -0,0 +1,40 @@
package main
//
// a MapReduce pseudo-application "plugin" whose reduce tasks sleep for a while,
// to test that no worker or coordinator exits before the whole job has finished.
//
// go build -buildmode=plugin early_exit.go
//
import (
"strconv"
"strings"
"time"
"6.824/mr"
)
//
// The map function is called once for each file of input.
// This map function just returns 1 for each file
//
func Map(filename string, contents string) []mr.KeyValue {
kva := []mr.KeyValue{}
kva = append(kva, mr.KeyValue{filename, "1"})
return kva
}
//
// The reduce function is called once for each key generated by the
// map tasks, with a list of all the values created for that key by
// any map task.
//
func Reduce(key string, values []string) string {
// some reduce tasks sleep for a long time, to check whether a worker
// (or the coordinator) accidentally exits before the whole job is done
if strings.Contains(key, "sherlock") || strings.Contains(key, "tom") {
time.Sleep(time.Duration(3 * time.Second))
}
// return the number of occurrences of this file.
return strconv.Itoa(len(values))
}

39
src/mrapps/indexer.go Normal file
View File

@ -0,0 +1,39 @@
package main
//
// an indexing application "plugin" for MapReduce.
//
// go build -buildmode=plugin indexer.go
//
import "fmt"
import "6.824/mr"
import "strings"
import "unicode"
import "sort"
// The mapping function is called once for each piece of the input.
// In this framework, the key is the name of the file that is being processed,
// and the value is the file's contents. The return value should be a slice of
// key/value pairs, each represented by a mr.KeyValue.
func Map(document string, value string) (res []mr.KeyValue) {
m := make(map[string]bool)
words := strings.FieldsFunc(value, func(x rune) bool { return !unicode.IsLetter(x) })
for _, w := range words {
m[w] = true
}
for w := range m {
kv := mr.KeyValue{w, document}
res = append(res, kv)
}
return
}
// The reduce function is called once for each key generated by Map, with a
// list of that key's string value (merged across all inputs). The return value
// should be a single output value for that key.
func Reduce(key string, values []string) string {
sort.Strings(values)
return fmt.Sprintf("%d %s", len(values), strings.Join(values, ","))
}

46
src/mrapps/jobcount.go Normal file
View File

@ -0,0 +1,46 @@
package main
//
// a MapReduce pseudo-application that counts the number of times map/reduce
// tasks are run, to test whether jobs are assigned multiple times even when
// there is no failure.
//
// go build -buildmode=plugin jobcount.go
//
import "6.824/mr"
import "math/rand"
import "strings"
import "strconv"
import "time"
import "fmt"
import "os"
import "io/ioutil"
var count int
func Map(filename string, contents string) []mr.KeyValue {
me := os.Getpid()
f := fmt.Sprintf("mr-worker-jobcount-%d-%d", me, count)
count++
err := ioutil.WriteFile(f, []byte("x"), 0666)
if err != nil {
panic(err)
}
time.Sleep(time.Duration(2000+rand.Intn(3000)) * time.Millisecond)
return []mr.KeyValue{mr.KeyValue{"a", "x"}}
}
func Reduce(key string, values []string) string {
files, err := ioutil.ReadDir(".")
if err != nil {
panic(err)
}
invocations := 0
for _, f := range files {
if strings.HasPrefix(f.Name(), "mr-worker-jobcount") {
invocations++
}
}
return strconv.Itoa(invocations)
}

91
src/mrapps/mtiming.go Normal file
View File

@ -0,0 +1,91 @@
package main
//
// a MapReduce pseudo-application to test that workers
// execute map tasks in parallel.
//
// go build -buildmode=plugin mtiming.go
//
import "6.824/mr"
import "strings"
import "fmt"
import "os"
import "syscall"
import "time"
import "sort"
import "io/ioutil"
func nparallel(phase string) int {
// create a file so that other workers will see that
// we're running at the same time as them.
pid := os.Getpid()
myfilename := fmt.Sprintf("mr-worker-%s-%d", phase, pid)
err := ioutil.WriteFile(myfilename, []byte("x"), 0666)
if err != nil {
panic(err)
}
// are any other workers running?
// find their PIDs by scanning directory for mr-worker-XXX files.
dd, err := os.Open(".")
if err != nil {
panic(err)
}
names, err := dd.Readdirnames(1000000)
if err != nil {
panic(err)
}
ret := 0
for _, name := range names {
var xpid int
pat := fmt.Sprintf("mr-worker-%s-%%d", phase)
n, err := fmt.Sscanf(name, pat, &xpid)
if n == 1 && err == nil {
err := syscall.Kill(xpid, 0)
if err == nil {
// if err == nil, xpid is alive.
ret += 1
}
}
}
dd.Close()
time.Sleep(1 * time.Second)
err = os.Remove(myfilename)
if err != nil {
panic(err)
}
return ret
}
func Map(filename string, contents string) []mr.KeyValue {
t0 := time.Now()
ts := float64(t0.Unix()) + (float64(t0.Nanosecond()) / 1000000000.0)
pid := os.Getpid()
n := nparallel("map")
kva := []mr.KeyValue{}
kva = append(kva, mr.KeyValue{
fmt.Sprintf("times-%v", pid),
fmt.Sprintf("%.1f", ts)})
kva = append(kva, mr.KeyValue{
fmt.Sprintf("parallel-%v", pid),
fmt.Sprintf("%d", n)})
return kva
}
func Reduce(key string, values []string) string {
//n := nparallel("reduce")
// sort values to ensure deterministic output.
vv := make([]string, len(values))
copy(vv, values)
sort.Strings(vv)
val := strings.Join(vv, " ")
return val
}

47
src/mrapps/nocrash.go Normal file
View File

@ -0,0 +1,47 @@
package main
//
// same as crash.go but doesn't actually crash.
//
// go build -buildmode=plugin nocrash.go
//
import "6.824/mr"
import crand "crypto/rand"
import "math/big"
import "strings"
import "os"
import "sort"
import "strconv"
func maybeCrash() {
max := big.NewInt(1000)
rr, _ := crand.Int(crand.Reader, max)
if false && rr.Int64() < 500 {
// crash!
os.Exit(1)
}
}
func Map(filename string, contents string) []mr.KeyValue {
maybeCrash()
kva := []mr.KeyValue{}
kva = append(kva, mr.KeyValue{"a", filename})
kva = append(kva, mr.KeyValue{"b", strconv.Itoa(len(filename))})
kva = append(kva, mr.KeyValue{"c", strconv.Itoa(len(contents))})
kva = append(kva, mr.KeyValue{"d", "xyzzy"})
return kva
}
func Reduce(key string, values []string) string {
maybeCrash()
// sort values to ensure deterministic output.
vv := make([]string, len(values))
copy(vv, values)
sort.Strings(vv)
val := strings.Join(vv, " ")
return val
}

84
src/mrapps/rtiming.go Normal file
View File

@ -0,0 +1,84 @@
package main
//
// a MapReduce pseudo-application to test that workers
// execute reduce tasks in parallel.
//
// go build -buildmode=plugin rtiming.go
//
import "6.824/mr"
import "fmt"
import "os"
import "syscall"
import "time"
import "io/ioutil"
func nparallel(phase string) int {
// create a file so that other workers will see that
// we're running at the same time as them.
pid := os.Getpid()
myfilename := fmt.Sprintf("mr-worker-%s-%d", phase, pid)
err := ioutil.WriteFile(myfilename, []byte("x"), 0666)
if err != nil {
panic(err)
}
// are any other workers running?
// find their PIDs by scanning directory for mr-worker-XXX files.
dd, err := os.Open(".")
if err != nil {
panic(err)
}
names, err := dd.Readdirnames(1000000)
if err != nil {
panic(err)
}
ret := 0
for _, name := range names {
var xpid int
pat := fmt.Sprintf("mr-worker-%s-%%d", phase)
n, err := fmt.Sscanf(name, pat, &xpid)
if n == 1 && err == nil {
err := syscall.Kill(xpid, 0)
if err == nil {
// if err == nil, xpid is alive.
ret += 1
}
}
}
dd.Close()
time.Sleep(1 * time.Second)
err = os.Remove(myfilename)
if err != nil {
panic(err)
}
return ret
}
func Map(filename string, contents string) []mr.KeyValue {
kva := []mr.KeyValue{}
kva = append(kva, mr.KeyValue{"a", "1"})
kva = append(kva, mr.KeyValue{"b", "1"})
kva = append(kva, mr.KeyValue{"c", "1"})
kva = append(kva, mr.KeyValue{"d", "1"})
kva = append(kva, mr.KeyValue{"e", "1"})
kva = append(kva, mr.KeyValue{"f", "1"})
kva = append(kva, mr.KeyValue{"g", "1"})
kva = append(kva, mr.KeyValue{"h", "1"})
kva = append(kva, mr.KeyValue{"i", "1"})
kva = append(kva, mr.KeyValue{"j", "1"})
return kva
}
func Reduce(key string, values []string) string {
n := nparallel("reduce")
val := fmt.Sprintf("%d", n)
return val
}

44
src/mrapps/wc.go Normal file
View File

@ -0,0 +1,44 @@
package main
//
// a word-count application "plugin" for MapReduce.
//
// go build -buildmode=plugin wc.go
//
import "6.824/mr"
import "unicode"
import "strings"
import "strconv"
//
// The map function is called once for each file of input. The first
// argument is the name of the input file, and the second is the
// file's complete contents. You should ignore the input file name,
// and look only at the contents argument. The return value is a slice
// of key/value pairs.
//
func Map(filename string, contents string) []mr.KeyValue {
// function to detect word separators.
ff := func(r rune) bool { return !unicode.IsLetter(r) }
// split contents into an array of words.
words := strings.FieldsFunc(contents, ff)
kva := []mr.KeyValue{}
for _, w := range words {
kv := mr.KeyValue{w, "1"}
kva = append(kva, kv)
}
return kva
}
//
// The reduce function is called once for each key generated by the
// map tasks, with a list of all the values created for that key by
// any map task.
//
func Reduce(key string, values []string) string {
// return the number of occurrences of this word.
return strconv.Itoa(len(values))
}

72
src/porcupine/bitset.go Normal file
View File

@ -0,0 +1,72 @@
package porcupine
import "math/bits"
type bitset []uint64
// data layout:
// bits 0-63 are in data[0], the next are in data[1], etc.
func newBitset(bits uint) bitset {
extra := uint(0)
if bits%64 != 0 {
extra = 1
}
chunks := bits/64 + extra
return bitset(make([]uint64, chunks))
}
func (b bitset) clone() bitset {
dataCopy := make([]uint64, len(b))
copy(dataCopy, b)
return bitset(dataCopy)
}
func bitsetIndex(pos uint) (uint, uint) {
return pos / 64, pos % 64
}
func (b bitset) set(pos uint) bitset {
major, minor := bitsetIndex(pos)
b[major] |= (1 << minor)
return b
}
func (b bitset) clear(pos uint) bitset {
major, minor := bitsetIndex(pos)
b[major] &^= (1 << minor)
return b
}
func (b bitset) get(pos uint) bool {
major, minor := bitsetIndex(pos)
return b[major]&(1<<minor) != 0
}
func (b bitset) popcnt() uint {
total := 0
for _, v := range b {
total += bits.OnesCount64(v)
}
return uint(total)
}
func (b bitset) hash() uint64 {
hash := uint64(b.popcnt())
for _, v := range b {
hash ^= v
}
return hash
}
func (b bitset) equals(b2 bitset) bool {
if len(b) != len(b2) {
return false
}
for i := range b {
if b[i] != b2[i] {
return false
}
}
return true
}
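// A small sketch (unused by the checker itself) of how these helpers combine:
// mark operation ids as linearized, backtrack, and count what remains.
func exampleBitsetUsage() (bool, uint) {
	b := newBitset(128)          // 128 bits -> two uint64 chunks
	b.set(3).set(64)             // mark ids 3 and 64 as linearized
	b.clear(3)                   // un-mark id 3, as the checker does when backtracking
	return b.get(64), b.popcnt() // true, 1
}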

373
src/porcupine/checker.go Normal file
View File

@ -0,0 +1,373 @@
package porcupine
import (
"sort"
"sync/atomic"
"time"
)
type entryKind bool
const (
callEntry entryKind = false
returnEntry = true
)
type entry struct {
kind entryKind
value interface{}
id int
time int64
clientId int
}
type linearizationInfo struct {
history [][]entry // for each partition, a list of entries
partialLinearizations [][][]int // for each partition, a set of histories (list of ids)
}
type byTime []entry
func (a byTime) Len() int {
return len(a)
}
func (a byTime) Swap(i, j int) {
a[i], a[j] = a[j], a[i]
}
func (a byTime) Less(i, j int) bool {
if a[i].time != a[j].time {
return a[i].time < a[j].time
}
// if the timestamps are the same, we need to make sure we order calls
// before returns
return a[i].kind == callEntry && a[j].kind == returnEntry
}
func makeEntries(history []Operation) []entry {
var entries []entry = nil
id := 0
for _, elem := range history {
entries = append(entries, entry{
callEntry, elem.Input, id, elem.Call, elem.ClientId})
entries = append(entries, entry{
returnEntry, elem.Output, id, elem.Return, elem.ClientId})
id++
}
sort.Sort(byTime(entries))
return entries
}
type node struct {
value interface{}
match *node // call if match is nil, otherwise return
id int
next *node
prev *node
}
func insertBefore(n *node, mark *node) *node {
if mark != nil {
beforeMark := mark.prev
mark.prev = n
n.next = mark
if beforeMark != nil {
n.prev = beforeMark
beforeMark.next = n
}
}
return n
}
func length(n *node) int {
l := 0
for n != nil {
n = n.next
l++
}
return l
}
func renumber(events []Event) []Event {
var e []Event
m := make(map[int]int) // renumbering
id := 0
for _, v := range events {
if r, ok := m[v.Id]; ok {
e = append(e, Event{v.ClientId, v.Kind, v.Value, r})
} else {
e = append(e, Event{v.ClientId, v.Kind, v.Value, id})
m[v.Id] = id
id++
}
}
return e
}
func convertEntries(events []Event) []entry {
var entries []entry
for i, elem := range events {
kind := callEntry
if elem.Kind == ReturnEvent {
kind = returnEntry
}
// use index as "time"
entries = append(entries, entry{kind, elem.Value, elem.Id, int64(i), elem.ClientId})
}
return entries
}
func makeLinkedEntries(entries []entry) *node {
var root *node = nil
match := make(map[int]*node)
for i := len(entries) - 1; i >= 0; i-- {
elem := entries[i]
if elem.kind == returnEntry {
entry := &node{value: elem.value, match: nil, id: elem.id}
match[elem.id] = entry
insertBefore(entry, root)
root = entry
} else {
entry := &node{value: elem.value, match: match[elem.id], id: elem.id}
insertBefore(entry, root)
root = entry
}
}
return root
}
type cacheEntry struct {
linearized bitset
state interface{}
}
func cacheContains(model Model, cache map[uint64][]cacheEntry, entry cacheEntry) bool {
for _, elem := range cache[entry.linearized.hash()] {
if entry.linearized.equals(elem.linearized) && model.Equal(entry.state, elem.state) {
return true
}
}
return false
}
type callsEntry struct {
entry *node
state interface{}
}
func lift(entry *node) {
entry.prev.next = entry.next
entry.next.prev = entry.prev
match := entry.match
match.prev.next = match.next
if match.next != nil {
match.next.prev = match.prev
}
}
func unlift(entry *node) {
match := entry.match
match.prev.next = match
if match.next != nil {
match.next.prev = match
}
entry.prev.next = entry
entry.next.prev = entry
}
func checkSingle(model Model, history []entry, computePartial bool, kill *int32) (bool, []*[]int) {
entry := makeLinkedEntries(history)
n := length(entry) / 2
linearized := newBitset(uint(n))
cache := make(map[uint64][]cacheEntry) // map from hash to cache entry
var calls []callsEntry
// longest linearizable prefix that includes the given entry
longest := make([]*[]int, n)
state := model.Init()
headEntry := insertBefore(&node{value: nil, match: nil, id: -1}, entry)
for headEntry.next != nil {
if atomic.LoadInt32(kill) != 0 {
return false, longest
}
if entry.match != nil {
matching := entry.match // the return entry
ok, newState := model.Step(state, entry.value, matching.value)
if ok {
newLinearized := linearized.clone().set(uint(entry.id))
newCacheEntry := cacheEntry{newLinearized, newState}
if !cacheContains(model, cache, newCacheEntry) {
hash := newLinearized.hash()
cache[hash] = append(cache[hash], newCacheEntry)
calls = append(calls, callsEntry{entry, state})
state = newState
linearized.set(uint(entry.id))
lift(entry)
entry = headEntry.next
} else {
entry = entry.next
}
} else {
entry = entry.next
}
} else {
if len(calls) == 0 {
return false, longest
}
// record the longest partial linearizations seen so far, then backtrack
if computePartial {
callsLen := len(calls)
var seq []int = nil
for _, v := range calls {
if longest[v.entry.id] == nil || callsLen > len(*longest[v.entry.id]) {
// create seq lazily
if seq == nil {
seq = make([]int, len(calls))
for i, v := range calls {
seq[i] = v.entry.id
}
}
longest[v.entry.id] = &seq
}
}
}
callsTop := calls[len(calls)-1]
entry = callsTop.entry
state = callsTop.state
linearized.clear(uint(entry.id))
calls = calls[:len(calls)-1]
unlift(entry)
entry = entry.next
}
}
// longest linearization is the complete linearization, which is calls
seq := make([]int, len(calls))
for i, v := range calls {
seq[i] = v.entry.id
}
for i := 0; i < n; i++ {
longest[i] = &seq
}
return true, longest
}
func fillDefault(model Model) Model {
if model.Partition == nil {
model.Partition = NoPartition
}
if model.PartitionEvent == nil {
model.PartitionEvent = NoPartitionEvent
}
if model.Equal == nil {
model.Equal = ShallowEqual
}
if model.DescribeOperation == nil {
model.DescribeOperation = DefaultDescribeOperation
}
if model.DescribeState == nil {
model.DescribeState = DefaultDescribeState
}
return model
}
func checkParallel(model Model, history [][]entry, computeInfo bool, timeout time.Duration) (CheckResult, linearizationInfo) {
ok := true
timedOut := false
results := make(chan bool, len(history))
longest := make([][]*[]int, len(history))
kill := int32(0)
for i, subhistory := range history {
go func(i int, subhistory []entry) {
ok, l := checkSingle(model, subhistory, computeInfo, &kill)
longest[i] = l
results <- ok
}(i, subhistory)
}
var timeoutChan <-chan time.Time
if timeout > 0 {
timeoutChan = time.After(timeout)
}
count := 0
loop:
for {
select {
case result := <-results:
count++
ok = ok && result
if !ok && !computeInfo {
atomic.StoreInt32(&kill, 1)
break loop
}
if count >= len(history) {
break loop
}
case <-timeoutChan:
timedOut = true
atomic.StoreInt32(&kill, 1)
break loop // if we time out, we might get a false positive
}
}
var info linearizationInfo
if computeInfo {
// make sure we've waited for all goroutines to finish,
// otherwise we might race on access to longest[]
for count < len(history) {
<-results
count++
}
// return longest linearizable prefixes that include each history element
partialLinearizations := make([][][]int, len(history))
for i := 0; i < len(history); i++ {
var partials [][]int
// turn longest into a set of unique linearizations
set := make(map[*[]int]struct{})
for _, v := range longest[i] {
if v != nil {
set[v] = struct{}{}
}
}
for k := range set {
arr := make([]int, len(*k))
for i, v := range *k {
arr[i] = v
}
partials = append(partials, arr)
}
partialLinearizations[i] = partials
}
info.history = history
info.partialLinearizations = partialLinearizations
}
var result CheckResult
if !ok {
result = Illegal
} else {
if timedOut {
result = Unknown
} else {
result = Ok
}
}
return result, info
}
func checkEvents(model Model, history []Event, verbose bool, timeout time.Duration) (CheckResult, linearizationInfo) {
model = fillDefault(model)
partitions := model.PartitionEvent(history)
l := make([][]entry, len(partitions))
for i, subhistory := range partitions {
l[i] = convertEntries(renumber(subhistory))
}
return checkParallel(model, l, verbose, timeout)
}
func checkOperations(model Model, history []Operation, verbose bool, timeout time.Duration) (CheckResult, linearizationInfo) {
model = fillDefault(model)
partitions := model.Partition(history)
l := make([][]entry, len(partitions))
for i, subhistory := range partitions {
l[i] = makeEntries(subhistory)
}
return checkParallel(model, l, verbose, timeout)
}

77
src/porcupine/model.go Normal file
View File

@ -0,0 +1,77 @@
package porcupine
import "fmt"
type Operation struct {
ClientId int // optional, unless you want a visualization; zero-indexed
Input interface{}
Call int64 // invocation time
Output interface{}
Return int64 // response time
}
type EventKind bool
const (
CallEvent EventKind = false
ReturnEvent EventKind = true
)
type Event struct {
ClientId int // optional, unless you want a visualization; zero-indexed
Kind EventKind
Value interface{}
Id int
}
type Model struct {
// Partition functions, such that a history is linearizable if and only
// if each partition is linearizable. If you don't want to implement
// this, you can always use the `NoPartition` functions implemented
// below.
Partition func(history []Operation) [][]Operation
PartitionEvent func(history []Event) [][]Event
// Initial state of the system.
Init func() interface{}
// Step function for the system. Returns whether or not the system
// could take this step with the given inputs and outputs and also
// returns the new state. This should not mutate the existing state.
Step func(state interface{}, input interface{}, output interface{}) (bool, interface{})
// Equality on states. If you are using a simple data type for states,
// you can use the `ShallowEqual` function implemented below.
Equal func(state1, state2 interface{}) bool
// For visualization, describe an operation as a string.
// For example, "Get('x') -> 'y'".
DescribeOperation func(input interface{}, output interface{}) string
// For visualization purposes, describe a state as a string.
// For example, "{'x' -> 'y', 'z' -> 'w'}"
DescribeState func(state interface{}) string
}
func NoPartition(history []Operation) [][]Operation {
return [][]Operation{history}
}
func NoPartitionEvent(history []Event) [][]Event {
return [][]Event{history}
}
func ShallowEqual(state1, state2 interface{}) bool {
return state1 == state2
}
func DefaultDescribeOperation(input interface{}, output interface{}) string {
return fmt.Sprintf("%v -> %v", input, output)
}
func DefaultDescribeState(state interface{}) string {
return fmt.Sprintf("%v", state)
}
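// A minimal sketch of a Model for a single read/write register, showing how
// Init/Step/Equal fit together; registerInput and registerModel are
// illustrative names that nothing else in this package uses.
type registerInput struct {
	write bool
	value int
}

var registerModel = Model{
	Init: func() interface{} { return 0 },
	Step: func(state, input, output interface{}) (bool, interface{}) {
		in := input.(registerInput)
		if in.write {
			return true, in.value // a write always succeeds and becomes the new state
		}
		// in this sketch a read's output is the int value it observed
		return output.(int) == state.(int), state
	},
	Equal: ShallowEqual,
}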
type CheckResult string
const (
Unknown CheckResult = "Unknown" // timed out
Ok = "Ok"
Illegal = "Illegal"
)

View File

@ -0,0 +1,39 @@
package porcupine
import "time"
func CheckOperations(model Model, history []Operation) bool {
res, _ := checkOperations(model, history, false, 0)
return res == Ok
}
// timeout = 0 means no timeout
// if this operation times out, then a false positive is possible
func CheckOperationsTimeout(model Model, history []Operation, timeout time.Duration) CheckResult {
res, _ := checkOperations(model, history, false, timeout)
return res
}
// timeout = 0 means no timeout
// if this operation times out, then a false positive is possible
func CheckOperationsVerbose(model Model, history []Operation, timeout time.Duration) (CheckResult, linearizationInfo) {
return checkOperations(model, history, true, timeout)
}
func CheckEvents(model Model, history []Event) bool {
res, _ := checkEvents(model, history, false, 0)
return res == Ok
}
// timeout = 0 means no timeout
// if this operation times out, then a false positive is possible
func CheckEventsTimeout(model Model, history []Event, timeout time.Duration) CheckResult {
res, _ := checkEvents(model, history, false, timeout)
return res
}
// timeout = 0 means no timeout
// if this operation times out, then a false positive is possible
func CheckEventsVerbose(model Model, history []Event, timeout time.Duration) (CheckResult, linearizationInfo) {
return checkEvents(model, history, true, timeout)
}
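// Usage sketch (hedged): callers that want a failure artifact typically use a
// Verbose variant so the resulting linearizationInfo can be rendered with
// Visualize/VisualizePath (defined in this package's visualization file):
//
//	res, info := CheckOperationsVerbose(model, history, time.Minute)
//	if res == Illegal {
//		VisualizePath(model, info, "/tmp/linearizability.html") // path is an arbitrary example
//	}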

View File

@ -0,0 +1,897 @@
package porcupine
import (
"encoding/json"
"fmt"
"io"
"os"
"sort"
)
type historyElement struct {
ClientId int
Start int64
End int64
Description string
}
type linearizationStep struct {
Index int
StateDescription string
}
type partialLinearization = []linearizationStep
type partitionVisualizationData struct {
History []historyElement
PartialLinearizations []partialLinearization
Largest map[int]int
}
type visualizationData = []partitionVisualizationData
func computeVisualizationData(model Model, info linearizationInfo) visualizationData {
model = fillDefault(model)
data := make(visualizationData, len(info.history))
for partition := 0; partition < len(info.history); partition++ {
// history
n := len(info.history[partition]) / 2
history := make([]historyElement, n)
callValue := make(map[int]interface{})
returnValue := make(map[int]interface{})
for _, elem := range info.history[partition] {
switch elem.kind {
case callEntry:
history[elem.id].ClientId = elem.clientId
history[elem.id].Start = elem.time
callValue[elem.id] = elem.value
case returnEntry:
history[elem.id].End = elem.time
history[elem.id].Description = model.DescribeOperation(callValue[elem.id], elem.value)
returnValue[elem.id] = elem.value
}
}
// partial linearizations
largestIndex := make(map[int]int)
largestSize := make(map[int]int)
linearizations := make([]partialLinearization, len(info.partialLinearizations[partition]))
partials := info.partialLinearizations[partition]
sort.Slice(partials, func(i, j int) bool {
return len(partials[i]) > len(partials[j])
})
for i, partial := range partials {
linearization := make(partialLinearization, len(partial))
state := model.Init()
for j, histId := range partial {
var ok bool
ok, state = model.Step(state, callValue[histId], returnValue[histId])
if ok != true {
panic("valid partial linearization returned non-ok result from model step")
}
stateDesc := model.DescribeState(state)
linearization[j] = linearizationStep{histId, stateDesc}
if largestSize[histId] < len(partial) {
largestSize[histId] = len(partial)
largestIndex[histId] = i
}
}
linearizations[i] = linearization
}
data[partition] = partitionVisualizationData{
History: history,
PartialLinearizations: linearizations,
Largest: largestIndex,
}
}
return data
}
func Visualize(model Model, info linearizationInfo, output io.Writer) error {
data := computeVisualizationData(model, info)
jsonData, err := json.Marshal(data)
if err != nil {
return err
}
_, err = fmt.Fprintf(output, html, jsonData)
if err != nil {
return err
}
return nil
}
func VisualizePath(model Model, info linearizationInfo, path string) error {
f, err := os.Create(path)
if err != nil {
return err
}
defer f.Close()
return Visualize(model, info, f)
}
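// Editor's note: an illustrative sketch, not part of the original commit, showing how
// the verbose checker and the visualizer above fit together; the model and history
// arguments and the output file name are up to the caller.
func exampleVisualize(model Model, history []Operation) error {
	res, info := CheckOperationsVerbose(model, history, 0)
	if res == Illegal {
		// write an interactive HTML timeline showing where linearization fails
		return VisualizePath(model, info, "porcupine.html")
	}
	return nil
}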
const html = `
<!DOCTYPE html>
<html>
<head><title>Porcupine</title>
<style>
html {
font-family: Helvetica, Arial, sans-serif;
font-size: 16px;
}
text {
dominant-baseline: middle;
}
#legend {
position: fixed;
left: 10px;
top: 10px;
background-color: rgba(255, 255, 255, 0.5);
backdrop-filter: blur(3px);
padding: 5px 2px 1px 2px;
border-radius: 4px;
}
#canvas {
margin-top: 45px;
}
#calc {
width: 0;
height: 0;
visibility: hidden;
}
.bg {
fill: transparent;
}
.divider {
stroke: #ccc;
stroke-width: 1;
}
.history-rect {
stroke: #888;
stroke-width: 1;
fill: #42d1f5;
}
.link {
fill: #206475;
cursor: pointer;
}
.selected {
stroke-width: 5;
}
.target-rect {
opacity: 0;
}
.history-text {
font-size: 0.9rem;
font-family: Menlo, Courier New, monospace;
}
.hidden {
opacity: 0.2;
}
.hidden line {
opacity: 0.5; /* note: this is multiplicative */
}
.linearization {
stroke: rgba(0, 0, 0, 0.5);
}
.linearization-invalid {
stroke: rgba(255, 0, 0, 0.5);
}
.linearization-point {
stroke-width: 5;
}
.linearization-line {
stroke-width: 2;
}
.tooltip {
position: absolute;
opacity: 0;
border: 1px solid #ccc;
background: white;
border-radius: 4px;
padding: 5px;
font-size: 0.8rem;
}
.inactive {
display: none;
}
</style>
</head>
<body>
<div id="legend">
<svg xmlns="http://www.w3.org/2000/svg" width="660" height="20">
<text x="0" y="10">Clients</text>
<line x1="50" y1="0" x2="70" y2="20" stroke="#000" stroke-width="1"></line>
<text x="70" y="10">Time</text>
<line x1="110" y1="10" x2="200" y2="10" stroke="#000" stroke-width="2"></line>
<polygon points="200,5 200,15, 210,10" fill="#000"></polygon>
<rect x="300" y="5" width="10" height="10" fill="rgba(0, 0, 0, 0.5)"></rect>
<text x="315" y="10">Valid LP</text>
<rect x="400" y="5" width="10" height="10" fill="rgba(255, 0, 0, 0.5)"></rect>
<text x="415" y="10">Invalid LP</text>
<text x="520" y="10" id="jump-link" class="link">[ jump to first error ]</text>
</svg>
</div>
<div id="canvas">
</div>
<div id="calc">
</div>
<script>
'use strict'
const SVG_NS = 'http://www.w3.org/2000/svg'
function svgnew(tag, attrs) {
const el = document.createElementNS(SVG_NS, tag)
svgattr(el, attrs)
return el
}
function svgattr(el, attrs) {
if (attrs != null) {
for (var k in attrs) {
if (Object.prototype.hasOwnProperty.call(attrs, k)) {
el.setAttributeNS(null, k, attrs[k])
}
}
}
}
function svgattach(parent, child) {
return parent.appendChild(child)
}
function svgadd(el, tag, attrs) {
return svgattach(el, svgnew(tag, attrs))
}
function newArray(n, fn) {
const arr = new Array(n)
for (let i = 0; i < n; i++) {
arr[i] = fn(i)
}
return arr
}
function arrayEq(a, b) {
if (a === b) {
return true
}
if (a == null || b == null) {
return false
}
if (a.length != b.length) {
return false
}
for (let i = 0; i < a.length; i++) {
if (a[i] !== b[i]) {
return false
}
}
return true
}
function render(data) {
const PADDING = 10
const BOX_HEIGHT = 30
const BOX_SPACE = 15
const XOFF = 20
const EPSILON = 20
const LINE_BLEED = 5
const BOX_GAP = 20
const BOX_TEXT_PADDING = 10
const HISTORY_RECT_RADIUS = 4
let maxClient = -1
data.forEach(partition => {
partition['History'].forEach(el => {
maxClient = Math.max(maxClient, el['ClientId'])
})
})
const nClient = maxClient + 1
// Prepare some useful data to be used later:
// - Add a GID to each event
// - Create a mapping from GIDs back to events
// - Create a set of all timestamps
// - Create a set of all start timestamps
const allTimestamps = new Set()
const startTimestamps = new Set()
let gid = 0
const byGid = {}
data.forEach(partition => {
partition['History'].forEach(el => {
allTimestamps.add(el['Start'])
startTimestamps.add(el['Start'])
allTimestamps.add(el['End'])
// give elements GIDs
el['Gid'] = gid
byGid[gid] = el
gid++
})
})
let sortedTimestamps = Array.from(allTimestamps).sort((a, b) => a - b)
// This should not happen with "real" histories, but for certain edge
// cases, we need to deal with having multiple events share a start/end
// time. We solve this by tweaking the events that share the end time,
// updating the time to end+epsilon. In practice, rather than having to
// choose an epsilon, we choose to average the value with the next largest
// timestamp.
const nextTs = {}
for (let i = 0; i < sortedTimestamps.length-1; i++) {
nextTs[sortedTimestamps[i]] = sortedTimestamps[i+1]
}
data.forEach(partition => {
partition['History'].forEach(el => {
let end = el['End']
el['OriginalEnd'] = end // for display purposes
if (startTimestamps.has(end)) {
if (Object.prototype.hasOwnProperty.call(nextTs, end)) {
const tweaked = (end + nextTs[end])/2
el['End'] = tweaked
allTimestamps.add(tweaked)
}
}
})
})
// Update sortedTimestamps, because we created some new timestamps.
sortedTimestamps = Array.from(allTimestamps).sort((a, b) => a - b)
// Compute layout.
//
// We warp time to make it easier to see what's going on. We can think
// of there being a monotonically increasing mapping from timestamps to
// x-positions. This mapping should satisfy some criteria to make the
// visualization interpretable:
//
// - distinguishability: there should be some minimum distance between
// unequal timestamps
// - visible text: history boxes should be wide enough to fit the text
// they contain
// - enough space for LPs: history boxes should be wide enough to fit
// all linearization points that go through them, while maintaining
// readability of linearizations (where each LP in a sequence is spaced
// some minimum distance away from the previous one)
//
// Originally, I thought about this as a linear program:
//
// - variables for every unique timestamp, x_i = warp(timestamp_i)
// - objective: minimize sum x_i
// - constraint: non-negative
// - constraint: ordering + distinguishability, timestamp_i < timestamp_j -> x_i + EPS < x_j
// - constraint: visible text, size_text_j < x_{timestamp_j_end} - x_{timestamp_j_start}
// - constraint: linearization lines have points that fit within box, ...
//
// This used to actually be implemented using an LP solver (without the
// linearization point part, though that should be doable too), but
// then I realized it's possible to solve optimally using a greedy
// left-to-right scan in linear time.
//
// So that is what we do here. We optimally solve the above, and while
// doing so, also compute some useful information (e.g. x-positions of
// linearization points) that is useful later.
const xPos = {}
// Compute some information about history elements, sorted by end time;
// the most important information here is box width.
const byEnd = data.flatMap(partition =>
partition['History'].map(el => {
// compute width of the text inside the history element by actually
// drawing it (in a hidden div)
const scratch = document.getElementById('calc')
scratch.innerHTML = ''
const svg = svgadd(scratch, 'svg')
const text = svgadd(svg, 'text', {
'text-anchor': 'middle',
'class': 'history-text',
})
text.textContent = el['Description']
const bbox = text.getBBox()
const width = bbox.width + 2*BOX_TEXT_PADDING
return {
'start': el['Start'],
'end': el['End'],
'width': width,
'gid': el['Gid']
}
})
).sort((a, b) => a.end - b.end)
// Some preprocessing for linearization points and illegal next
// linearizations. We need to figure out where exactly LPs end up
// as we go, so we can make sure event boxes are wide enough.
const eventToLinearizations = newArray(gid, () => []) // event -> [{index, position}]
const eventIllegalLast = newArray(gid, () => []) // event -> [index]
const allLinearizations = []
let lgid = 0
data.forEach(partition => {
partition['PartialLinearizations'].forEach(lin => {
const globalized = [] // linearization with global indexes instead of partition-local ones
const included = new Set() // for figuring out illegal next LPs
lin.forEach((id, position) => {
included.add(id['Index'])
const gid = partition['History'][id['Index']]['Gid']
globalized.push(gid)
eventToLinearizations[gid].push({'index': lgid, 'position': position})
})
allLinearizations.push(globalized)
let minEnd = Infinity
partition['History'].forEach((el, index) => {
if (!included.has(index)) {
minEnd = Math.min(minEnd, el['End'])
}
})
partition['History'].forEach((el, index) => {
if (!included.has(index) && el['Start'] < minEnd) {
eventIllegalLast[el['Gid']].push(lgid)
}
})
lgid++
})
})
const linearizationPositions = newArray(lgid, () => []) // [[xpos]]
// Okay, now we're ready to do the left-to-right scan.
// Solve timestamp -> xPos.
let eventIndex = 0
xPos[sortedTimestamps[0]] = 0 // positions start at 0
for (let i = 1; i < sortedTimestamps.length; i++) {
// left-to-right scan, finding minimum time we can use
const ts = sortedTimestamps[i]
// ensure some gap from last timestamp
let pos = xPos[sortedTimestamps[i-1]] + BOX_GAP
// ensure that text fits in boxes
while (eventIndex < byEnd.length && byEnd[eventIndex].end <= ts) {
// push our position as far as necessary to accommodate text in box
const event = byEnd[eventIndex]
const textEndPos = xPos[event.start] + event.width
pos = Math.max(pos, textEndPos)
// Ensure that LPs fit in box.
//
// When placing the end of an event, for all partial linearizations
// that include that event, for the prefix that comes before that event,
// all their start points must have been placed already, so we can figure
// out the minimum width that the box needs to be to accommodate the LP.
eventToLinearizations[event.gid]
.concat(eventIllegalLast[event.gid].map(index => {
return {
'index': index,
'position': allLinearizations[index].length-1,
}
}))
.forEach(li => {
const {index, position} = li
for (let i = linearizationPositions[index].length; i <= position; i++) {
// determine past points
let prev = null
if (linearizationPositions[index].length != 0) {
prev = linearizationPositions[index][i-1]
}
const nextGid = allLinearizations[index][i]
let nextPos
if (prev === null) {
nextPos = xPos[byGid[nextGid]['Start']]
} else {
nextPos = Math.max(xPos[byGid[nextGid]['Start']], prev + EPSILON)
}
linearizationPositions[index].push(nextPos)
}
// this next line only really makes sense for the ones in
// eventToLinearizations, not the ones from eventIllegalLast,
// but it's safe to do it for all points, so we don't bother to
// distinguish.
pos = Math.max(pos, linearizationPositions[index][position])
})
// ensure that illegal next LPs fit in box too
eventIllegalLast[event.gid].forEach(li => {
const lin = linearizationPositions[li]
const prev = lin[lin.length-1]
pos = Math.max(pos, prev + EPSILON)
})
eventIndex++
}
xPos[ts] = pos
}
// Solved, now draw UI.
let selected = false
let selectedIndex = [-1, -1]
const height = 2*PADDING + BOX_HEIGHT * nClient + BOX_SPACE * (nClient - 1)
const width = 2*PADDING + XOFF + xPos[sortedTimestamps[sortedTimestamps.length-1]]
const svg = svgadd(document.getElementById('canvas'), 'svg', {
'width': width,
'height': height,
})
// draw background, etc.
const bg = svgadd(svg, 'g')
const bgRect = svgadd(bg, 'rect', {
'height': height,
'width': width,
'x': 0,
'y': 0,
'class': 'bg',
})
bgRect.onclick = handleBgClick
for (let i = 0; i < nClient; i++) {
const text = svgadd(bg, 'text', {
'x': XOFF/2,
'y': PADDING + BOX_HEIGHT/2 + i * (BOX_HEIGHT + BOX_SPACE),
'text-anchor': 'middle',
})
text.textContent = i
}
svgadd(bg, 'line', {
'x1': PADDING + XOFF,
'y1': PADDING,
'x2': PADDING + XOFF,
'y2': height - PADDING,
'class': 'divider'
})
// draw history
const historyLayers = []
const historyRects = []
const targetRects = svgnew('g')
data.forEach((partition, partitionIndex) => {
const l = svgadd(svg, 'g')
historyLayers.push(l)
const rects = []
partition['History'].forEach((el, elIndex) => {
const g = svgadd(l, 'g')
const rx = xPos[el['Start']]
const width = xPos[el['End']] - rx
const x = rx + XOFF + PADDING
const y = PADDING + el['ClientId'] * (BOX_HEIGHT + BOX_SPACE)
rects.push(svgadd(g, 'rect', {
'height': BOX_HEIGHT,
'width': width,
'x': x,
'y': y,
'rx': HISTORY_RECT_RADIUS,
'ry': HISTORY_RECT_RADIUS,
'class': 'history-rect'
}))
const text = svgadd(g, 'text', {
'x': x + width/2,
'y': y + BOX_HEIGHT/2,
'text-anchor': 'middle',
'class': 'history-text',
})
text.textContent = el['Description']
// we don't add mouseTarget to g, but to targetRects, because we
// want to layer this on top of everything at the end; otherwise, the
// LPs and lines will be over the target, which will create holes
// where hover etc. won't work
const mouseTarget = svgadd(targetRects, 'rect', {
'height': BOX_HEIGHT,
'width': width,
'x': x,
'y': y,
'class': 'target-rect',
'data-partition': partitionIndex,
'data-index': elIndex,
})
mouseTarget.onmouseover = handleMouseOver
mouseTarget.onmousemove = handleMouseMove
mouseTarget.onmouseout = handleMouseOut
mouseTarget.onclick = handleClick
})
historyRects.push(rects)
})
// draw partial linearizations
const illegalLast = data.map(partition => {
return partition['PartialLinearizations'].map(() => new Set())
})
const largestIllegal = data.map(() => {return {}})
const largestIllegalLength = data.map(() => {return {}})
const partialLayers = []
const errorPoints = []
data.forEach((partition, partitionIndex) => {
const l = []
partialLayers.push(l)
partition['PartialLinearizations'].forEach((lin, linIndex) => {
const g = svgadd(svg, 'g')
l.push(g)
let prevX = null
let prevY = null
let prevEl = null
const included = new Set()
lin.forEach(id => {
const el = partition['History'][id['Index']]
const hereX = PADDING + XOFF + xPos[el['Start']]
const x = prevX !== null ? Math.max(hereX, prevX + EPSILON) : hereX
const y = PADDING + el['ClientId'] * (BOX_HEIGHT + BOX_SPACE) - LINE_BLEED
// line from previous
if (prevEl !== null) {
svgadd(g, 'line', {
'x1': prevX,
'x2': x,
'y1': prevEl['ClientId'] >= el['ClientId'] ? prevY : prevY + BOX_HEIGHT + 2*LINE_BLEED,
'y2': prevEl['ClientId'] <= el['ClientId'] ? y : y + BOX_HEIGHT + 2*LINE_BLEED,
'class': 'linearization linearization-line',
})
}
// current line
svgadd(g, 'line', {
'x1': x,
'x2': x,
'y1': y,
'y2': y + BOX_HEIGHT + 2*LINE_BLEED,
'class': 'linearization linearization-point'
})
prevX = x
prevY = y
prevEl = el
included.add(id['Index'])
})
// show possible but illegal next linearizations
// a history element is a possible next try
// if no other history element must be linearized earlier
// i.e. forall others, this.start < other.end
let minEnd = Infinity
partition['History'].forEach((el, index) => {
if (!included.has(index)) {
minEnd = Math.min(minEnd, el['End'])
}
})
partition['History'].forEach((el, index) => {
if (!included.has(index) && el['Start'] < minEnd) {
const hereX = PADDING + XOFF + xPos[el['Start']]
const x = prevX !== null ? Math.max(hereX, prevX + EPSILON) : hereX
const y = PADDING + el['ClientId'] * (BOX_HEIGHT + BOX_SPACE) - LINE_BLEED
// line from previous
svgadd(g, 'line', {
'x1': prevX,
'x2': x,
'y1': prevEl['ClientId'] >= el['ClientId'] ? prevY : prevY + BOX_HEIGHT + 2*LINE_BLEED,
'y2': prevEl['ClientId'] <= el['ClientId'] ? y : y + BOX_HEIGHT + 2*LINE_BLEED,
'class': 'linearization-invalid linearization-line',
})
// current line
const point = svgadd(g, 'line', {
'x1': x,
'x2': x,
'y1': y,
'y2': y + BOX_HEIGHT + 2*LINE_BLEED,
'class': 'linearization-invalid linearization-point',
})
errorPoints.push({
x: x,
partition: partitionIndex,
index: lin[lin.length-1]['Index'], // NOTE not index
element: point
})
illegalLast[partitionIndex][linIndex].add(index)
if (!Object.prototype.hasOwnProperty.call(largestIllegalLength[partitionIndex], index) || largestIllegalLength[partitionIndex][index] < lin.length) {
largestIllegalLength[partitionIndex][index] = lin.length
largestIllegal[partitionIndex][index] = linIndex
}
}
})
})
})
errorPoints.sort((a, b) => a.x - b.x)
// attach targetRects
svgattach(svg, targetRects)
// tooltip
const tooltip = document.getElementById('canvas').appendChild(document.createElement('div'))
tooltip.setAttribute('class', 'tooltip')
function handleMouseOver() {
if (!selected) {
const partition = parseInt(this.dataset['partition'])
const index = parseInt(this.dataset['index'])
highlight(partition, index)
}
tooltip.style.opacity = 1
}
function linearizationIndex(partition, index) {
// show this linearization
if (Object.prototype.hasOwnProperty.call(data[partition]['Largest'], index)) {
return data[partition]['Largest'][index]
} else if (Object.prototype.hasOwnProperty.call(largestIllegal[partition], index)) {
return largestIllegal[partition][index]
}
return null
}
function highlight(partition, index) {
// hide all but this partition
historyLayers.forEach((layer, i) => {
if (i === partition) {
layer.classList.remove('hidden')
} else {
layer.classList.add('hidden')
}
})
// hide all but the relevant linearization
partialLayers.forEach(layer => {
layer.forEach(g => {
g.classList.add('hidden')
})
})
// show this linearization
const maxIndex = linearizationIndex(partition, index)
if (maxIndex !== null) {
partialLayers[partition][maxIndex].classList.remove('hidden')
}
updateJump()
}
let lastTooltip = [null, null, null, null, null]
function handleMouseMove() {
const partition = parseInt(this.dataset['partition'])
const index = parseInt(this.dataset['index'])
const [sPartition, sIndex] = selectedIndex
const thisTooltip = [partition, index, selected, sPartition, sIndex]
if (!arrayEq(lastTooltip, thisTooltip)) {
let maxIndex
if (!selected) {
maxIndex = linearizationIndex(partition, index)
} else {
// if selected, show info relevant to the selected linearization
maxIndex = linearizationIndex(sPartition, sIndex)
}
if (selected && sPartition !== partition) {
tooltip.innerHTML = 'Not part of selected partition.'
} else if (maxIndex === null) {
if (!selected) {
tooltip.innerHTML = 'Not part of any partial linearization.'
} else {
tooltip.innerHTML = 'Selected element is not part of any partial linearization.'
}
} else {
const lin = data[partition]['PartialLinearizations'][maxIndex]
let prev = null, curr = null
let found = false
for (let i = 0; i < lin.length; i++) {
prev = curr
curr = lin[i]
if (curr['Index'] === index) {
found = true
break
}
}
let call = data[partition]['History'][index]['Start']
let ret = data[partition]['History'][index]['OriginalEnd']
let msg = ''
if (found) {
// part of linearization
if (prev !== null) {
msg = '<strong>Previous state:</strong><br>' + prev['StateDescription'] + '<br><br>'
}
msg += '<strong>New state:</strong><br>' + curr['StateDescription'] +
'<br><br>Call: ' + call +
'<br><br>Return: ' + ret
} else if (illegalLast[partition][maxIndex].has(index)) {
// illegal next one
msg = '<strong>Previous state:</strong><br>' + lin[lin.length-1]['StateDescription'] +
'<br><br><strong>New state:</strong><br>&langle;invalid op&rangle;' +
'<br><br>Call: ' + call +
'<br><br>Return: ' + ret
} else {
// not part of this one
msg = 'Not part of selected element\'s partial linearization.'
}
tooltip.innerHTML = msg
}
lastTooltip = thisTooltip
}
tooltip.style.left = (event.pageX+20) + 'px'
tooltip.style.top = (event.pageY+20) + 'px'
}
function handleMouseOut() {
if (!selected) {
resetHighlight()
}
tooltip.style.opacity = 0
lastTooltip = [null, null, null, null, null]
}
function resetHighlight() {
// show all layers
historyLayers.forEach(layer => {
layer.classList.remove('hidden')
})
// show longest linearizations, which are first
partialLayers.forEach(layers => {
layers.forEach((l, i) => {
if (i === 0) {
l.classList.remove('hidden')
} else {
l.classList.add('hidden')
}
})
})
updateJump()
}
function updateJump() {
const jump = document.getElementById('jump-link')
// find first non-hidden point
// feels a little hacky, but it works
const point = errorPoints.find(pt => !pt.element.parentElement.classList.contains('hidden'))
if (point) {
jump.classList.remove('inactive')
jump.onclick = () => {
point.element.scrollIntoView({behavior: 'smooth', inline: 'center', block: 'center'})
if (!selected) {
select(point.partition, point.index)
}
}
} else {
jump.classList.add('inactive')
}
}
function handleClick() {
const partition = parseInt(this.dataset['partition'])
const index = parseInt(this.dataset['index'])
if (selected) {
const [sPartition, sIndex] = selectedIndex
if (partition === sPartition && index === sIndex) {
deselect()
return
} else {
historyRects[sPartition][sIndex].classList.remove('selected')
}
}
select(partition, index)
}
function handleBgClick() {
deselect()
}
function select(partition, index) {
selected = true
selectedIndex = [partition, index]
highlight(partition, index)
historyRects[partition][index].classList.add('selected')
}
function deselect() {
if (!selected) {
return
}
selected = false
resetHighlight()
const [partition, index] = selectedIndex
historyRects[partition][index].classList.remove('selected')
}
handleMouseOut() // initialize, same as mouse out
}
const data = %s
render(data)
</script>
</body>
</html>
`

591
src/raft/config.go Normal file

@ -0,0 +1,591 @@
package raft
//
// support for Raft tester.
//
// we will use the original config.go to test your code for grading.
// so, while you can modify this code to help you debug, please
// test with the original before submitting.
//
import "6.824/labgob"
import "6.824/labrpc"
import "bytes"
import "log"
import "sync"
import "testing"
import "runtime"
import "math/rand"
import crand "crypto/rand"
import "math/big"
import "encoding/base64"
import "time"
import "fmt"
func randstring(n int) string {
b := make([]byte, 2*n)
crand.Read(b)
s := base64.URLEncoding.EncodeToString(b)
return s[0:n]
}
func makeSeed() int64 {
max := big.NewInt(int64(1) << 62)
bigx, _ := crand.Int(crand.Reader, max)
x := bigx.Int64()
return x
}
type config struct {
mu sync.Mutex
t *testing.T
net *labrpc.Network
n int
rafts []*Raft
applyErr []string // from apply channel readers
connected []bool // whether each server is on the net
saved []*Persister
endnames [][]string // the port file names each sends to
logs []map[int]interface{} // copy of each server's committed entries
start time.Time // time at which make_config() was called
// begin()/end() statistics
t0 time.Time // time at which test_test.go called cfg.begin()
rpcs0 int // rpcTotal() at start of test
cmds0 int // number of agreements
bytes0 int64
maxIndex int
maxIndex0 int
}
var ncpu_once sync.Once
func make_config(t *testing.T, n int, unreliable bool, snapshot bool) *config {
ncpu_once.Do(func() {
if runtime.NumCPU() < 2 {
fmt.Printf("warning: only one CPU, which may conceal locking bugs\n")
}
rand.Seed(makeSeed())
})
runtime.GOMAXPROCS(4)
cfg := &config{}
cfg.t = t
cfg.net = labrpc.MakeNetwork()
cfg.n = n
cfg.applyErr = make([]string, cfg.n)
cfg.rafts = make([]*Raft, cfg.n)
cfg.connected = make([]bool, cfg.n)
cfg.saved = make([]*Persister, cfg.n)
cfg.endnames = make([][]string, cfg.n)
cfg.logs = make([]map[int]interface{}, cfg.n)
cfg.start = time.Now()
cfg.setunreliable(unreliable)
cfg.net.LongDelays(true)
applier := cfg.applier
if snapshot {
applier = cfg.applierSnap
}
// create a full set of Rafts.
for i := 0; i < cfg.n; i++ {
cfg.logs[i] = map[int]interface{}{}
cfg.start1(i, applier)
}
// connect everyone
for i := 0; i < cfg.n; i++ {
cfg.connect(i)
}
return cfg
}
// shut down a Raft server but save its persistent state.
func (cfg *config) crash1(i int) {
cfg.disconnect(i)
cfg.net.DeleteServer(i) // disable client connections to the server.
cfg.mu.Lock()
defer cfg.mu.Unlock()
// a fresh persister, in case old instance
// continues to update the Persister.
// but copy old persister's content so that we always
// pass Make() the last persisted state.
if cfg.saved[i] != nil {
cfg.saved[i] = cfg.saved[i].Copy()
}
rf := cfg.rafts[i]
if rf != nil {
cfg.mu.Unlock()
rf.Kill()
cfg.mu.Lock()
cfg.rafts[i] = nil
}
if cfg.saved[i] != nil {
raftlog := cfg.saved[i].ReadRaftState()
snapshot := cfg.saved[i].ReadSnapshot()
cfg.saved[i] = &Persister{}
cfg.saved[i].SaveStateAndSnapshot(raftlog, snapshot)
}
}
func (cfg *config) checkLogs(i int, m ApplyMsg) (string, bool) {
err_msg := ""
v := m.Command
for j := 0; j < len(cfg.logs); j++ {
if old, oldok := cfg.logs[j][m.CommandIndex]; oldok && old != v {
log.Printf("%v: log %v; server %v\n", i, cfg.logs[i], cfg.logs[j])
// some server has already committed a different value for this entry!
err_msg = fmt.Sprintf("commit index=%v server=%v %v != server=%v %v",
m.CommandIndex, i, m.Command, j, old)
}
}
_, prevok := cfg.logs[i][m.CommandIndex-1]
cfg.logs[i][m.CommandIndex] = v
if m.CommandIndex > cfg.maxIndex {
cfg.maxIndex = m.CommandIndex
}
return err_msg, prevok
}
// applier reads messages from applyCh and checks that they match the log
// contents
func (cfg *config) applier(i int, applyCh chan ApplyMsg) {
for m := range applyCh {
if m.CommandValid == false {
// ignore other types of ApplyMsg
} else {
cfg.mu.Lock()
err_msg, prevok := cfg.checkLogs(i, m)
cfg.mu.Unlock()
if m.CommandIndex > 1 && prevok == false {
err_msg = fmt.Sprintf("server %v apply out of order %v", i, m.CommandIndex)
}
if err_msg != "" {
log.Fatalf("apply error: %v\n", err_msg)
cfg.applyErr[i] = err_msg
// keep reading after error so that Raft doesn't block
// holding locks...
}
}
}
}
const SnapShotInterval = 10
// periodically snapshot raft state
func (cfg *config) applierSnap(i int, applyCh chan ApplyMsg) {
lastApplied := 0
for m := range applyCh {
if m.SnapshotValid {
//DPrintf("Installsnapshot %v %v\n", m.SnapshotIndex, lastApplied)
cfg.mu.Lock()
if cfg.rafts[i].CondInstallSnapshot(m.SnapshotTerm,
m.SnapshotIndex, m.Snapshot) {
cfg.logs[i] = make(map[int]interface{})
r := bytes.NewBuffer(m.Snapshot)
d := labgob.NewDecoder(r)
var v int
if d.Decode(&v) != nil {
log.Fatalf("decode error\n")
}
cfg.logs[i][m.SnapshotIndex] = v
lastApplied = m.SnapshotIndex
}
cfg.mu.Unlock()
} else if m.CommandValid && m.CommandIndex > lastApplied {
//DPrintf("apply %v lastApplied %v\n", m.CommandIndex, lastApplied)
cfg.mu.Lock()
err_msg, prevok := cfg.checkLogs(i, m)
cfg.mu.Unlock()
if m.CommandIndex > 1 && prevok == false {
err_msg = fmt.Sprintf("server %v apply out of order %v", i, m.CommandIndex)
}
if err_msg != "" {
log.Fatalf("apply error: %v\n", err_msg)
cfg.applyErr[i] = err_msg
// keep reading after error so that Raft doesn't block
// holding locks...
}
lastApplied = m.CommandIndex
if (m.CommandIndex+1)%SnapShotInterval == 0 {
w := new(bytes.Buffer)
e := labgob.NewEncoder(w)
v := m.Command
e.Encode(v)
cfg.rafts[i].Snapshot(m.CommandIndex, w.Bytes())
}
} else {
// Ignore other types of ApplyMsg or old
// commands. Old commands may never happen,
// depending on the Raft implementation, but
// just in case.
// DPrintf("Ignore: Index %v lastApplied %v\n", m.CommandIndex, lastApplied)
}
}
}
//
// start or re-start a Raft.
// if one already exists, "kill" it first.
// allocate new outgoing port file names, and a new
// state persister, to isolate the previous instance of
// this server, since we cannot really kill it.
//
func (cfg *config) start1(i int, applier func(int, chan ApplyMsg)) {
cfg.crash1(i)
// a fresh set of outgoing ClientEnd names.
// so that old crashed instance's ClientEnds can't send.
cfg.endnames[i] = make([]string, cfg.n)
for j := 0; j < cfg.n; j++ {
cfg.endnames[i][j] = randstring(20)
}
// a fresh set of ClientEnds.
ends := make([]*labrpc.ClientEnd, cfg.n)
for j := 0; j < cfg.n; j++ {
ends[j] = cfg.net.MakeEnd(cfg.endnames[i][j])
cfg.net.Connect(cfg.endnames[i][j], j)
}
cfg.mu.Lock()
// a fresh persister, so old instance doesn't overwrite
// new instance's persisted state.
// but copy old persister's content so that we always
// pass Make() the last persisted state.
if cfg.saved[i] != nil {
cfg.saved[i] = cfg.saved[i].Copy()
} else {
cfg.saved[i] = MakePersister()
}
cfg.mu.Unlock()
applyCh := make(chan ApplyMsg)
rf := Make(ends, i, cfg.saved[i], applyCh)
cfg.mu.Lock()
cfg.rafts[i] = rf
cfg.mu.Unlock()
go applier(i, applyCh)
svc := labrpc.MakeService(rf)
srv := labrpc.MakeServer()
srv.AddService(svc)
cfg.net.AddServer(i, srv)
}
func (cfg *config) checkTimeout() {
// enforce a two minute real-time limit on each test
if !cfg.t.Failed() && time.Since(cfg.start) > 120*time.Second {
cfg.t.Fatal("test took longer than 120 seconds")
}
}
func (cfg *config) cleanup() {
for i := 0; i < len(cfg.rafts); i++ {
if cfg.rafts[i] != nil {
cfg.rafts[i].Kill()
}
}
cfg.net.Cleanup()
cfg.checkTimeout()
}
// attach server i to the net.
func (cfg *config) connect(i int) {
// fmt.Printf("connect(%d)\n", i)
cfg.connected[i] = true
// outgoing ClientEnds
for j := 0; j < cfg.n; j++ {
if cfg.connected[j] {
endname := cfg.endnames[i][j]
cfg.net.Enable(endname, true)
}
}
// incoming ClientEnds
for j := 0; j < cfg.n; j++ {
if cfg.connected[j] {
endname := cfg.endnames[j][i]
cfg.net.Enable(endname, true)
}
}
}
// detach server i from the net.
func (cfg *config) disconnect(i int) {
// fmt.Printf("disconnect(%d)\n", i)
cfg.connected[i] = false
// outgoing ClientEnds
for j := 0; j < cfg.n; j++ {
if cfg.endnames[i] != nil {
endname := cfg.endnames[i][j]
cfg.net.Enable(endname, false)
}
}
// incoming ClientEnds
for j := 0; j < cfg.n; j++ {
if cfg.endnames[j] != nil {
endname := cfg.endnames[j][i]
cfg.net.Enable(endname, false)
}
}
}
func (cfg *config) rpcCount(server int) int {
return cfg.net.GetCount(server)
}
func (cfg *config) rpcTotal() int {
return cfg.net.GetTotalCount()
}
func (cfg *config) setunreliable(unrel bool) {
cfg.net.Reliable(!unrel)
}
func (cfg *config) bytesTotal() int64 {
return cfg.net.GetTotalBytes()
}
func (cfg *config) setlongreordering(longrel bool) {
cfg.net.LongReordering(longrel)
}
// check that there's exactly one leader.
// try a few times in case re-elections are needed.
func (cfg *config) checkOneLeader() int {
for iters := 0; iters < 10; iters++ {
ms := 450 + (rand.Int63() % 100)
time.Sleep(time.Duration(ms) * time.Millisecond)
leaders := make(map[int][]int)
for i := 0; i < cfg.n; i++ {
if cfg.connected[i] {
if term, leader := cfg.rafts[i].GetState(); leader {
leaders[term] = append(leaders[term], i)
}
}
}
lastTermWithLeader := -1
for term, leaders := range leaders {
if len(leaders) > 1 {
cfg.t.Fatalf("term %d has %d (>1) leaders", term, len(leaders))
}
if term > lastTermWithLeader {
lastTermWithLeader = term
}
}
if len(leaders) != 0 {
return leaders[lastTermWithLeader][0]
}
}
cfg.t.Fatalf("expected one leader, got none")
return -1
}
// check that everyone agrees on the term.
func (cfg *config) checkTerms() int {
term := -1
for i := 0; i < cfg.n; i++ {
if cfg.connected[i] {
xterm, _ := cfg.rafts[i].GetState()
if term == -1 {
term = xterm
} else if term != xterm {
cfg.t.Fatalf("servers disagree on term")
}
}
}
return term
}
// check that there's no leader
func (cfg *config) checkNoLeader() {
for i := 0; i < cfg.n; i++ {
if cfg.connected[i] {
_, is_leader := cfg.rafts[i].GetState()
if is_leader {
cfg.t.Fatalf("expected no leader, but %v claims to be leader", i)
}
}
}
}
// how many servers think a log entry is committed?
func (cfg *config) nCommitted(index int) (int, interface{}) {
count := 0
var cmd interface{} = nil
for i := 0; i < len(cfg.rafts); i++ {
if cfg.applyErr[i] != "" {
cfg.t.Fatal(cfg.applyErr[i])
}
cfg.mu.Lock()
cmd1, ok := cfg.logs[i][index]
cfg.mu.Unlock()
if ok {
if count > 0 && cmd != cmd1 {
cfg.t.Fatalf("committed values do not match: index %v, %v, %v\n",
index, cmd, cmd1)
}
count += 1
cmd = cmd1
}
}
return count, cmd
}
// wait for at least n servers to commit.
// but don't wait forever.
func (cfg *config) wait(index int, n int, startTerm int) interface{} {
to := 10 * time.Millisecond
for iters := 0; iters < 30; iters++ {
nd, _ := cfg.nCommitted(index)
if nd >= n {
break
}
time.Sleep(to)
if to < time.Second {
to *= 2
}
if startTerm > -1 {
for _, r := range cfg.rafts {
if t, _ := r.GetState(); t > startTerm {
// someone has moved on
// can no longer guarantee that we'll "win"
return -1
}
}
}
}
nd, cmd := cfg.nCommitted(index)
if nd < n {
cfg.t.Fatalf("only %d decided for index %d; wanted %d\n",
nd, index, n)
}
return cmd
}
// do a complete agreement.
// it might choose the wrong leader initially,
// and have to re-submit after giving up.
// entirely gives up after about 10 seconds.
// indirectly checks that the servers agree on the
// same value, since nCommitted() checks this,
// as do the threads that read from applyCh.
// returns index.
// if retry==true, may submit the command multiple
// times, in case a leader fails just after Start().
// if retry==false, calls Start() only once, in order
// to simplify the early Lab 2B tests.
func (cfg *config) one(cmd interface{}, expectedServers int, retry bool) int {
t0 := time.Now()
starts := 0
for time.Since(t0).Seconds() < 10 {
// try all the servers, maybe one is the leader.
index := -1
for si := 0; si < cfg.n; si++ {
starts = (starts + 1) % cfg.n
var rf *Raft
cfg.mu.Lock()
if cfg.connected[starts] {
rf = cfg.rafts[starts]
}
cfg.mu.Unlock()
if rf != nil {
index1, _, ok := rf.Start(cmd)
if ok {
index = index1
break
}
}
}
if index != -1 {
// somebody claimed to be the leader and to have
// submitted our command; wait a while for agreement.
t1 := time.Now()
for time.Since(t1).Seconds() < 2 {
nd, cmd1 := cfg.nCommitted(index)
if nd > 0 && nd >= expectedServers {
// committed
if cmd1 == cmd {
// and it was the command we submitted.
return index
}
}
time.Sleep(20 * time.Millisecond)
}
if retry == false {
cfg.t.Fatalf("one(%v) failed to reach agreement", cmd)
}
} else {
time.Sleep(50 * time.Millisecond)
}
}
cfg.t.Fatalf("one(%v) failed to reach agreement", cmd)
return -1
}
// start a Test.
// print the Test message.
// e.g. cfg.begin("Test (2B): RPC counts aren't too high")
func (cfg *config) begin(description string) {
fmt.Printf("%s ...\n", description)
cfg.t0 = time.Now()
cfg.rpcs0 = cfg.rpcTotal()
cfg.bytes0 = cfg.bytesTotal()
cfg.cmds0 = 0
cfg.maxIndex0 = cfg.maxIndex
}
// end a Test -- the fact that we got here means there
// was no failure.
// print the Passed message,
// and some performance numbers.
func (cfg *config) end() {
cfg.checkTimeout()
if cfg.t.Failed() == false {
cfg.mu.Lock()
t := time.Since(cfg.t0).Seconds() // real time
npeers := cfg.n // number of Raft peers
nrpc := cfg.rpcTotal() - cfg.rpcs0 // number of RPC sends
nbytes := cfg.bytesTotal() - cfg.bytes0 // number of bytes
ncmds := cfg.maxIndex - cfg.maxIndex0 // number of Raft agreements reported
cfg.mu.Unlock()
fmt.Printf(" ... Passed --")
fmt.Printf(" %4.1f %d %4d %7d %4d\n", t, npeers, nrpc, nbytes, ncmds)
}
}
// Maximum log size across all servers
func (cfg *config) LogSize() int {
logsize := 0
for i := 0; i < cfg.n; i++ {
n := cfg.saved[i].RaftStateSize()
if n > logsize {
logsize = n
}
}
return logsize
}
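// Editor's note: an illustrative sketch, not part of the original commit, of how the
// tests in test_test.go typically drive this harness; the command values and the
// description string are arbitrary.
func exampleAgreement(t *testing.T) {
	servers := 3
	cfg := make_config(t, servers, false, false)
	defer cfg.cleanup()

	cfg.begin("Test: example agreement sketch")
	for i := 1; i <= 3; i++ {
		// submit one command and require all three servers to commit it
		cfg.one(100*i, servers, true)
	}
	cfg.end()
}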

76
src/raft/persister.go Normal file

@ -0,0 +1,76 @@
package raft
//
// support for Raft and kvraft to save persistent
// Raft state (log &c) and k/v server snapshots.
//
// we will use the original persister.go to test your code for grading.
// so, while you can modify this code to help you debug, please
// test with the original before submitting.
//
import "sync"
type Persister struct {
mu sync.Mutex
raftstate []byte
snapshot []byte
}
func MakePersister() *Persister {
return &Persister{}
}
func clone(orig []byte) []byte {
x := make([]byte, len(orig))
copy(x, orig)
return x
}
func (ps *Persister) Copy() *Persister {
ps.mu.Lock()
defer ps.mu.Unlock()
np := MakePersister()
np.raftstate = ps.raftstate
np.snapshot = ps.snapshot
return np
}
func (ps *Persister) SaveRaftState(state []byte) {
ps.mu.Lock()
defer ps.mu.Unlock()
ps.raftstate = clone(state)
}
func (ps *Persister) ReadRaftState() []byte {
ps.mu.Lock()
defer ps.mu.Unlock()
return clone(ps.raftstate)
}
func (ps *Persister) RaftStateSize() int {
ps.mu.Lock()
defer ps.mu.Unlock()
return len(ps.raftstate)
}
// Save both Raft state and K/V snapshot as a single atomic action,
// to help avoid them getting out of sync.
func (ps *Persister) SaveStateAndSnapshot(state []byte, snapshot []byte) {
ps.mu.Lock()
defer ps.mu.Unlock()
ps.raftstate = clone(state)
ps.snapshot = clone(snapshot)
}
func (ps *Persister) ReadSnapshot() []byte {
ps.mu.Lock()
defer ps.mu.Unlock()
return clone(ps.snapshot)
}
func (ps *Persister) SnapshotSize() int {
ps.mu.Lock()
defer ps.mu.Unlock()
return len(ps.snapshot)
}
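// Editor's note: an illustrative sketch, not part of the original commit, of how Raft
// code typically uses this Persister together with labgob; the currentTerm, votedFor,
// and logEntries fields are assumptions about a student implementation.
//
//	w := new(bytes.Buffer)
//	e := labgob.NewEncoder(w)
//	e.Encode(rf.currentTerm)
//	e.Encode(rf.votedFor)
//	e.Encode(rf.logEntries)
//	state := w.Bytes()
//	// save Raft state and the service snapshot in one atomic step
//	rf.persister.SaveStateAndSnapshot(state, snapshot)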

284
src/raft/raft.go Normal file

@ -0,0 +1,284 @@
package raft
//
// this is an outline of the API that raft must expose to
// the service (or tester). see comments below for
// each of these functions for more details.
//
// rf = Make(...)
// create a new Raft server.
// rf.Start(command interface{}) (index, term, isleader)
// start agreement on a new log entry
// rf.GetState() (term, isLeader)
// ask a Raft for its current term, and whether it thinks it is leader
// ApplyMsg
// each time a new entry is committed to the log, each Raft peer
// should send an ApplyMsg to the service (or tester)
// in the same server.
//
import (
// "bytes"
"sync"
"sync/atomic"
// "6.824/labgob"
"6.824/labrpc"
)
//
// as each Raft peer becomes aware that successive log entries are
// committed, the peer should send an ApplyMsg to the service (or
// tester) on the same server, via the applyCh passed to Make(). set
// CommandValid to true to indicate that the ApplyMsg contains a newly
// committed log entry.
//
// in part 2D you'll want to send other kinds of messages (e.g.,
// snapshots) on the applyCh, but set CommandValid to false for these
// other uses.
//
type ApplyMsg struct {
CommandValid bool
Command interface{}
CommandIndex int
// For 2D:
SnapshotValid bool
Snapshot []byte
SnapshotTerm int
SnapshotIndex int
}
//
// A Go object implementing a single Raft peer.
//
type Raft struct {
mu sync.Mutex // Lock to protect shared access to this peer's state
peers []*labrpc.ClientEnd // RPC end points of all peers
persister *Persister // Object to hold this peer's persisted state
me int // this peer's index into peers[]
dead int32 // set by Kill()
// Your data here (2A, 2B, 2C).
// Look at the paper's Figure 2 for a description of what
// state a Raft server must maintain.
}
// return currentTerm and whether this server
// believes it is the leader.
func (rf *Raft) GetState() (int, bool) {
var term int
var isleader bool
// Your code here (2A).
return term, isleader
}
//
// save Raft's persistent state to stable storage,
// where it can later be retrieved after a crash and restart.
// see paper's Figure 2 for a description of what should be persistent.
//
func (rf *Raft) persist() {
// Your code here (2C).
// Example:
// w := new(bytes.Buffer)
// e := labgob.NewEncoder(w)
// e.Encode(rf.xxx)
// e.Encode(rf.yyy)
// data := w.Bytes()
// rf.persister.SaveRaftState(data)
}
//
// restore previously persisted state.
//
func (rf *Raft) readPersist(data []byte) {
if data == nil || len(data) < 1 { // bootstrap without any state?
return
}
// Your code here (2C).
// Example:
// r := bytes.NewBuffer(data)
// d := labgob.NewDecoder(r)
// var xxx
// var yyy
// if d.Decode(&xxx) != nil ||
// d.Decode(&yyy) != nil {
// error...
// } else {
// rf.xxx = xxx
// rf.yyy = yyy
// }
}
//
// A service wants to switch to a snapshot. Only do so if Raft doesn't
// have more recent info than the snapshot it communicated on applyCh.
//
func (rf *Raft) CondInstallSnapshot(lastIncludedTerm int, lastIncludedIndex int, snapshot []byte) bool {
// Your code here (2D).
return true
}
// the service says it has created a snapshot that has
// all info up to and including index. this means the
// service no longer needs the log through (and including)
// that index. Raft should now trim its log as much as possible.
func (rf *Raft) Snapshot(index int, snapshot []byte) {
// Your code here (2D).
}
//
// example RequestVote RPC arguments structure.
// field names must start with capital letters!
//
type RequestVoteArgs struct {
// Your data here (2A, 2B).
}
//
// example RequestVote RPC reply structure.
// field names must start with capital letters!
//
type RequestVoteReply struct {
// Your data here (2A).
}
//
// example RequestVote RPC handler.
//
func (rf *Raft) RequestVote(args *RequestVoteArgs, reply *RequestVoteReply) {
// Your code here (2A, 2B).
}
//
// example code to send a RequestVote RPC to a server.
// server is the index of the target server in rf.peers[].
// expects RPC arguments in args.
// fills in *reply with RPC reply, so caller should
// pass &reply.
// the types of the args and reply passed to Call() must be
// the same as the types of the arguments declared in the
// handler function (including whether they are pointers).
//
// The labrpc package simulates a lossy network, in which servers
// may be unreachable, and in which requests and replies may be lost.
// Call() sends a request and waits for a reply. If a reply arrives
// within a timeout interval, Call() returns true; otherwise
// Call() returns false. Thus Call() may not return for a while.
// A false return can be caused by a dead server, a live server that
// can't be reached, a lost request, or a lost reply.
//
// Call() is guaranteed to return (perhaps after a delay) *except* if the
// handler function on the server side does not return. Thus there
// is no need to implement your own timeouts around Call().
//
// look at the comments in ../labrpc/labrpc.go for more details.
//
// if you're having trouble getting RPC to work, check that you've
// capitalized all field names in structs passed over RPC, and
// that the caller passes the address of the reply struct with &, not
// the struct itself.
//
func (rf *Raft) sendRequestVote(server int, args *RequestVoteArgs, reply *RequestVoteReply) bool {
ok := rf.peers[server].Call("Raft.RequestVote", args, reply)
return ok
}
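// Editor's note: an illustrative sketch, not part of the original commit, of how a
// candidate might send RequestVote RPCs concurrently; the args/reply contents are
// left out because the structs above are for you to fill in.
//
//	for peer := range rf.peers {
//		if peer == rf.me {
//			continue
//		}
//		go func(server int) {
//			args := &RequestVoteArgs{ /* candidate's term, id, last log info */ }
//			reply := &RequestVoteReply{}
//			if rf.sendRequestVote(server, args, reply) {
//				// count the vote, or step down if the reply carries a higher term
//			}
//		}(peer)
//	}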
//
// the service using Raft (e.g. a k/v server) wants to start
// agreement on the next command to be appended to Raft's log. if this
// server isn't the leader, returns false. otherwise start the
// agreement and return immediately. there is no guarantee that this
// command will ever be committed to the Raft log, since the leader
// may fail or lose an election. even if the Raft instance has been killed,
// this function should return gracefully.
//
// the first return value is the index that the command will appear at
// if it's ever committed. the second return value is the current
// term. the third return value is true if this server believes it is
// the leader.
//
func (rf *Raft) Start(command interface{}) (int, int, bool) {
index := -1
term := -1
isLeader := true
// Your code here (2B).
return index, term, isLeader
}
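// Editor's note: an illustrative sketch, not part of the original commit, of how a
// service typically uses Start() together with applyCh; the command value is arbitrary.
//
//	index, term, isLeader := rf.Start(cmd)
//	if isLeader {
//		// remember (index, term); when an ApplyMsg with CommandIndex == index
//		// arrives on applyCh, compare its Command with cmd -- a later leader may
//		// have committed a different entry at that index.
//	}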
//
// the tester doesn't halt goroutines created by Raft after each test,
// but it does call the Kill() method. your code can use killed() to
// check whether Kill() has been called. the use of atomic avoids the
// need for a lock.
//
// the issue is that long-running goroutines use memory and may chew
// up CPU time, perhaps causing later tests to fail and generating
// confusing debug output. any goroutine with a long-running loop
// should call killed() to check whether it should stop.
//
func (rf *Raft) Kill() {
atomic.StoreInt32(&rf.dead, 1)
// Your code here, if desired.
}
func (rf *Raft) killed() bool {
z := atomic.LoadInt32(&rf.dead)
return z == 1
}
// The ticker goroutine starts a new election if this peer hasn't received
// heartbeats recently.
func (rf *Raft) ticker() {
for rf.killed() == false {
// Your code here to check if a leader election should
// be started and to randomize sleeping time using
// time.Sleep().
}
}
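// Editor's note: an illustrative sketch, not part of the original commit, of one way
// to structure the ticker loop; the timeout range and the lastHeard field are
// assumptions about a student implementation, and the loop would need the time and
// math/rand imports.
//
//	for rf.killed() == false {
//		timeout := time.Duration(300+rand.Intn(300)) * time.Millisecond
//		time.Sleep(timeout)
//		rf.mu.Lock()
//		quiet := time.Since(rf.lastHeard) >= timeout
//		rf.mu.Unlock()
//		if quiet {
//			// become a candidate: bump currentTerm, vote for self, send RequestVotes
//		}
//	}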
//
// the service or tester wants to create a Raft server. the ports
// of all the Raft servers (including this one) are in peers[]. this
// server's port is peers[me]. all the servers' peers[] arrays
// have the same order. persister is a place for this server to
// save its persistent state, and also initially holds the most
// recent saved state, if any. applyCh is a channel on which the
// tester or service expects Raft to send ApplyMsg messages.
// Make() must return quickly, so it should start goroutines
// for any long-running work.
//
func Make(peers []*labrpc.ClientEnd, me int,
persister *Persister, applyCh chan ApplyMsg) *Raft {
rf := &Raft{}
rf.peers = peers
rf.persister = persister
rf.me = me
// Your initialization code here (2A, 2B, 2C).
// initialize from state persisted before a crash
rf.readPersist(persister.ReadRaftState())
// start ticker goroutine to start elections
go rf.ticker()
return rf
}

1086
src/raft/test_test.go Normal file

File diff suppressed because it is too large

13
src/raft/util.go Normal file

@ -0,0 +1,13 @@
package raft
import "log"
// Debugging
const Debug = false
func DPrintf(format string, a ...interface{}) (n int, err error) {
if Debug {
log.Printf(format, a...)
}
return
}

101
src/shardctrler/client.go Normal file

@ -0,0 +1,101 @@
package shardctrler
//
// Shardctrler clerk.
//
import "6.824/labrpc"
import "time"
import "crypto/rand"
import "math/big"
type Clerk struct {
servers []*labrpc.ClientEnd
// Your data here.
}
func nrand() int64 {
max := big.NewInt(int64(1) << 62)
bigx, _ := rand.Int(rand.Reader, max)
x := bigx.Int64()
return x
}
func MakeClerk(servers []*labrpc.ClientEnd) *Clerk {
ck := new(Clerk)
ck.servers = servers
// Your code here.
return ck
}
func (ck *Clerk) Query(num int) Config {
args := &QueryArgs{}
// Your code here.
args.Num = num
for {
// try each known server.
for _, srv := range ck.servers {
var reply QueryReply
ok := srv.Call("ShardCtrler.Query", args, &reply)
if ok && reply.WrongLeader == false {
return reply.Config
}
}
time.Sleep(100 * time.Millisecond)
}
}
func (ck *Clerk) Join(servers map[int][]string) {
args := &JoinArgs{}
// Your code here.
args.Servers = servers
for {
// try each known server.
for _, srv := range ck.servers {
var reply JoinReply
ok := srv.Call("ShardCtrler.Join", args, &reply)
if ok && reply.WrongLeader == false {
return
}
}
time.Sleep(100 * time.Millisecond)
}
}
func (ck *Clerk) Leave(gids []int) {
args := &LeaveArgs{}
// Your code here.
args.GIDs = gids
for {
// try each known server.
for _, srv := range ck.servers {
var reply LeaveReply
ok := srv.Call("ShardCtrler.Leave", args, &reply)
if ok && reply.WrongLeader == false {
return
}
}
time.Sleep(100 * time.Millisecond)
}
}
func (ck *Clerk) Move(shard int, gid int) {
args := &MoveArgs{}
// Your code here.
args.Shard = shard
args.GID = gid
for {
// try each known server.
for _, srv := range ck.servers {
var reply MoveReply
ok := srv.Call("ShardCtrler.Move", args, &reply)
if ok && reply.WrongLeader == false {
return
}
}
time.Sleep(100 * time.Millisecond)
}
}

73
src/shardctrler/common.go Normal file

@ -0,0 +1,73 @@
package shardctrler
//
// Shard controller: assigns shards to replication groups.
//
// RPC interface:
// Join(servers) -- add a set of groups (gid -> server-list mapping).
// Leave(gids) -- delete a set of groups.
// Move(shard, gid) -- hand off one shard from current owner to gid.
// Query(num) -> fetch Config # num, or latest config if num==-1.
//
// A Config (configuration) describes a set of replica groups, and the
// replica group responsible for each shard. Configs are numbered. Config
// #0 is the initial configuration, with no groups and all shards
// assigned to group 0 (the invalid group).
//
// You will need to add fields to the RPC argument structs.
//
// The number of shards.
const NShards = 10
// A configuration -- an assignment of shards to groups.
// Please don't change this.
type Config struct {
Num int // config number
Shards [NShards]int // shard -> gid
Groups map[int][]string // gid -> servers[]
}
const (
OK = "OK"
)
type Err string
type JoinArgs struct {
Servers map[int][]string // new GID -> servers mappings
}
type JoinReply struct {
WrongLeader bool
Err Err
}
type LeaveArgs struct {
GIDs []int
}
type LeaveReply struct {
WrongLeader bool
Err Err
}
type MoveArgs struct {
Shard int
GID int
}
type MoveReply struct {
WrongLeader bool
Err Err
}
type QueryArgs struct {
Num int // desired config number
}
type QueryReply struct {
WrongLeader bool
Err Err
Config Config
}
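// Editor's note: an illustrative sketch, not part of the original commit, showing how
// a client drives the RPC interface described above; the gid and server names are
// arbitrary.
func exampleReconfigure(ck *Clerk) Config {
	// add replica group 1 with three servers, then hand shard 0 to it
	ck.Join(map[int][]string{1: {"server-1a", "server-1b", "server-1c"}})
	ck.Move(0, 1)
	// -1 asks for the latest configuration
	return ck.Query(-1)
}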

357
src/shardctrler/config.go Normal file

@ -0,0 +1,357 @@
package shardctrler
import "6.824/labrpc"
import "6.824/raft"
import "testing"
import "os"
// import "log"
import crand "crypto/rand"
import "math/rand"
import "encoding/base64"
import "sync"
import "runtime"
import "time"
func randstring(n int) string {
b := make([]byte, 2*n)
crand.Read(b)
s := base64.URLEncoding.EncodeToString(b)
return s[0:n]
}
// Randomize server handles
func random_handles(kvh []*labrpc.ClientEnd) []*labrpc.ClientEnd {
sa := make([]*labrpc.ClientEnd, len(kvh))
copy(sa, kvh)
for i := range sa {
j := rand.Intn(i + 1)
sa[i], sa[j] = sa[j], sa[i]
}
return sa
}
type config struct {
mu sync.Mutex
t *testing.T
net *labrpc.Network
n int
servers []*ShardCtrler
saved []*raft.Persister
endnames [][]string // names of each server's sending ClientEnds
clerks map[*Clerk][]string
nextClientId int
start time.Time // time at which make_config() was called
}
func (cfg *config) checkTimeout() {
// enforce a two minute real-time limit on each test
if !cfg.t.Failed() && time.Since(cfg.start) > 120*time.Second {
cfg.t.Fatal("test took longer than 120 seconds")
}
}
func (cfg *config) cleanup() {
cfg.mu.Lock()
defer cfg.mu.Unlock()
for i := 0; i < len(cfg.servers); i++ {
if cfg.servers[i] != nil {
cfg.servers[i].Kill()
}
}
cfg.net.Cleanup()
cfg.checkTimeout()
}
// Maximum log size across all servers
func (cfg *config) LogSize() int {
logsize := 0
for i := 0; i < cfg.n; i++ {
n := cfg.saved[i].RaftStateSize()
if n > logsize {
logsize = n
}
}
return logsize
}
// attach server i to servers listed in to
// caller must hold cfg.mu
func (cfg *config) connectUnlocked(i int, to []int) {
// log.Printf("connect peer %d to %v\n", i, to)
// outgoing socket files
for j := 0; j < len(to); j++ {
endname := cfg.endnames[i][to[j]]
cfg.net.Enable(endname, true)
}
// incoming socket files
for j := 0; j < len(to); j++ {
endname := cfg.endnames[to[j]][i]
cfg.net.Enable(endname, true)
}
}
func (cfg *config) connect(i int, to []int) {
cfg.mu.Lock()
defer cfg.mu.Unlock()
cfg.connectUnlocked(i, to)
}
// detach server i from the servers listed in from
// caller must hold cfg.mu
func (cfg *config) disconnectUnlocked(i int, from []int) {
// log.Printf("disconnect peer %d from %v\n", i, from)
// outgoing socket files
for j := 0; j < len(from); j++ {
if cfg.endnames[i] != nil {
endname := cfg.endnames[i][from[j]]
cfg.net.Enable(endname, false)
}
}
// incoming socket files
for j := 0; j < len(from); j++ {
if cfg.endnames[j] != nil {
endname := cfg.endnames[from[j]][i]
cfg.net.Enable(endname, false)
}
}
}
func (cfg *config) disconnect(i int, from []int) {
cfg.mu.Lock()
defer cfg.mu.Unlock()
cfg.disconnectUnlocked(i, from)
}
func (cfg *config) All() []int {
all := make([]int, cfg.n)
for i := 0; i < cfg.n; i++ {
all[i] = i
}
return all
}
func (cfg *config) ConnectAll() {
cfg.mu.Lock()
defer cfg.mu.Unlock()
for i := 0; i < cfg.n; i++ {
cfg.connectUnlocked(i, cfg.All())
}
}
// Sets up 2 partitions with connectivity between servers in each partition.
func (cfg *config) partition(p1 []int, p2 []int) {
cfg.mu.Lock()
defer cfg.mu.Unlock()
// log.Printf("partition servers into: %v %v\n", p1, p2)
for i := 0; i < len(p1); i++ {
cfg.disconnectUnlocked(p1[i], p2)
cfg.connectUnlocked(p1[i], p1)
}
for i := 0; i < len(p2); i++ {
cfg.disconnectUnlocked(p2[i], p1)
cfg.connectUnlocked(p2[i], p2)
}
}
// Create a clerk with clerk-specific server names.
// Give it connections to all of the servers, but for
// now enable only connections to servers in to[].
func (cfg *config) makeClient(to []int) *Clerk {
cfg.mu.Lock()
defer cfg.mu.Unlock()
// a fresh set of ClientEnds.
ends := make([]*labrpc.ClientEnd, cfg.n)
endnames := make([]string, cfg.n)
for j := 0; j < cfg.n; j++ {
endnames[j] = randstring(20)
ends[j] = cfg.net.MakeEnd(endnames[j])
cfg.net.Connect(endnames[j], j)
}
ck := MakeClerk(random_handles(ends))
cfg.clerks[ck] = endnames
cfg.nextClientId++
cfg.ConnectClientUnlocked(ck, to)
return ck
}
func (cfg *config) deleteClient(ck *Clerk) {
cfg.mu.Lock()
defer cfg.mu.Unlock()
v := cfg.clerks[ck]
for i := 0; i < len(v); i++ {
os.Remove(v[i])
}
delete(cfg.clerks, ck)
}
// caller should hold cfg.mu
func (cfg *config) ConnectClientUnlocked(ck *Clerk, to []int) {
// log.Printf("ConnectClient %v to %v\n", ck, to)
endnames := cfg.clerks[ck]
for j := 0; j < len(to); j++ {
s := endnames[to[j]]
cfg.net.Enable(s, true)
}
}
func (cfg *config) ConnectClient(ck *Clerk, to []int) {
cfg.mu.Lock()
defer cfg.mu.Unlock()
cfg.ConnectClientUnlocked(ck, to)
}
// caller should hold cfg.mu
func (cfg *config) DisconnectClientUnlocked(ck *Clerk, from []int) {
// log.Printf("DisconnectClient %v from %v\n", ck, from)
endnames := cfg.clerks[ck]
for j := 0; j < len(from); j++ {
s := endnames[from[j]]
cfg.net.Enable(s, false)
}
}
func (cfg *config) DisconnectClient(ck *Clerk, from []int) {
cfg.mu.Lock()
defer cfg.mu.Unlock()
cfg.DisconnectClientUnlocked(ck, from)
}
// Shutdown a server by isolating it
func (cfg *config) ShutdownServer(i int) {
cfg.mu.Lock()
defer cfg.mu.Unlock()
cfg.disconnectUnlocked(i, cfg.All())
// disable client connections to the server.
// it's important to do this before creating
// the new Persister in saved[i], to avoid
// the possibility of the server returning a
// positive reply to an Append but persisting
// the result in the superseded Persister.
cfg.net.DeleteServer(i)
// a fresh persister, in case old instance
// continues to update the Persister.
// but copy old persister's content so that we always
// pass Make() the last persisted state.
if cfg.saved[i] != nil {
cfg.saved[i] = cfg.saved[i].Copy()
}
kv := cfg.servers[i]
if kv != nil {
cfg.mu.Unlock()
kv.Kill()
cfg.mu.Lock()
cfg.servers[i] = nil
}
}
// To restart a server, first call ShutdownServer().
func (cfg *config) StartServer(i int) {
cfg.mu.Lock()
// a fresh set of outgoing ClientEnd names.
cfg.endnames[i] = make([]string, cfg.n)
for j := 0; j < cfg.n; j++ {
cfg.endnames[i][j] = randstring(20)
}
// a fresh set of ClientEnds.
ends := make([]*labrpc.ClientEnd, cfg.n)
for j := 0; j < cfg.n; j++ {
ends[j] = cfg.net.MakeEnd(cfg.endnames[i][j])
cfg.net.Connect(cfg.endnames[i][j], j)
}
// a fresh persister, so old instance doesn't overwrite
// new instance's persisted state.
// give the fresh persister a copy of the old persister's
// state, so that the spec is that we pass StartServer()
// the last persisted state.
if cfg.saved[i] != nil {
cfg.saved[i] = cfg.saved[i].Copy()
} else {
cfg.saved[i] = raft.MakePersister()
}
cfg.mu.Unlock()
cfg.servers[i] = StartServer(ends, i, cfg.saved[i])
kvsvc := labrpc.MakeService(cfg.servers[i])
rfsvc := labrpc.MakeService(cfg.servers[i].rf)
srv := labrpc.MakeServer()
srv.AddService(kvsvc)
srv.AddService(rfsvc)
cfg.net.AddServer(i, srv)
}
func (cfg *config) Leader() (bool, int) {
cfg.mu.Lock()
defer cfg.mu.Unlock()
for i := 0; i < cfg.n; i++ {
if cfg.servers[i] != nil {
_, is_leader := cfg.servers[i].rf.GetState()
if is_leader {
return true, i
}
}
}
return false, 0
}
// Partition servers into 2 groups and put current leader in minority
func (cfg *config) make_partition() ([]int, []int) {
_, l := cfg.Leader()
p1 := make([]int, cfg.n/2+1)
p2 := make([]int, cfg.n/2)
j := 0
for i := 0; i < cfg.n; i++ {
if i != l {
if j < len(p1) {
p1[j] = i
} else {
p2[j-len(p1)] = i
}
j++
}
}
p2[len(p2)-1] = l
return p1, p2
}
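// For example, with cfg.n = 5 and current leader 2, the loop above fills
// p1 = [0, 1, 3] (the majority side) and leaves the leader for p2's last
// slot, so p2 = [4, 2] is the minority partition containing the old leader.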
func make_config(t *testing.T, n int, unreliable bool) *config {
runtime.GOMAXPROCS(4)
cfg := &config{}
cfg.t = t
cfg.net = labrpc.MakeNetwork()
cfg.n = n
cfg.servers = make([]*ShardCtrler, cfg.n)
cfg.saved = make([]*raft.Persister, cfg.n)
cfg.endnames = make([][]string, cfg.n)
cfg.clerks = make(map[*Clerk][]string)
cfg.nextClientId = cfg.n + 1000 // client ids start 1000 above the highest serverid
cfg.start = time.Now()
// create a full set of KV servers.
for i := 0; i < cfg.n; i++ {
cfg.StartServer(i)
}
cfg.ConnectAll()
cfg.net.Reliable(!unreliable)
return cfg
}

80
src/shardctrler/server.go Normal file
View File

@ -0,0 +1,80 @@
package shardctrler
import "6.824/raft"
import "6.824/labrpc"
import "sync"
import "6.824/labgob"
type ShardCtrler struct {
mu sync.Mutex
me int
rf *raft.Raft
applyCh chan raft.ApplyMsg
// Your data here.
configs []Config // indexed by config num
}
type Op struct {
// Your data here.
}
func (sc *ShardCtrler) Join(args *JoinArgs, reply *JoinReply) {
// Your code here.
}
func (sc *ShardCtrler) Leave(args *LeaveArgs, reply *LeaveReply) {
// Your code here.
}
func (sc *ShardCtrler) Move(args *MoveArgs, reply *MoveReply) {
// Your code here.
}
func (sc *ShardCtrler) Query(args *QueryArgs, reply *QueryReply) {
// Your code here.
}
//
// the tester calls Kill() when a ShardCtrler instance won't
// be needed again. you are not required to do anything
// in Kill(), but it might be convenient to (for example)
// turn off debug output from this instance.
//
func (sc *ShardCtrler) Kill() {
sc.rf.Kill()
// Your code here, if desired.
}
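// A hypothetical sketch (not part of the skeleton) of the "turn off debug
// output" idea mentioned above: Kill() records a dead flag that long-running
// goroutines and print wrappers can consult. The dead field, killed()
// helper, and the sync/atomic import are assumptions, not lab requirements:
//
//	// add to ShardCtrler:  dead int32
//	// add to Kill():       atomic.StoreInt32(&sc.dead, 1)
//
//	func (sc *ShardCtrler) killed() bool {
//		return atomic.LoadInt32(&sc.dead) == 1
//	}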
// needed by shardkv tester
func (sc *ShardCtrler) Raft() *raft.Raft {
return sc.rf
}
//
// servers[] contains the ports of the set of
// servers that will cooperate via Raft to
// form the fault-tolerant shardctrler service.
// me is the index of the current server in servers[].
//
func StartServer(servers []*labrpc.ClientEnd, me int, persister *raft.Persister) *ShardCtrler {
sc := new(ShardCtrler)
sc.me = me
sc.configs = make([]Config, 1)
sc.configs[0].Groups = map[int][]string{}
labgob.Register(Op{})
sc.applyCh = make(chan raft.ApplyMsg)
sc.rf = raft.Make(servers, me, persister, sc.applyCh)
// Your code here.
return sc
}
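// A hypothetical sketch (not part of the skeleton) of the goroutine that
// StartServer() typically launches to drain applyCh; it assumes the standard
// lab ApplyMsg fields (CommandValid, Command) and is only illustrative:
//
//	go func() {
//		for msg := range sc.applyCh {
//			if msg.CommandValid {
//				op := msg.Command.(Op)
//				// apply op to sc.configs under sc.mu, then wake the
//				// RPC handler waiting on this log index.
//				_ = op
//			}
//		}
//	}()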

403
src/shardctrler/test_test.go Normal file
View File

@ -0,0 +1,403 @@
package shardctrler
import (
"fmt"
"sync"
"testing"
"time"
)
// import "time"
func check(t *testing.T, groups []int, ck *Clerk) {
c := ck.Query(-1)
if len(c.Groups) != len(groups) {
t.Fatalf("wanted %v groups, got %v", len(groups), len(c.Groups))
}
// are the groups as expected?
for _, g := range groups {
_, ok := c.Groups[g]
if ok != true {
t.Fatalf("missing group %v", g)
}
}
// any un-allocated shards?
if len(groups) > 0 {
for s, g := range c.Shards {
_, ok := c.Groups[g]
if ok == false {
t.Fatalf("shard %v -> invalid group %v", s, g)
}
}
}
// more or less balanced sharding?
counts := map[int]int{}
for _, g := range c.Shards {
counts[g] += 1
}
min := 257
max := 0
for g, _ := range c.Groups {
if counts[g] > max {
max = counts[g]
}
if counts[g] < min {
min = counts[g]
}
}
if max > min+1 {
t.Fatalf("max %v too much larger than min %v", max, min)
}
}
func check_same_config(t *testing.T, c1 Config, c2 Config) {
if c1.Num != c2.Num {
t.Fatalf("Num wrong")
}
if c1.Shards != c2.Shards {
t.Fatalf("Shards wrong")
}
if len(c1.Groups) != len(c2.Groups) {
t.Fatalf("number of Groups is wrong")
}
for gid, sa := range c1.Groups {
sa1, ok := c2.Groups[gid]
if ok == false || len(sa1) != len(sa) {
t.Fatalf("len(Groups) wrong")
}
if ok && len(sa1) == len(sa) {
for j := 0; j < len(sa); j++ {
if sa[j] != sa1[j] {
t.Fatalf("Groups wrong")
}
}
}
}
}
func TestBasic(t *testing.T) {
const nservers = 3
cfg := make_config(t, nservers, false)
defer cfg.cleanup()
ck := cfg.makeClient(cfg.All())
fmt.Printf("Test: Basic leave/join ...\n")
cfa := make([]Config, 6)
cfa[0] = ck.Query(-1)
check(t, []int{}, ck)
var gid1 int = 1
ck.Join(map[int][]string{gid1: []string{"x", "y", "z"}})
check(t, []int{gid1}, ck)
cfa[1] = ck.Query(-1)
var gid2 int = 2
ck.Join(map[int][]string{gid2: []string{"a", "b", "c"}})
check(t, []int{gid1, gid2}, ck)
cfa[2] = ck.Query(-1)
cfx := ck.Query(-1)
sa1 := cfx.Groups[gid1]
if len(sa1) != 3 || sa1[0] != "x" || sa1[1] != "y" || sa1[2] != "z" {
t.Fatalf("wrong servers for gid %v: %v\n", gid1, sa1)
}
sa2 := cfx.Groups[gid2]
if len(sa2) != 3 || sa2[0] != "a" || sa2[1] != "b" || sa2[2] != "c" {
t.Fatalf("wrong servers for gid %v: %v\n", gid2, sa2)
}
ck.Leave([]int{gid1})
check(t, []int{gid2}, ck)
cfa[4] = ck.Query(-1)
ck.Leave([]int{gid2})
cfa[5] = ck.Query(-1)
fmt.Printf(" ... Passed\n")
fmt.Printf("Test: Historical queries ...\n")
for s := 0; s < nservers; s++ {
cfg.ShutdownServer(s)
for i := 0; i < len(cfa); i++ {
c := ck.Query(cfa[i].Num)
check_same_config(t, c, cfa[i])
}
cfg.StartServer(s)
cfg.ConnectAll()
}
fmt.Printf(" ... Passed\n")
fmt.Printf("Test: Move ...\n")
{
var gid3 int = 503
ck.Join(map[int][]string{gid3: []string{"3a", "3b", "3c"}})
var gid4 int = 504
ck.Join(map[int][]string{gid4: []string{"4a", "4b", "4c"}})
for i := 0; i < NShards; i++ {
cf := ck.Query(-1)
if i < NShards/2 {
ck.Move(i, gid3)
if cf.Shards[i] != gid3 {
cf1 := ck.Query(-1)
if cf1.Num <= cf.Num {
t.Fatalf("Move should increase Config.Num")
}
}
} else {
ck.Move(i, gid4)
if cf.Shards[i] != gid4 {
cf1 := ck.Query(-1)
if cf1.Num <= cf.Num {
t.Fatalf("Move should increase Config.Num")
}
}
}
}
cf2 := ck.Query(-1)
for i := 0; i < NShards; i++ {
if i < NShards/2 {
if cf2.Shards[i] != gid3 {
t.Fatalf("expected shard %v on gid %v actually %v",
i, gid3, cf2.Shards[i])
}
} else {
if cf2.Shards[i] != gid4 {
t.Fatalf("expected shard %v on gid %v actually %v",
i, gid4, cf2.Shards[i])
}
}
}
ck.Leave([]int{gid3})
ck.Leave([]int{gid4})
}
fmt.Printf(" ... Passed\n")
fmt.Printf("Test: Concurrent leave/join ...\n")
const npara = 10
var cka [npara]*Clerk
for i := 0; i < len(cka); i++ {
cka[i] = cfg.makeClient(cfg.All())
}
gids := make([]int, npara)
ch := make(chan bool)
for xi := 0; xi < npara; xi++ {
gids[xi] = int((xi * 10) + 100)
go func(i int) {
defer func() { ch <- true }()
var gid int = gids[i]
var sid1 = fmt.Sprintf("s%da", gid)
var sid2 = fmt.Sprintf("s%db", gid)
cka[i].Join(map[int][]string{gid + 1000: []string{sid1}})
cka[i].Join(map[int][]string{gid: []string{sid2}})
cka[i].Leave([]int{gid + 1000})
}(xi)
}
for i := 0; i < npara; i++ {
<-ch
}
check(t, gids, ck)
fmt.Printf(" ... Passed\n")
fmt.Printf("Test: Minimal transfers after joins ...\n")
c1 := ck.Query(-1)
for i := 0; i < 5; i++ {
var gid = int(npara + 1 + i)
ck.Join(map[int][]string{gid: []string{
fmt.Sprintf("%da", gid),
fmt.Sprintf("%db", gid),
fmt.Sprintf("%db", gid)}})
}
c2 := ck.Query(-1)
for i := int(1); i <= npara; i++ {
for j := 0; j < len(c1.Shards); j++ {
if c2.Shards[j] == i {
if c1.Shards[j] != i {
t.Fatalf("non-minimal transfer after Join()s")
}
}
}
}
fmt.Printf(" ... Passed\n")
fmt.Printf("Test: Minimal transfers after leaves ...\n")
for i := 0; i < 5; i++ {
ck.Leave([]int{int(npara + 1 + i)})
}
c3 := ck.Query(-1)
for i := int(1); i <= npara; i++ {
for j := 0; j < len(c1.Shards); j++ {
if c2.Shards[j] == i {
if c3.Shards[j] != i {
t.Fatalf("non-minimal transfer after Leave()s")
}
}
}
}
fmt.Printf(" ... Passed\n")
}
func TestMulti(t *testing.T) {
const nservers = 3
cfg := make_config(t, nservers, false)
defer cfg.cleanup()
ck := cfg.makeClient(cfg.All())
fmt.Printf("Test: Multi-group join/leave ...\n")
cfa := make([]Config, 6)
cfa[0] = ck.Query(-1)
check(t, []int{}, ck)
var gid1 int = 1
var gid2 int = 2
ck.Join(map[int][]string{
gid1: []string{"x", "y", "z"},
gid2: []string{"a", "b", "c"},
})
check(t, []int{gid1, gid2}, ck)
cfa[1] = ck.Query(-1)
var gid3 int = 3
ck.Join(map[int][]string{gid3: []string{"j", "k", "l"}})
check(t, []int{gid1, gid2, gid3}, ck)
cfa[2] = ck.Query(-1)
cfx := ck.Query(-1)
sa1 := cfx.Groups[gid1]
if len(sa1) != 3 || sa1[0] != "x" || sa1[1] != "y" || sa1[2] != "z" {
t.Fatalf("wrong servers for gid %v: %v\n", gid1, sa1)
}
sa2 := cfx.Groups[gid2]
if len(sa2) != 3 || sa2[0] != "a" || sa2[1] != "b" || sa2[2] != "c" {
t.Fatalf("wrong servers for gid %v: %v\n", gid2, sa2)
}
sa3 := cfx.Groups[gid3]
if len(sa3) != 3 || sa3[0] != "j" || sa3[1] != "k" || sa3[2] != "l" {
t.Fatalf("wrong servers for gid %v: %v\n", gid3, sa3)
}
ck.Leave([]int{gid1, gid3})
check(t, []int{gid2}, ck)
cfa[3] = ck.Query(-1)
cfx = ck.Query(-1)
sa2 = cfx.Groups[gid2]
if len(sa2) != 3 || sa2[0] != "a" || sa2[1] != "b" || sa2[2] != "c" {
t.Fatalf("wrong servers for gid %v: %v\n", gid2, sa2)
}
ck.Leave([]int{gid2})
fmt.Printf(" ... Passed\n")
fmt.Printf("Test: Concurrent multi leave/join ...\n")
const npara = 10
var cka [npara]*Clerk
for i := 0; i < len(cka); i++ {
cka[i] = cfg.makeClient(cfg.All())
}
gids := make([]int, npara)
var wg sync.WaitGroup
for xi := 0; xi < npara; xi++ {
wg.Add(1)
gids[xi] = int(xi + 1000)
go func(i int) {
defer wg.Done()
var gid int = gids[i]
cka[i].Join(map[int][]string{
gid: []string{
fmt.Sprintf("%da", gid),
fmt.Sprintf("%db", gid),
fmt.Sprintf("%dc", gid)},
gid + 1000: []string{fmt.Sprintf("%da", gid+1000)},
gid + 2000: []string{fmt.Sprintf("%da", gid+2000)},
})
cka[i].Leave([]int{gid + 1000, gid + 2000})
}(xi)
}
wg.Wait()
check(t, gids, ck)
fmt.Printf(" ... Passed\n")
fmt.Printf("Test: Minimal transfers after multijoins ...\n")
c1 := ck.Query(-1)
m := make(map[int][]string)
for i := 0; i < 5; i++ {
var gid = npara + 1 + i
m[gid] = []string{fmt.Sprintf("%da", gid), fmt.Sprintf("%db", gid)}
}
ck.Join(m)
c2 := ck.Query(-1)
for i := int(1); i <= npara; i++ {
for j := 0; j < len(c1.Shards); j++ {
if c2.Shards[j] == i {
if c1.Shards[j] != i {
t.Fatalf("non-minimal transfer after Join()s")
}
}
}
}
fmt.Printf(" ... Passed\n")
fmt.Printf("Test: Minimal transfers after multileaves ...\n")
var l []int
for i := 0; i < 5; i++ {
l = append(l, npara+1+i)
}
ck.Leave(l)
c3 := ck.Query(-1)
for i := int(1); i <= npara; i++ {
for j := 0; j < len(c1.Shards); j++ {
if c2.Shards[j] == i {
if c3.Shards[j] != i {
t.Fatalf("non-minimal transfer after Leave()s")
}
}
}
}
fmt.Printf(" ... Passed\n")
fmt.Printf("Test: Check Same config on servers ...\n")
isLeader, leader := cfg.Leader()
if !isLeader {
t.Fatalf("Leader not found")
}
c := ck.Query(-1) // Config leader claims
cfg.ShutdownServer(leader)
attempts := 0
for isLeader, leader = cfg.Leader(); isLeader; time.Sleep(1 * time.Second) {
if attempts++; attempts >= 3 {
t.Fatalf("Leader not found")
}
}
c1 = ck.Query(-1)
check_same_config(t, c, c1)
fmt.Printf(" ... Passed\n")
}

137
src/shardkv/client.go Normal file
View File

@ -0,0 +1,137 @@
package shardkv
//
// client code to talk to a sharded key/value service.
//
// the client first talks to the shardctrler to find out
// the assignment of shards (keys) to groups, and then
// talks to the group that holds the key's shard.
//
import "6.824/labrpc"
import "crypto/rand"
import "math/big"
import "6.824/shardctrler"
import "time"
//
// which shard is a key in?
// please use this function,
// and please do not change it.
//
func key2shard(key string) int {
shard := 0
if len(key) > 0 {
shard = int(key[0])
}
shard %= shardctrler.NShards
return shard
}
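// For example, key "3" starts with byte '3' (ASCII 51); with NShards = 10
// (the value the tests assume, e.g. the "% 10" in TestChallenge2Unaffected),
// 51 % 10 = 1, so "3" lives in shard 1. An empty key maps to shard 0.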
func nrand() int64 {
max := big.NewInt(int64(1) << 62)
bigx, _ := rand.Int(rand.Reader, max)
x := bigx.Int64()
return x
}
type Clerk struct {
sm *shardctrler.Clerk
config shardctrler.Config
make_end func(string) *labrpc.ClientEnd
// You will have to modify this struct.
}
//
// the tester calls MakeClerk.
//
// ctrlers[] is needed to call shardctrler.MakeClerk().
//
// make_end(servername) turns a server name from a
// Config.Groups[gid][i] into a labrpc.ClientEnd on which you can
// send RPCs.
//
func MakeClerk(ctrlers []*labrpc.ClientEnd, make_end func(string) *labrpc.ClientEnd) *Clerk {
ck := new(Clerk)
ck.sm = shardctrler.MakeClerk(ctrlers)
ck.make_end = make_end
// You'll have to add code here.
return ck
}
//
// fetch the current value for a key.
// returns "" if the key does not exist.
// keeps trying forever in the face of all other errors.
// You will have to modify this function.
//
func (ck *Clerk) Get(key string) string {
args := GetArgs{}
args.Key = key
for {
shard := key2shard(key)
gid := ck.config.Shards[shard]
if servers, ok := ck.config.Groups[gid]; ok {
// try each server for the shard.
for si := 0; si < len(servers); si++ {
srv := ck.make_end(servers[si])
var reply GetReply
ok := srv.Call("ShardKV.Get", &args, &reply)
if ok && (reply.Err == OK || reply.Err == ErrNoKey) {
return reply.Value
}
if ok && (reply.Err == ErrWrongGroup) {
break
}
// ... not ok, or ErrWrongLeader
}
}
time.Sleep(100 * time.Millisecond)
// ask the controller for the latest configuration.
ck.config = ck.sm.Query(-1)
}
return ""
}
//
// shared by Put and Append.
// You will have to modify this function.
//
func (ck *Clerk) PutAppend(key string, value string, op string) {
args := PutAppendArgs{}
args.Key = key
args.Value = value
args.Op = op
for {
shard := key2shard(key)
gid := ck.config.Shards[shard]
if servers, ok := ck.config.Groups[gid]; ok {
for si := 0; si < len(servers); si++ {
srv := ck.make_end(servers[si])
var reply PutAppendReply
ok := srv.Call("ShardKV.PutAppend", &args, &reply)
if ok && reply.Err == OK {
return
}
if ok && reply.Err == ErrWrongGroup {
break
}
// ... not ok, or ErrWrongLeader
}
}
time.Sleep(100 * time.Millisecond)
// ask the controller for the latest configuration.
ck.config = ck.sm.Query(-1)
}
}
func (ck *Clerk) Put(key string, value string) {
ck.PutAppend(key, value, "Put")
}
func (ck *Clerk) Append(key string, value string) {
ck.PutAppend(key, value, "Append")
}
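// Example (hypothetical) use of this Clerk, mirroring what the tests drive
// through config.makeClient(); ctrlerEnds and make_end are assumed to be
// supplied by the caller:
//
//	ck := MakeClerk(ctrlerEnds, make_end)
//	ck.Put("x", "10")
//	ck.Append("x", "0")
//	v := ck.Get("x") // "100" once both operations have been applied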

44
src/shardkv/common.go Normal file
View File

@ -0,0 +1,44 @@
package shardkv
//
// Sharded key/value server.
// Lots of replica groups, each running Raft.
// Shardctrler decides which group serves each shard.
// Shardctrler may change shard assignment from time to time.
//
// You will have to modify these definitions.
//
const (
OK = "OK"
ErrNoKey = "ErrNoKey"
ErrWrongGroup = "ErrWrongGroup"
ErrWrongLeader = "ErrWrongLeader"
)
type Err string
// Put or Append
type PutAppendArgs struct {
// You'll have to add definitions here.
Key string
Value string
Op string // "Put" or "Append"
// You'll have to add definitions here.
// Field names must start with capital letters,
// otherwise RPC will break.
}
type PutAppendReply struct {
Err Err
}
type GetArgs struct {
Key string
// You'll have to add definitions here.
}
type GetReply struct {
Err Err
Value string
}
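// A common (but not prescribed) way to extend these definitions is to tag
// every request so servers can discard duplicates caused by client retries;
// the field names below are hypothetical:
//
//	type PutAppendArgs struct {
//		Key      string
//		Value    string
//		Op       string // "Put" or "Append"
//		ClientId int64  // which clerk sent this request
//		SeqNum   int64  // per-clerk sequence number
//	}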

382
src/shardkv/config.go Normal file
View File

@ -0,0 +1,382 @@
package shardkv
import "6.824/shardctrler"
import "6.824/labrpc"
import "testing"
import "os"
// import "log"
import crand "crypto/rand"
import "math/big"
import "math/rand"
import "encoding/base64"
import "sync"
import "runtime"
import "6.824/raft"
import "strconv"
import "fmt"
import "time"
func randstring(n int) string {
b := make([]byte, 2*n)
crand.Read(b)
s := base64.URLEncoding.EncodeToString(b)
return s[0:n]
}
func makeSeed() int64 {
max := big.NewInt(int64(1) << 62)
bigx, _ := crand.Int(crand.Reader, max)
x := bigx.Int64()
return x
}
// Randomize server handles
func random_handles(kvh []*labrpc.ClientEnd) []*labrpc.ClientEnd {
sa := make([]*labrpc.ClientEnd, len(kvh))
copy(sa, kvh)
for i := range sa {
j := rand.Intn(i + 1)
sa[i], sa[j] = sa[j], sa[i]
}
return sa
}
type group struct {
gid int
servers []*ShardKV
saved []*raft.Persister
endnames [][]string
mendnames [][]string
}
type config struct {
mu sync.Mutex
t *testing.T
net *labrpc.Network
start time.Time // time at which make_config() was called
nctrlers int
ctrlerservers []*shardctrler.ShardCtrler
mck *shardctrler.Clerk
ngroups int
n int // servers per k/v group
groups []*group
clerks map[*Clerk][]string
nextClientId int
maxraftstate int
}
func (cfg *config) checkTimeout() {
// enforce a two minute real-time limit on each test
if !cfg.t.Failed() && time.Since(cfg.start) > 120*time.Second {
cfg.t.Fatal("test took longer than 120 seconds")
}
}
func (cfg *config) cleanup() {
for gi := 0; gi < cfg.ngroups; gi++ {
cfg.ShutdownGroup(gi)
}
for i := 0; i < cfg.nctrlers; i++ {
cfg.ctrlerservers[i].Kill()
}
cfg.net.Cleanup()
cfg.checkTimeout()
}
// check that no server's log is too big.
func (cfg *config) checklogs() {
for gi := 0; gi < cfg.ngroups; gi++ {
for i := 0; i < cfg.n; i++ {
raft := cfg.groups[gi].saved[i].RaftStateSize()
snap := len(cfg.groups[gi].saved[i].ReadSnapshot())
if cfg.maxraftstate >= 0 && raft > 8*cfg.maxraftstate {
cfg.t.Fatalf("persister.RaftStateSize() %v, but maxraftstate %v",
raft, cfg.maxraftstate)
}
if cfg.maxraftstate < 0 && snap > 0 {
cfg.t.Fatalf("maxraftstate is -1, but snapshot is non-empty!")
}
}
}
}
// controller server name for labrpc.
func (cfg *config) ctrlername(i int) string {
return "ctrler" + strconv.Itoa(i)
}
// shard server name for labrpc.
// i'th server of group gid.
func (cfg *config) servername(gid int, i int) string {
return "server-" + strconv.Itoa(gid) + "-" + strconv.Itoa(i)
}
func (cfg *config) makeClient() *Clerk {
cfg.mu.Lock()
defer cfg.mu.Unlock()
// ClientEnds to talk to the controller service.
ends := make([]*labrpc.ClientEnd, cfg.nctrlers)
endnames := make([]string, cfg.nctrlers)
for j := 0; j < cfg.nctrlers; j++ {
endnames[j] = randstring(20)
ends[j] = cfg.net.MakeEnd(endnames[j])
cfg.net.Connect(endnames[j], cfg.ctrlername(j))
cfg.net.Enable(endnames[j], true)
}
ck := MakeClerk(ends, func(servername string) *labrpc.ClientEnd {
name := randstring(20)
end := cfg.net.MakeEnd(name)
cfg.net.Connect(name, servername)
cfg.net.Enable(name, true)
return end
})
cfg.clerks[ck] = endnames
cfg.nextClientId++
return ck
}
func (cfg *config) deleteClient(ck *Clerk) {
cfg.mu.Lock()
defer cfg.mu.Unlock()
v := cfg.clerks[ck]
for i := 0; i < len(v); i++ {
os.Remove(v[i])
}
delete(cfg.clerks, ck)
}
// Shut down the i'th server of the gi'th group by isolating it
func (cfg *config) ShutdownServer(gi int, i int) {
cfg.mu.Lock()
defer cfg.mu.Unlock()
gg := cfg.groups[gi]
// prevent this server from sending
for j := 0; j < len(gg.servers); j++ {
name := gg.endnames[i][j]
cfg.net.Enable(name, false)
}
for j := 0; j < len(gg.mendnames[i]); j++ {
name := gg.mendnames[i][j]
cfg.net.Enable(name, false)
}
// disable client connections to the server.
// it's important to do this before creating
// the new Persister in saved[i], to avoid
// the possibility of the server returning a
// positive reply to an Append but persisting
// the result in the superseded Persister.
cfg.net.DeleteServer(cfg.servername(gg.gid, i))
// a fresh persister, in case old instance
// continues to update the Persister.
// but copy old persister's content so that we always
// pass Make() the last persisted state.
if gg.saved[i] != nil {
gg.saved[i] = gg.saved[i].Copy()
}
kv := gg.servers[i]
if kv != nil {
cfg.mu.Unlock()
kv.Kill()
cfg.mu.Lock()
gg.servers[i] = nil
}
}
func (cfg *config) ShutdownGroup(gi int) {
for i := 0; i < cfg.n; i++ {
cfg.ShutdownServer(gi, i)
}
}
// start i'th server in gi'th group
func (cfg *config) StartServer(gi int, i int) {
cfg.mu.Lock()
gg := cfg.groups[gi]
// a fresh set of outgoing ClientEnd names
// to talk to other servers in this group.
gg.endnames[i] = make([]string, cfg.n)
for j := 0; j < cfg.n; j++ {
gg.endnames[i][j] = randstring(20)
}
// and the connections to other servers in this group.
ends := make([]*labrpc.ClientEnd, cfg.n)
for j := 0; j < cfg.n; j++ {
ends[j] = cfg.net.MakeEnd(gg.endnames[i][j])
cfg.net.Connect(gg.endnames[i][j], cfg.servername(gg.gid, j))
cfg.net.Enable(gg.endnames[i][j], true)
}
// ends to talk to shardctrler service
mends := make([]*labrpc.ClientEnd, cfg.nctrlers)
gg.mendnames[i] = make([]string, cfg.nctrlers)
for j := 0; j < cfg.nctrlers; j++ {
gg.mendnames[i][j] = randstring(20)
mends[j] = cfg.net.MakeEnd(gg.mendnames[i][j])
cfg.net.Connect(gg.mendnames[i][j], cfg.ctrlername(j))
cfg.net.Enable(gg.mendnames[i][j], true)
}
// a fresh persister, so old instance doesn't overwrite
// new instance's persisted state.
// give the fresh persister a copy of the old persister's
// state, so that we always pass StartServer()
// the last persisted state.
if gg.saved[i] != nil {
gg.saved[i] = gg.saved[i].Copy()
} else {
gg.saved[i] = raft.MakePersister()
}
cfg.mu.Unlock()
gg.servers[i] = StartServer(ends, i, gg.saved[i], cfg.maxraftstate,
gg.gid, mends,
func(servername string) *labrpc.ClientEnd {
name := randstring(20)
end := cfg.net.MakeEnd(name)
cfg.net.Connect(name, servername)
cfg.net.Enable(name, true)
return end
})
kvsvc := labrpc.MakeService(gg.servers[i])
rfsvc := labrpc.MakeService(gg.servers[i].rf)
srv := labrpc.MakeServer()
srv.AddService(kvsvc)
srv.AddService(rfsvc)
cfg.net.AddServer(cfg.servername(gg.gid, i), srv)
}
func (cfg *config) StartGroup(gi int) {
for i := 0; i < cfg.n; i++ {
cfg.StartServer(gi, i)
}
}
func (cfg *config) StartCtrlerserver(i int) {
// ClientEnds to talk to other controller replicas.
ends := make([]*labrpc.ClientEnd, cfg.nctrlers)
for j := 0; j < cfg.nctrlers; j++ {
endname := randstring(20)
ends[j] = cfg.net.MakeEnd(endname)
cfg.net.Connect(endname, cfg.ctrlername(j))
cfg.net.Enable(endname, true)
}
p := raft.MakePersister()
cfg.ctrlerservers[i] = shardctrler.StartServer(ends, i, p)
msvc := labrpc.MakeService(cfg.ctrlerservers[i])
rfsvc := labrpc.MakeService(cfg.ctrlerservers[i].Raft())
srv := labrpc.MakeServer()
srv.AddService(msvc)
srv.AddService(rfsvc)
cfg.net.AddServer(cfg.ctrlername(i), srv)
}
func (cfg *config) shardclerk() *shardctrler.Clerk {
// ClientEnds to talk to ctrler service.
ends := make([]*labrpc.ClientEnd, cfg.nctrlers)
for j := 0; j < cfg.nctrlers; j++ {
name := randstring(20)
ends[j] = cfg.net.MakeEnd(name)
cfg.net.Connect(name, cfg.ctrlername(j))
cfg.net.Enable(name, true)
}
return shardctrler.MakeClerk(ends)
}
// tell the shardctrler that a group is joining.
func (cfg *config) join(gi int) {
cfg.joinm([]int{gi})
}
func (cfg *config) joinm(gis []int) {
m := make(map[int][]string, len(gis))
for _, g := range gis {
gid := cfg.groups[g].gid
servernames := make([]string, cfg.n)
for i := 0; i < cfg.n; i++ {
servernames[i] = cfg.servername(gid, i)
}
m[gid] = servernames
}
cfg.mck.Join(m)
}
// tell the shardctrler that a group is leaving.
func (cfg *config) leave(gi int) {
cfg.leavem([]int{gi})
}
func (cfg *config) leavem(gis []int) {
gids := make([]int, 0, len(gis))
for _, g := range gis {
gids = append(gids, cfg.groups[g].gid)
}
cfg.mck.Leave(gids)
}
var ncpu_once sync.Once
func make_config(t *testing.T, n int, unreliable bool, maxraftstate int) *config {
ncpu_once.Do(func() {
if runtime.NumCPU() < 2 {
fmt.Printf("warning: only one CPU, which may conceal locking bugs\n")
}
rand.Seed(makeSeed())
})
runtime.GOMAXPROCS(4)
cfg := &config{}
cfg.t = t
cfg.maxraftstate = maxraftstate
cfg.net = labrpc.MakeNetwork()
cfg.start = time.Now()
// controller
cfg.nctrlers = 3
cfg.ctrlerservers = make([]*shardctrler.ShardCtrler, cfg.nctrlers)
for i := 0; i < cfg.nctrlers; i++ {
cfg.StartCtrlerserver(i)
}
cfg.mck = cfg.shardclerk()
cfg.ngroups = 3
cfg.groups = make([]*group, cfg.ngroups)
cfg.n = n
for gi := 0; gi < cfg.ngroups; gi++ {
gg := &group{}
cfg.groups[gi] = gg
gg.gid = 100 + gi
gg.servers = make([]*ShardKV, cfg.n)
gg.saved = make([]*raft.Persister, cfg.n)
gg.endnames = make([][]string, cfg.n)
gg.mendnames = make([][]string, cfg.n)
for i := 0; i < cfg.n; i++ {
cfg.StartServer(gi, i)
}
}
cfg.clerks = make(map[*Clerk][]string)
cfg.nextClientId = cfg.n + 1000 // client ids start 1000 above the highest serverid
cfg.net.Reliable(!unreliable)
return cfg
}

101
src/shardkv/server.go Normal file
View File

@ -0,0 +1,101 @@
package shardkv
import "6.824/labrpc"
import "6.824/raft"
import "sync"
import "6.824/labgob"
type Op struct {
// Your definitions here.
// Field names must start with capital letters,
// otherwise RPC will break.
}
type ShardKV struct {
mu sync.Mutex
me int
rf *raft.Raft
applyCh chan raft.ApplyMsg
make_end func(string) *labrpc.ClientEnd
gid int
ctrlers []*labrpc.ClientEnd
maxraftstate int // snapshot if log grows this big
// Your definitions here.
}
func (kv *ShardKV) Get(args *GetArgs, reply *GetReply) {
// Your code here.
}
func (kv *ShardKV) PutAppend(args *PutAppendArgs, reply *PutAppendReply) {
// Your code here.
}
//
// the tester calls Kill() when a ShardKV instance won't
// be needed again. you are not required to do anything
// in Kill(), but it might be convenient to (for example)
// turn off debug output from this instance.
//
func (kv *ShardKV) Kill() {
kv.rf.Kill()
// Your code here, if desired.
}
//
// servers[] contains the ports of the servers in this group.
//
// me is the index of the current server in servers[].
//
// the k/v server should store snapshots through the underlying Raft
// implementation, which should call persister.SaveStateAndSnapshot() to
// atomically save the Raft state along with the snapshot.
//
// the k/v server should snapshot when Raft's saved state exceeds
// maxraftstate bytes, in order to allow Raft to garbage-collect its
// log. if maxraftstate is -1, you don't need to snapshot.
//
// gid is this group's GID, for interacting with the shardctrler.
//
// pass ctrlers[] to shardctrler.MakeClerk() so you can send
// RPCs to the shardctrler.
//
// make_end(servername) turns a server name from a
// Config.Groups[gid][i] into a labrpc.ClientEnd on which you can
// send RPCs. You'll need this to send RPCs to other groups.
//
// look at client.go for examples of how to use ctrlers[]
// and make_end() to send RPCs to the group owning a specific shard.
//
// StartServer() must return quickly, so it should start goroutines
// for any long-running work.
//
func StartServer(servers []*labrpc.ClientEnd, me int, persister *raft.Persister, maxraftstate int, gid int, ctrlers []*labrpc.ClientEnd, make_end func(string) *labrpc.ClientEnd) *ShardKV {
// call labgob.Register on structures you want
// Go's RPC library to marshall/unmarshall.
labgob.Register(Op{})
kv := new(ShardKV)
kv.me = me
kv.maxraftstate = maxraftstate
kv.make_end = make_end
kv.gid = gid
kv.ctrlers = ctrlers
// Your initialization code here.
// Use something like this to talk to the shardctrler:
// kv.mck = shardctrler.MakeClerk(kv.ctrlers)
kv.applyCh = make(chan raft.ApplyMsg)
kv.rf = raft.Make(servers, me, persister, kv.applyCh)
return kv
}
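// A hypothetical sketch (not part of the skeleton) of a background goroutine
// that StartServer() might launch to watch for configuration changes, using
// the kv.mck clerk suggested in the comment above; the field name, the poll
// interval, and the time import are assumptions:
//
//	go func() {
//		for {
//			latest := kv.mck.Query(-1)
//			// if latest.Num is newer than the configuration this group
//			// is serving, start migrating shards accordingly.
//			_ = latest
//			time.Sleep(100 * time.Millisecond)
//		}
//	}()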

948
src/shardkv/test_test.go Normal file
View File

@ -0,0 +1,948 @@
package shardkv
import "6.824/porcupine"
import "6.824/models"
import "testing"
import "strconv"
import "time"
import "fmt"
import "sync/atomic"
import "sync"
import "math/rand"
import "io/ioutil"
const linearizabilityCheckTimeout = 1 * time.Second
func check(t *testing.T, ck *Clerk, key string, value string) {
v := ck.Get(key)
if v != value {
t.Fatalf("Get(%v): expected:\n%v\nreceived:\n%v", key, value, v)
}
}
//
// test static 2-way sharding, without shard movement.
//
func TestStaticShards(t *testing.T) {
fmt.Printf("Test: static shards ...\n")
cfg := make_config(t, 3, false, -1)
defer cfg.cleanup()
ck := cfg.makeClient()
cfg.join(0)
cfg.join(1)
n := 10
ka := make([]string, n)
va := make([]string, n)
for i := 0; i < n; i++ {
ka[i] = strconv.Itoa(i) // ensure multiple shards
va[i] = randstring(20)
ck.Put(ka[i], va[i])
}
for i := 0; i < n; i++ {
check(t, ck, ka[i], va[i])
}
// make sure that the data really is sharded by
// shutting down one shard and checking that some
// Get()s don't succeed.
cfg.ShutdownGroup(1)
cfg.checklogs() // forbid snapshots
ch := make(chan string)
for xi := 0; xi < n; xi++ {
ck1 := cfg.makeClient() // only one call allowed per client
go func(i int) {
v := ck1.Get(ka[i])
if v != va[i] {
ch <- fmt.Sprintf("Get(%v): expected:\n%v\nreceived:\n%v", ka[i], va[i], v)
} else {
ch <- ""
}
}(xi)
}
// wait a bit, only about half the Gets should succeed.
ndone := 0
done := false
for done == false {
select {
case err := <-ch:
if err != "" {
t.Fatal(err)
}
ndone += 1
case <-time.After(time.Second * 2):
done = true
break
}
}
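// the 10 keys "0".."9" map via key2shard to 10 distinct shards, and the two
// joined groups own half of the shards each, so exactly 5 of the Gets can
// complete while group 1 is down.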
if ndone != 5 {
t.Fatalf("expected 5 completions with one shard dead; got %v\n", ndone)
}
// bring the crashed shard/group back to life.
cfg.StartGroup(1)
for i := 0; i < n; i++ {
check(t, ck, ka[i], va[i])
}
fmt.Printf(" ... Passed\n")
}
func TestJoinLeave(t *testing.T) {
fmt.Printf("Test: join then leave ...\n")
cfg := make_config(t, 3, false, -1)
defer cfg.cleanup()
ck := cfg.makeClient()
cfg.join(0)
n := 10
ka := make([]string, n)
va := make([]string, n)
for i := 0; i < n; i++ {
ka[i] = strconv.Itoa(i) // ensure multiple shards
va[i] = randstring(5)
ck.Put(ka[i], va[i])
}
for i := 0; i < n; i++ {
check(t, ck, ka[i], va[i])
}
cfg.join(1)
for i := 0; i < n; i++ {
check(t, ck, ka[i], va[i])
x := randstring(5)
ck.Append(ka[i], x)
va[i] += x
}
cfg.leave(0)
for i := 0; i < n; i++ {
check(t, ck, ka[i], va[i])
x := randstring(5)
ck.Append(ka[i], x)
va[i] += x
}
// allow time for shards to transfer.
time.Sleep(1 * time.Second)
cfg.checklogs()
cfg.ShutdownGroup(0)
for i := 0; i < n; i++ {
check(t, ck, ka[i], va[i])
}
fmt.Printf(" ... Passed\n")
}
func TestSnapshot(t *testing.T) {
fmt.Printf("Test: snapshots, join, and leave ...\n")
cfg := make_config(t, 3, false, 1000)
defer cfg.cleanup()
ck := cfg.makeClient()
cfg.join(0)
n := 30
ka := make([]string, n)
va := make([]string, n)
for i := 0; i < n; i++ {
ka[i] = strconv.Itoa(i) // ensure multiple shards
va[i] = randstring(20)
ck.Put(ka[i], va[i])
}
for i := 0; i < n; i++ {
check(t, ck, ka[i], va[i])
}
cfg.join(1)
cfg.join(2)
cfg.leave(0)
for i := 0; i < n; i++ {
check(t, ck, ka[i], va[i])
x := randstring(20)
ck.Append(ka[i], x)
va[i] += x
}
cfg.leave(1)
cfg.join(0)
for i := 0; i < n; i++ {
check(t, ck, ka[i], va[i])
x := randstring(20)
ck.Append(ka[i], x)
va[i] += x
}
time.Sleep(1 * time.Second)
for i := 0; i < n; i++ {
check(t, ck, ka[i], va[i])
}
time.Sleep(1 * time.Second)
cfg.checklogs()
cfg.ShutdownGroup(0)
cfg.ShutdownGroup(1)
cfg.ShutdownGroup(2)
cfg.StartGroup(0)
cfg.StartGroup(1)
cfg.StartGroup(2)
for i := 0; i < n; i++ {
check(t, ck, ka[i], va[i])
}
fmt.Printf(" ... Passed\n")
}
func TestMissChange(t *testing.T) {
fmt.Printf("Test: servers miss configuration changes...\n")
cfg := make_config(t, 3, false, 1000)
defer cfg.cleanup()
ck := cfg.makeClient()
cfg.join(0)
n := 10
ka := make([]string, n)
va := make([]string, n)
for i := 0; i < n; i++ {
ka[i] = strconv.Itoa(i) // ensure multiple shards
va[i] = randstring(20)
ck.Put(ka[i], va[i])
}
for i := 0; i < n; i++ {
check(t, ck, ka[i], va[i])
}
cfg.join(1)
cfg.ShutdownServer(0, 0)
cfg.ShutdownServer(1, 0)
cfg.ShutdownServer(2, 0)
cfg.join(2)
cfg.leave(1)
cfg.leave(0)
for i := 0; i < n; i++ {
check(t, ck, ka[i], va[i])
x := randstring(20)
ck.Append(ka[i], x)
va[i] += x
}
cfg.join(1)
for i := 0; i < n; i++ {
check(t, ck, ka[i], va[i])
x := randstring(20)
ck.Append(ka[i], x)
va[i] += x
}
cfg.StartServer(0, 0)
cfg.StartServer(1, 0)
cfg.StartServer(2, 0)
for i := 0; i < n; i++ {
check(t, ck, ka[i], va[i])
x := randstring(20)
ck.Append(ka[i], x)
va[i] += x
}
time.Sleep(2 * time.Second)
cfg.ShutdownServer(0, 1)
cfg.ShutdownServer(1, 1)
cfg.ShutdownServer(2, 1)
cfg.join(0)
cfg.leave(2)
for i := 0; i < n; i++ {
check(t, ck, ka[i], va[i])
x := randstring(20)
ck.Append(ka[i], x)
va[i] += x
}
cfg.StartServer(0, 1)
cfg.StartServer(1, 1)
cfg.StartServer(2, 1)
for i := 0; i < n; i++ {
check(t, ck, ka[i], va[i])
}
fmt.Printf(" ... Passed\n")
}
func TestConcurrent1(t *testing.T) {
fmt.Printf("Test: concurrent puts and configuration changes...\n")
cfg := make_config(t, 3, false, 100)
defer cfg.cleanup()
ck := cfg.makeClient()
cfg.join(0)
n := 10
ka := make([]string, n)
va := make([]string, n)
for i := 0; i < n; i++ {
ka[i] = strconv.Itoa(i) // ensure multiple shards
va[i] = randstring(5)
ck.Put(ka[i], va[i])
}
var done int32
ch := make(chan bool)
ff := func(i int) {
defer func() { ch <- true }()
ck1 := cfg.makeClient()
for atomic.LoadInt32(&done) == 0 {
x := randstring(5)
ck1.Append(ka[i], x)
va[i] += x
time.Sleep(10 * time.Millisecond)
}
}
for i := 0; i < n; i++ {
go ff(i)
}
time.Sleep(150 * time.Millisecond)
cfg.join(1)
time.Sleep(500 * time.Millisecond)
cfg.join(2)
time.Sleep(500 * time.Millisecond)
cfg.leave(0)
cfg.ShutdownGroup(0)
time.Sleep(100 * time.Millisecond)
cfg.ShutdownGroup(1)
time.Sleep(100 * time.Millisecond)
cfg.ShutdownGroup(2)
cfg.leave(2)
time.Sleep(100 * time.Millisecond)
cfg.StartGroup(0)
cfg.StartGroup(1)
cfg.StartGroup(2)
time.Sleep(100 * time.Millisecond)
cfg.join(0)
cfg.leave(1)
time.Sleep(500 * time.Millisecond)
cfg.join(1)
time.Sleep(1 * time.Second)
atomic.StoreInt32(&done, 1)
for i := 0; i < n; i++ {
<-ch
}
for i := 0; i < n; i++ {
check(t, ck, ka[i], va[i])
}
fmt.Printf(" ... Passed\n")
}
//
// this tests the various sources from which a re-starting
// group might need to fetch shard contents.
//
func TestConcurrent2(t *testing.T) {
fmt.Printf("Test: more concurrent puts and configuration changes...\n")
cfg := make_config(t, 3, false, -1)
defer cfg.cleanup()
ck := cfg.makeClient()
cfg.join(1)
cfg.join(0)
cfg.join(2)
n := 10
ka := make([]string, n)
va := make([]string, n)
for i := 0; i < n; i++ {
ka[i] = strconv.Itoa(i) // ensure multiple shards
va[i] = randstring(1)
ck.Put(ka[i], va[i])
}
var done int32
ch := make(chan bool)
ff := func(i int, ck1 *Clerk) {
defer func() { ch <- true }()
for atomic.LoadInt32(&done) == 0 {
x := randstring(1)
ck1.Append(ka[i], x)
va[i] += x
time.Sleep(50 * time.Millisecond)
}
}
for i := 0; i < n; i++ {
ck1 := cfg.makeClient()
go ff(i, ck1)
}
cfg.leave(0)
cfg.leave(2)
time.Sleep(3000 * time.Millisecond)
cfg.join(0)
cfg.join(2)
cfg.leave(1)
time.Sleep(3000 * time.Millisecond)
cfg.join(1)
cfg.leave(0)
cfg.leave(2)
time.Sleep(3000 * time.Millisecond)
cfg.ShutdownGroup(1)
cfg.ShutdownGroup(2)
time.Sleep(1000 * time.Millisecond)
cfg.StartGroup(1)
cfg.StartGroup(2)
time.Sleep(2 * time.Second)
atomic.StoreInt32(&done, 1)
for i := 0; i < n; i++ {
<-ch
}
for i := 0; i < n; i++ {
check(t, ck, ka[i], va[i])
}
fmt.Printf(" ... Passed\n")
}
func TestConcurrent3(t *testing.T) {
fmt.Printf("Test: concurrent configuration change and restart...\n")
cfg := make_config(t, 3, false, 300)
defer cfg.cleanup()
ck := cfg.makeClient()
cfg.join(0)
n := 10
ka := make([]string, n)
va := make([]string, n)
for i := 0; i < n; i++ {
ka[i] = strconv.Itoa(i)
va[i] = randstring(1)
ck.Put(ka[i], va[i])
}
var done int32
ch := make(chan bool)
ff := func(i int, ck1 *Clerk) {
defer func() { ch <- true }()
for atomic.LoadInt32(&done) == 0 {
x := randstring(1)
ck1.Append(ka[i], x)
va[i] += x
}
}
for i := 0; i < n; i++ {
ck1 := cfg.makeClient()
go ff(i, ck1)
}
t0 := time.Now()
for time.Since(t0) < 12*time.Second {
cfg.join(2)
cfg.join(1)
time.Sleep(time.Duration(rand.Int()%900) * time.Millisecond)
cfg.ShutdownGroup(0)
cfg.ShutdownGroup(1)
cfg.ShutdownGroup(2)
cfg.StartGroup(0)
cfg.StartGroup(1)
cfg.StartGroup(2)
time.Sleep(time.Duration(rand.Int()%900) * time.Millisecond)
cfg.leave(1)
cfg.leave(2)
time.Sleep(time.Duration(rand.Int()%900) * time.Millisecond)
}
time.Sleep(2 * time.Second)
atomic.StoreInt32(&done, 1)
for i := 0; i < n; i++ {
<-ch
}
for i := 0; i < n; i++ {
check(t, ck, ka[i], va[i])
}
fmt.Printf(" ... Passed\n")
}
func TestUnreliable1(t *testing.T) {
fmt.Printf("Test: unreliable 1...\n")
cfg := make_config(t, 3, true, 100)
defer cfg.cleanup()
ck := cfg.makeClient()
cfg.join(0)
n := 10
ka := make([]string, n)
va := make([]string, n)
for i := 0; i < n; i++ {
ka[i] = strconv.Itoa(i) // ensure multiple shards
va[i] = randstring(5)
ck.Put(ka[i], va[i])
}
cfg.join(1)
cfg.join(2)
cfg.leave(0)
for ii := 0; ii < n*2; ii++ {
i := ii % n
check(t, ck, ka[i], va[i])
x := randstring(5)
ck.Append(ka[i], x)
va[i] += x
}
cfg.join(0)
cfg.leave(1)
for ii := 0; ii < n*2; ii++ {
i := ii % n
check(t, ck, ka[i], va[i])
}
fmt.Printf(" ... Passed\n")
}
func TestUnreliable2(t *testing.T) {
fmt.Printf("Test: unreliable 2...\n")
cfg := make_config(t, 3, true, 100)
defer cfg.cleanup()
ck := cfg.makeClient()
cfg.join(0)
n := 10
ka := make([]string, n)
va := make([]string, n)
for i := 0; i < n; i++ {
ka[i] = strconv.Itoa(i) // ensure multiple shards
va[i] = randstring(5)
ck.Put(ka[i], va[i])
}
var done int32
ch := make(chan bool)
ff := func(i int) {
defer func() { ch <- true }()
ck1 := cfg.makeClient()
for atomic.LoadInt32(&done) == 0 {
x := randstring(5)
ck1.Append(ka[i], x)
va[i] += x
}
}
for i := 0; i < n; i++ {
go ff(i)
}
time.Sleep(150 * time.Millisecond)
cfg.join(1)
time.Sleep(500 * time.Millisecond)
cfg.join(2)
time.Sleep(500 * time.Millisecond)
cfg.leave(0)
time.Sleep(500 * time.Millisecond)
cfg.leave(1)
time.Sleep(500 * time.Millisecond)
cfg.join(1)
cfg.join(0)
time.Sleep(2 * time.Second)
atomic.StoreInt32(&done, 1)
cfg.net.Reliable(true)
for i := 0; i < n; i++ {
<-ch
}
for i := 0; i < n; i++ {
check(t, ck, ka[i], va[i])
}
fmt.Printf(" ... Passed\n")
}
func TestUnreliable3(t *testing.T) {
fmt.Printf("Test: unreliable 3...\n")
cfg := make_config(t, 3, true, 100)
defer cfg.cleanup()
begin := time.Now()
var operations []porcupine.Operation
var opMu sync.Mutex
ck := cfg.makeClient()
cfg.join(0)
n := 10
ka := make([]string, n)
va := make([]string, n)
for i := 0; i < n; i++ {
ka[i] = strconv.Itoa(i) // ensure multiple shards
va[i] = randstring(5)
start := int64(time.Since(begin))
ck.Put(ka[i], va[i])
end := int64(time.Since(begin))
inp := models.KvInput{Op: 1, Key: ka[i], Value: va[i]}
var out models.KvOutput
op := porcupine.Operation{Input: inp, Call: start, Output: out, Return: end, ClientId: 0}
operations = append(operations, op)
}
var done int32
ch := make(chan bool)
ff := func(i int) {
defer func() { ch <- true }()
ck1 := cfg.makeClient()
for atomic.LoadInt32(&done) == 0 {
ki := rand.Int() % n
nv := randstring(5)
var inp models.KvInput
var out models.KvOutput
start := int64(time.Since(begin))
if (rand.Int() % 1000) < 500 {
ck1.Append(ka[ki], nv)
inp = models.KvInput{Op: 2, Key: ka[ki], Value: nv}
} else if (rand.Int() % 1000) < 100 {
ck1.Put(ka[ki], nv)
inp = models.KvInput{Op: 1, Key: ka[ki], Value: nv}
} else {
v := ck1.Get(ka[ki])
inp = models.KvInput{Op: 0, Key: ka[ki]}
out = models.KvOutput{Value: v}
}
end := int64(time.Since(begin))
op := porcupine.Operation{Input: inp, Call: start, Output: out, Return: end, ClientId: i}
opMu.Lock()
operations = append(operations, op)
opMu.Unlock()
}
}
for i := 0; i < n; i++ {
go ff(i)
}
time.Sleep(150 * time.Millisecond)
cfg.join(1)
time.Sleep(500 * time.Millisecond)
cfg.join(2)
time.Sleep(500 * time.Millisecond)
cfg.leave(0)
time.Sleep(500 * time.Millisecond)
cfg.leave(1)
time.Sleep(500 * time.Millisecond)
cfg.join(1)
cfg.join(0)
time.Sleep(2 * time.Second)
atomic.StoreInt32(&done, 1)
cfg.net.Reliable(true)
for i := 0; i < n; i++ {
<-ch
}
res, info := porcupine.CheckOperationsVerbose(models.KvModel, operations, linearizabilityCheckTimeout)
if res == porcupine.Illegal {
file, err := ioutil.TempFile("", "*.html")
if err != nil {
fmt.Printf("info: failed to create temp file for visualization")
} else {
err = porcupine.Visualize(models.KvModel, info, file)
if err != nil {
fmt.Printf("info: failed to write history visualization to %s\n", file.Name())
} else {
fmt.Printf("info: wrote history visualization to %s\n", file.Name())
}
}
t.Fatal("history is not linearizable")
} else if res == porcupine.Unknown {
fmt.Println("info: linearizability check timed out, assuming history is ok")
}
fmt.Printf(" ... Passed\n")
}
//
// optional test to see whether servers are deleting
// shards for which they are no longer responsible.
//
func TestChallenge1Delete(t *testing.T) {
fmt.Printf("Test: shard deletion (challenge 1) ...\n")
// "1" means force snapshot after every log entry.
cfg := make_config(t, 3, false, 1)
defer cfg.cleanup()
ck := cfg.makeClient()
cfg.join(0)
// 30,000 bytes of total values.
n := 30
ka := make([]string, n)
va := make([]string, n)
for i := 0; i < n; i++ {
ka[i] = strconv.Itoa(i)
va[i] = randstring(1000)
ck.Put(ka[i], va[i])
}
for i := 0; i < 3; i++ {
check(t, ck, ka[i], va[i])
}
for iters := 0; iters < 2; iters++ {
cfg.join(1)
cfg.leave(0)
cfg.join(2)
time.Sleep(3 * time.Second)
for i := 0; i < 3; i++ {
check(t, ck, ka[i], va[i])
}
cfg.leave(1)
cfg.join(0)
cfg.leave(2)
time.Sleep(3 * time.Second)
for i := 0; i < 3; i++ {
check(t, ck, ka[i], va[i])
}
}
cfg.join(1)
cfg.join(2)
time.Sleep(1 * time.Second)
for i := 0; i < 3; i++ {
check(t, ck, ka[i], va[i])
}
time.Sleep(1 * time.Second)
for i := 0; i < 3; i++ {
check(t, ck, ka[i], va[i])
}
time.Sleep(1 * time.Second)
for i := 0; i < 3; i++ {
check(t, ck, ka[i], va[i])
}
total := 0
for gi := 0; gi < cfg.ngroups; gi++ {
for i := 0; i < cfg.n; i++ {
raft := cfg.groups[gi].saved[i].RaftStateSize()
snap := len(cfg.groups[gi].saved[i].ReadSnapshot())
total += raft + snap
}
}
// 27 keys should be stored once.
// 3 keys should also be stored in client dup tables.
// everything on 3 replicas.
// plus slop: with n = 30 that comes to 3 * (27000 + 6000 + 6000) = 117000 bytes.
expected := 3 * (((n - 3) * 1000) + 2*3*1000 + 6000)
if total > expected {
t.Fatalf("snapshot + persisted Raft state are too big: %v > %v\n", total, expected)
}
for i := 0; i < n; i++ {
check(t, ck, ka[i], va[i])
}
fmt.Printf(" ... Passed\n")
}
//
// optional test to see whether servers can handle
// shards that are not affected by a config change
// while the config change is underway
//
func TestChallenge2Unaffected(t *testing.T) {
fmt.Printf("Test: unaffected shard access (challenge 2) ...\n")
cfg := make_config(t, 3, true, 100)
defer cfg.cleanup()
ck := cfg.makeClient()
// JOIN 100
cfg.join(0)
// Do a bunch of puts to keys in all shards
n := 10
ka := make([]string, n)
va := make([]string, n)
for i := 0; i < n; i++ {
ka[i] = strconv.Itoa(i) // ensure multiple shards
va[i] = "100"
ck.Put(ka[i], va[i])
}
// JOIN 101
cfg.join(1)
// QUERY to find shards now owned by 101
c := cfg.mck.Query(-1)
owned := make(map[int]bool, n)
for s, gid := range c.Shards {
owned[s] = gid == cfg.groups[1].gid
}
// Wait for migration to new config to complete, and for clients to
// start using this updated config. Gets to any key k such that
// owned[shard(k)] == true should now be served by group 101.
<-time.After(1 * time.Second)
for i := 0; i < n; i++ {
if owned[i] {
va[i] = "101"
ck.Put(ka[i], va[i])
}
}
// KILL 100
cfg.ShutdownGroup(0)
// LEAVE 100
// 101 doesn't get a chance to migrate things previously owned by 100
cfg.leave(0)
// Wait to make sure clients see new config
<-time.After(1 * time.Second)
// And finally: check that gets/puts for 101-owned keys still complete
for i := 0; i < n; i++ {
shard := int(ka[i][0]) % 10
if owned[shard] {
check(t, ck, ka[i], va[i])
ck.Put(ka[i], va[i]+"-1")
check(t, ck, ka[i], va[i]+"-1")
}
}
fmt.Printf(" ... Passed\n")
}
//
// optional test to see whether servers can handle operations on shards that
// have been received as a part of a config migration when the entire migration
// has not yet completed.
//
func TestChallenge2Partial(t *testing.T) {
fmt.Printf("Test: partial migration shard access (challenge 2) ...\n")
cfg := make_config(t, 3, true, 100)
defer cfg.cleanup()
ck := cfg.makeClient()
// JOIN 100 + 101 + 102
cfg.joinm([]int{0, 1, 2})
// Give the implementation some time to reconfigure
<-time.After(1 * time.Second)
// Do a bunch of puts to keys in all shards
n := 10
ka := make([]string, n)
va := make([]string, n)
for i := 0; i < n; i++ {
ka[i] = strconv.Itoa(i) // ensure multiple shards
va[i] = "100"
ck.Put(ka[i], va[i])
}
// QUERY to find shards owned by 102
c := cfg.mck.Query(-1)
owned := make(map[int]bool, n)
for s, gid := range c.Shards {
owned[s] = gid == cfg.groups[2].gid
}
// KILL 100
cfg.ShutdownGroup(0)
// LEAVE 100 + 102
// 101 can get old shards from 102, but not from 100. 101 should start
// serving shards that used to belong to 102 as soon as possible
cfg.leavem([]int{0, 2})
// Give the implementation some time to start reconfiguration
// And to migrate 102 -> 101
<-time.After(1 * time.Second)
// And finally: check that gets/puts for 101-owned keys now complete
for i := 0; i < n; i++ {
shard := key2shard(ka[i])
if owned[shard] {
check(t, ck, ka[i], va[i])
ck.Put(ka[i], va[i]+"-2")
check(t, ck, ka[i], va[i]+"-2")
}
}
fmt.Printf(" ... Passed\n")
}