commit 5af9fddd54
Robert Morris 2022-01-10 15:19:31 -05:00
66 changed files with 77148 additions and 0 deletions

.check-build Executable file

@@ -0,0 +1,127 @@
#!/usr/bin/env bash
set -eu
REFERENCE_FILES=(
# lab 1
src/mrapps/crash.go
src/mrapps/indexer.go
src/mrapps/mtiming.go
src/mrapps/nocrash.go
src/mrapps/rtiming.go
src/mrapps/wc.go
src/main/mrsequential.go
src/main/mrcoordinator.go
src/main/mrworker.go
# lab 2
src/raft/persister.go
src/raft/test_test.go
src/raft/config.go
src/labrpc/labrpc.go
# lab 3
src/kvraft/test_test.go
src/kvraft/config.go
# lab 4a
src/shardctrler/test_test.go
src/shardctrler/config.go
# lab 4b
src/shardkv/test_test.go
src/shardkv/config.go
)
main() {
upstream="$1"
labnum="$2"
# make sure we have a reference copy of the lab in FETCH_HEAD
git fetch "$upstream" 2>/dev/null || die "unable to git fetch $upstream"
# copy existing directory
tmpdir="$(mktemp -d)"
find src -type s -delete # cp can't copy sockets
cp -r src "$tmpdir"
orig="$PWD"
cd "$tmpdir"
# check out reference files
for f in "${REFERENCE_FILES[@]}"; do
mkdir -p "$(dirname "$f")"
git --git-dir="$orig/.git" show "FETCH_HEAD:$f" > "$f"
done
case $labnum in
"lab1") check_lab1;;
"lab2a"|"lab2b"|"lab2c"|"lab2d") check_lab2;;
"lab3a"|"lab3b") check_lab3;;
"lab4a") check_lab4a;;
"lab4b") check_lab4b;;
*) die "unknown lab: $labnum";;
esac
cd
rm -rf "$tmpdir"
}
check_lab1() {
check_cmd cd src/mrapps
check_cmd go build -buildmode=plugin wc.go
check_cmd go build -buildmode=plugin indexer.go
check_cmd go build -buildmode=plugin mtiming.go
check_cmd go build -buildmode=plugin rtiming.go
check_cmd go build -buildmode=plugin crash.go
check_cmd go build -buildmode=plugin nocrash.go
check_cmd cd ../main
check_cmd go build mrcoordinator.go
check_cmd go build mrworker.go
check_cmd go build mrsequential.go
}
check_lab2() {
check_cmd cd src/raft
check_cmd go test -c
}
check_lab3() {
check_cmd cd src/kvraft
check_cmd go test -c
}
check_lab4a() {
check_cmd cd src/shardctrler
check_cmd go test -c
}
check_lab4b() {
check_cmd cd src/shardkv
check_cmd go test -c
# also check other labs/parts
cd "$tmpdir"
check_lab4a
cd "$tmpdir"
check_lab3
cd "$tmpdir"
check_lab2
}
check_cmd() {
if ! "$@" >/dev/null 2>&1; then
echo "We tried building your source code with testing-related files reverted to original versions, and the build failed. This copy of your code is preserved in $tmpdir for debugging purposes. Please make sure the code you are trying to hand in does not make changes to test code." >&2
echo >&2
echo "The build failed while trying to run the following command:" >&2
echo >&2
echo "$ $@" >&2
echo " (cwd: ${PWD#$tmpdir/})" >&2
exit 1
fi
}
die() {
echo "$1" >&2
exit 1
}
main "$@"

.gitignore vendored Normal file

@@ -0,0 +1,4 @@
pkg/
api.key
.api.key.trimmed
*-handin.tar.gz

Makefile Normal file

@@ -0,0 +1,45 @@
# This is the Makefile helping you submit the labs.
# Just create 6.824/api.key with your API key in it,
# and submit your lab with the following command:
# $ make [lab1|lab2a|lab2b|lab2c|lab2d|lab3a|lab3b|lab4a|lab4b]
LABS=" lab1 lab2a lab2b lab2c lab2d lab3a lab3b lab4a lab4b "
%: check-%
@echo "Preparing $@-handin.tar.gz"
@if echo $(LABS) | grep -q " $@ " ; then \
echo "Tarring up your submission..." ; \
COPYFILE_DISABLE=1 tar cvzf $@-handin.tar.gz \
"--exclude=src/main/pg-*.txt" \
"--exclude=src/main/diskvd" \
"--exclude=src/mapreduce/824-mrinput-*.txt" \
"--exclude=src/main/mr-*" \
"--exclude=mrtmp.*" \
"--exclude=src/main/diff.out" \
"--exclude=src/main/mrmaster" \
"--exclude=src/main/mrsequential" \
"--exclude=src/main/mrworker" \
"--exclude=*.so" \
Makefile src; \
if ! test -e api.key ; then \
echo "Missing $(PWD)/api.key. Please create the file with your key in it or submit the $@-handin.tar.gz via the web interface."; \
else \
echo "Are you sure you want to submit $@? Enter 'yes' to continue:"; \
read line; \
if test "$$line" != "yes" ; then echo "Giving up submission"; exit; fi; \
if test `stat -c "%s" "$@-handin.tar.gz" 2>/dev/null || stat -f "%z" "$@-handin.tar.gz"` -ge 20971520 ; then echo "File exceeds 20MB."; exit; fi; \
cat api.key | tr -d '\n' > .api.key.trimmed ; \
curl --silent --fail --show-error -F file=@$@-handin.tar.gz -F "key=<.api.key.trimmed" \
https://6824.scripts.mit.edu/2022/handin.py/upload > /dev/null || { \
echo ; \
echo "Submit seems to have failed."; \
echo "Please upload the tarball manually on the submission website."; } \
fi; \
else \
echo "Bad target $@. Usage: make [$(LABS)]"; \
fi
.PHONY: check-%
check-%:
@echo "Checking that your submission builds correctly..."
@./.check-build git://g.csail.mit.edu/6.824-golabs-2022 $(patsubst check-%,%,$@)

src/.gitignore vendored Normal file

@@ -0,0 +1,12 @@
*.*/
main/mr-tmp/
mrtmp.*
824-mrinput-*.txt
/main/diff.out
/mapreduce/x.txt
/pbservice/x.txt
/kvpaxos/x.txt
*.so
/main/mrcoordinator
/main/mrsequential
/main/mrworker

src/go.mod Normal file

@@ -0,0 +1,3 @@
module 6.824
go 1.15

src/go.sum Normal file (empty)

src/kvraft/client.go Normal file

@@ -0,0 +1,64 @@
package kvraft
import "6.824/labrpc"
import "crypto/rand"
import "math/big"
type Clerk struct {
servers []*labrpc.ClientEnd
// You will have to modify this struct.
}
func nrand() int64 {
max := big.NewInt(int64(1) << 62)
bigx, _ := rand.Int(rand.Reader, max)
x := bigx.Int64()
return x
}
func MakeClerk(servers []*labrpc.ClientEnd) *Clerk {
ck := new(Clerk)
ck.servers = servers
// You'll have to add code here.
return ck
}
//
// fetch the current value for a key.
// returns "" if the key does not exist.
// keeps trying forever in the face of all other errors.
//
// you can send an RPC with code like this:
// ok := ck.servers[i].Call("KVServer.Get", &args, &reply)
//
// the types of args and reply (including whether they are pointers)
// must match the declared types of the RPC handler function's
// arguments. and reply must be passed as a pointer.
//
func (ck *Clerk) Get(key string) string {
// You will have to modify this function.
return ""
}
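// Editor-added sketch, not part of the handout: one way a Clerk might cycle
// through servers until a Get RPC succeeds, using only the Call() pattern
// described above and the fields already declared in common.go. The name
// getSketch and the retry policy are illustrative assumptions; a real
// solution will likely remember the last known leader and add request metadata.
func (ck *Clerk) getSketch(key string) string {
	args := GetArgs{Key: key}
	for i := 0; ; i = (i + 1) % len(ck.servers) {
		var reply GetReply
		ok := ck.servers[i].Call("KVServer.Get", &args, &reply)
		if ok && reply.Err == OK {
			return reply.Value
		}
		if ok && reply.Err == ErrNoKey {
			return ""
		}
		// dropped RPC, timeout, or ErrWrongLeader: try the next server.
	}
}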
//
// shared by Put and Append.
//
// you can send an RPC with code like this:
// ok := ck.servers[i].Call("KVServer.PutAppend", &args, &reply)
//
// the types of args and reply (including whether they are pointers)
// must match the declared types of the RPC handler function's
// arguments. and reply must be passed as a pointer.
//
func (ck *Clerk) PutAppend(key string, value string, op string) {
// You will have to modify this function.
}
func (ck *Clerk) Put(key string, value string) {
ck.PutAppend(key, value, "Put")
}
func (ck *Clerk) Append(key string, value string) {
ck.PutAppend(key, value, "Append")
}

src/kvraft/common.go Normal file

@@ -0,0 +1,33 @@
package kvraft
const (
OK = "OK"
ErrNoKey = "ErrNoKey"
ErrWrongLeader = "ErrWrongLeader"
)
type Err string
// Put or Append
type PutAppendArgs struct {
Key string
Value string
Op string // "Put" or "Append"
// You'll have to add definitions here.
// Field names must start with capital letters,
// otherwise RPC will break.
}
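// Editor-added note, not part of the handout: any fields you add must be
// exported so labgob/gob can encode them. A hypothetical illustration:
//
//	type PutAppendArgs struct {
//		Key      string
//		Value    string
//		Op       string
//		ClientId int64 // exported: encoded and sent over RPC
//		seq      int   // unexported: gob drops it silently; labgob prints a warning
//	}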
type PutAppendReply struct {
Err Err
}
type GetArgs struct {
Key string
// You'll have to add definitions here.
}
type GetReply struct {
Err Err
Value string
}

src/kvraft/config.go Normal file

@@ -0,0 +1,425 @@
package kvraft
import "6.824/labrpc"
import "testing"
import "os"
// import "log"
import crand "crypto/rand"
import "math/big"
import "math/rand"
import "encoding/base64"
import "sync"
import "runtime"
import "6.824/raft"
import "fmt"
import "time"
import "sync/atomic"
func randstring(n int) string {
b := make([]byte, 2*n)
crand.Read(b)
s := base64.URLEncoding.EncodeToString(b)
return s[0:n]
}
func makeSeed() int64 {
max := big.NewInt(int64(1) << 62)
bigx, _ := crand.Int(crand.Reader, max)
x := bigx.Int64()
return x
}
// Randomize server handles
func random_handles(kvh []*labrpc.ClientEnd) []*labrpc.ClientEnd {
sa := make([]*labrpc.ClientEnd, len(kvh))
copy(sa, kvh)
for i := range sa {
j := rand.Intn(i + 1)
sa[i], sa[j] = sa[j], sa[i]
}
return sa
}
type config struct {
mu sync.Mutex
t *testing.T
net *labrpc.Network
n int
kvservers []*KVServer
saved []*raft.Persister
endnames [][]string // names of each server's sending ClientEnds
clerks map[*Clerk][]string
nextClientId int
maxraftstate int
start time.Time // time at which make_config() was called
// begin()/end() statistics
t0 time.Time // time at which test_test.go called cfg.begin()
rpcs0 int // rpcTotal() at start of test
ops int32 // number of clerk get/put/append method calls
}
func (cfg *config) checkTimeout() {
// enforce a two minute real-time limit on each test
if !cfg.t.Failed() && time.Since(cfg.start) > 120*time.Second {
cfg.t.Fatal("test took longer than 120 seconds")
}
}
func (cfg *config) cleanup() {
cfg.mu.Lock()
defer cfg.mu.Unlock()
for i := 0; i < len(cfg.kvservers); i++ {
if cfg.kvservers[i] != nil {
cfg.kvservers[i].Kill()
}
}
cfg.net.Cleanup()
cfg.checkTimeout()
}
// Maximum log size across all servers
func (cfg *config) LogSize() int {
logsize := 0
for i := 0; i < cfg.n; i++ {
n := cfg.saved[i].RaftStateSize()
if n > logsize {
logsize = n
}
}
return logsize
}
// Maximum snapshot size across all servers
func (cfg *config) SnapshotSize() int {
snapshotsize := 0
for i := 0; i < cfg.n; i++ {
n := cfg.saved[i].SnapshotSize()
if n > snapshotsize {
snapshotsize = n
}
}
return snapshotsize
}
// attach server i to servers listed in to
// caller must hold cfg.mu
func (cfg *config) connectUnlocked(i int, to []int) {
// log.Printf("connect peer %d to %v\n", i, to)
// outgoing socket files
for j := 0; j < len(to); j++ {
endname := cfg.endnames[i][to[j]]
cfg.net.Enable(endname, true)
}
// incoming socket files
for j := 0; j < len(to); j++ {
endname := cfg.endnames[to[j]][i]
cfg.net.Enable(endname, true)
}
}
func (cfg *config) connect(i int, to []int) {
cfg.mu.Lock()
defer cfg.mu.Unlock()
cfg.connectUnlocked(i, to)
}
// detach server i from the servers listed in from
// caller must hold cfg.mu
func (cfg *config) disconnectUnlocked(i int, from []int) {
// log.Printf("disconnect peer %d from %v\n", i, from)
// outgoing socket files
for j := 0; j < len(from); j++ {
if cfg.endnames[i] != nil {
endname := cfg.endnames[i][from[j]]
cfg.net.Enable(endname, false)
}
}
// incoming socket files
for j := 0; j < len(from); j++ {
if cfg.endnames[from[j]] != nil {
endname := cfg.endnames[from[j]][i]
cfg.net.Enable(endname, false)
}
}
}
func (cfg *config) disconnect(i int, from []int) {
cfg.mu.Lock()
defer cfg.mu.Unlock()
cfg.disconnectUnlocked(i, from)
}
func (cfg *config) All() []int {
all := make([]int, cfg.n)
for i := 0; i < cfg.n; i++ {
all[i] = i
}
return all
}
func (cfg *config) ConnectAll() {
cfg.mu.Lock()
defer cfg.mu.Unlock()
for i := 0; i < cfg.n; i++ {
cfg.connectUnlocked(i, cfg.All())
}
}
// Sets up 2 partitions with connectivity between servers in each partition.
func (cfg *config) partition(p1 []int, p2 []int) {
cfg.mu.Lock()
defer cfg.mu.Unlock()
// log.Printf("partition servers into: %v %v\n", p1, p2)
for i := 0; i < len(p1); i++ {
cfg.disconnectUnlocked(p1[i], p2)
cfg.connectUnlocked(p1[i], p1)
}
for i := 0; i < len(p2); i++ {
cfg.disconnectUnlocked(p2[i], p1)
cfg.connectUnlocked(p2[i], p2)
}
}
// Create a clerk with clerk specific server names.
// Give it connections to all of the servers, but for
// now enable only connections to servers in to[].
func (cfg *config) makeClient(to []int) *Clerk {
cfg.mu.Lock()
defer cfg.mu.Unlock()
// a fresh set of ClientEnds.
ends := make([]*labrpc.ClientEnd, cfg.n)
endnames := make([]string, cfg.n)
for j := 0; j < cfg.n; j++ {
endnames[j] = randstring(20)
ends[j] = cfg.net.MakeEnd(endnames[j])
cfg.net.Connect(endnames[j], j)
}
ck := MakeClerk(random_handles(ends))
cfg.clerks[ck] = endnames
cfg.nextClientId++
cfg.ConnectClientUnlocked(ck, to)
return ck
}
func (cfg *config) deleteClient(ck *Clerk) {
cfg.mu.Lock()
defer cfg.mu.Unlock()
v := cfg.clerks[ck]
for i := 0; i < len(v); i++ {
os.Remove(v[i])
}
delete(cfg.clerks, ck)
}
// caller should hold cfg.mu
func (cfg *config) ConnectClientUnlocked(ck *Clerk, to []int) {
// log.Printf("ConnectClient %v to %v\n", ck, to)
endnames := cfg.clerks[ck]
for j := 0; j < len(to); j++ {
s := endnames[to[j]]
cfg.net.Enable(s, true)
}
}
func (cfg *config) ConnectClient(ck *Clerk, to []int) {
cfg.mu.Lock()
defer cfg.mu.Unlock()
cfg.ConnectClientUnlocked(ck, to)
}
// caller should hold cfg.mu
func (cfg *config) DisconnectClientUnlocked(ck *Clerk, from []int) {
// log.Printf("DisconnectClient %v from %v\n", ck, from)
endnames := cfg.clerks[ck]
for j := 0; j < len(from); j++ {
s := endnames[from[j]]
cfg.net.Enable(s, false)
}
}
func (cfg *config) DisconnectClient(ck *Clerk, from []int) {
cfg.mu.Lock()
defer cfg.mu.Unlock()
cfg.DisconnectClientUnlocked(ck, from)
}
// Shutdown a server by isolating it
func (cfg *config) ShutdownServer(i int) {
cfg.mu.Lock()
defer cfg.mu.Unlock()
cfg.disconnectUnlocked(i, cfg.All())
// disable client connections to the server.
// it's important to do this before creating
// the new Persister in saved[i], to avoid
// the possibility of the server returning a
// positive reply to an Append but persisting
// the result in the superseded Persister.
cfg.net.DeleteServer(i)
// a fresh persister, in case old instance
// continues to update the Persister.
// but copy old persister's content so that we always
// pass Make() the last persisted state.
if cfg.saved[i] != nil {
cfg.saved[i] = cfg.saved[i].Copy()
}
kv := cfg.kvservers[i]
if kv != nil {
cfg.mu.Unlock()
kv.Kill()
cfg.mu.Lock()
cfg.kvservers[i] = nil
}
}
// To restart a server, first call ShutdownServer()
func (cfg *config) StartServer(i int) {
cfg.mu.Lock()
// a fresh set of outgoing ClientEnd names.
cfg.endnames[i] = make([]string, cfg.n)
for j := 0; j < cfg.n; j++ {
cfg.endnames[i][j] = randstring(20)
}
// a fresh set of ClientEnds.
ends := make([]*labrpc.ClientEnd, cfg.n)
for j := 0; j < cfg.n; j++ {
ends[j] = cfg.net.MakeEnd(cfg.endnames[i][j])
cfg.net.Connect(cfg.endnames[i][j], j)
}
// a fresh persister, so old instance doesn't overwrite
// new instance's persisted state.
// give the fresh persister a copy of the old persister's
// state, so that the spec is that we pass StartKVServer()
// the last persisted state.
if cfg.saved[i] != nil {
cfg.saved[i] = cfg.saved[i].Copy()
} else {
cfg.saved[i] = raft.MakePersister()
}
cfg.mu.Unlock()
cfg.kvservers[i] = StartKVServer(ends, i, cfg.saved[i], cfg.maxraftstate)
kvsvc := labrpc.MakeService(cfg.kvservers[i])
rfsvc := labrpc.MakeService(cfg.kvservers[i].rf)
srv := labrpc.MakeServer()
srv.AddService(kvsvc)
srv.AddService(rfsvc)
cfg.net.AddServer(i, srv)
}
func (cfg *config) Leader() (bool, int) {
cfg.mu.Lock()
defer cfg.mu.Unlock()
for i := 0; i < cfg.n; i++ {
_, is_leader := cfg.kvservers[i].rf.GetState()
if is_leader {
return true, i
}
}
return false, 0
}
// Partition servers into 2 groups and put current leader in minority
func (cfg *config) make_partition() ([]int, []int) {
_, l := cfg.Leader()
p1 := make([]int, cfg.n/2+1)
p2 := make([]int, cfg.n/2)
j := 0
for i := 0; i < cfg.n; i++ {
if i != l {
if j < len(p1) {
p1[j] = i
} else {
p2[j-len(p1)] = i
}
j++
}
}
p2[len(p2)-1] = l
return p1, p2
}
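// Editor-added worked example, not part of the handout: with cfg.n = 5 and
// current leader l = 3, the loop above fills p1 = {0, 1, 2} and p2 = {4, _},
// and the final assignment places the leader last, so p2 = {4, 3} -- the
// leader ends up in the minority partition.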
var ncpu_once sync.Once
func make_config(t *testing.T, n int, unreliable bool, maxraftstate int) *config {
ncpu_once.Do(func() {
if runtime.NumCPU() < 2 {
fmt.Printf("warning: only one CPU, which may conceal locking bugs\n")
}
rand.Seed(makeSeed())
})
runtime.GOMAXPROCS(4)
cfg := &config{}
cfg.t = t
cfg.net = labrpc.MakeNetwork()
cfg.n = n
cfg.kvservers = make([]*KVServer, cfg.n)
cfg.saved = make([]*raft.Persister, cfg.n)
cfg.endnames = make([][]string, cfg.n)
cfg.clerks = make(map[*Clerk][]string)
cfg.nextClientId = cfg.n + 1000 // client ids start 1000 above the highest serverid
cfg.maxraftstate = maxraftstate
cfg.start = time.Now()
// create a full set of KV servers.
for i := 0; i < cfg.n; i++ {
cfg.StartServer(i)
}
cfg.ConnectAll()
cfg.net.Reliable(!unreliable)
return cfg
}
func (cfg *config) rpcTotal() int {
return cfg.net.GetTotalCount()
}
// start a Test.
// print the Test message.
// e.g. cfg.begin("Test (2B): RPC counts aren't too high")
func (cfg *config) begin(description string) {
fmt.Printf("%s ...\n", description)
cfg.t0 = time.Now()
cfg.rpcs0 = cfg.rpcTotal()
atomic.StoreInt32(&cfg.ops, 0)
}
func (cfg *config) op() {
atomic.AddInt32(&cfg.ops, 1)
}
// end a Test -- the fact that we got here means there
// was no failure.
// print the Passed message,
// and some performance numbers.
func (cfg *config) end() {
cfg.checkTimeout()
if cfg.t.Failed() == false {
t := time.Since(cfg.t0).Seconds() // real time
npeers := cfg.n // number of Raft peers
nrpc := cfg.rpcTotal() - cfg.rpcs0 // number of RPC sends
ops := atomic.LoadInt32(&cfg.ops) // number of clerk get/put/append calls
fmt.Printf(" ... Passed --")
fmt.Printf(" %4.1f %d %5d %4d\n", t, npeers, nrpc, ops)
}
}

src/kvraft/server.go Normal file

@@ -0,0 +1,101 @@
package kvraft
import (
"6.824/labgob"
"6.824/labrpc"
"6.824/raft"
"log"
"sync"
"sync/atomic"
)
const Debug = false
func DPrintf(format string, a ...interface{}) (n int, err error) {
if Debug {
log.Printf(format, a...)
}
return
}
type Op struct {
// Your definitions here.
// Field names must start with capital letters,
// otherwise RPC will break.
}
type KVServer struct {
mu sync.Mutex
me int
rf *raft.Raft
applyCh chan raft.ApplyMsg
dead int32 // set by Kill()
maxraftstate int // snapshot if log grows this big
// Your definitions here.
}
func (kv *KVServer) Get(args *GetArgs, reply *GetReply) {
// Your code here.
}
func (kv *KVServer) PutAppend(args *PutAppendArgs, reply *PutAppendReply) {
// Your code here.
}
//
// the tester calls Kill() when a KVServer instance won't
// be needed again. for your convenience, we supply
// code to set rf.dead (without needing a lock),
// and a killed() method to test rf.dead in
// long-running loops. you can also add your own
// code to Kill(). you're not required to do anything
// about this, but it may be convenient (for example)
// to suppress debug output from a Kill()ed instance.
//
func (kv *KVServer) Kill() {
atomic.StoreInt32(&kv.dead, 1)
kv.rf.Kill()
// Your code here, if desired.
}
func (kv *KVServer) killed() bool {
z := atomic.LoadInt32(&kv.dead)
return z == 1
}
//
// servers[] contains the ports of the set of
// servers that will cooperate via Raft to
// form the fault-tolerant key/value service.
// me is the index of the current server in servers[].
// the k/v server should store snapshots through the underlying Raft
// implementation, which should call persister.SaveStateAndSnapshot() to
// atomically save the Raft state along with the snapshot.
// the k/v server should snapshot when Raft's saved state exceeds maxraftstate bytes,
// in order to allow Raft to garbage-collect its log. if maxraftstate is -1,
// you don't need to snapshot.
// StartKVServer() must return quickly, so it should start goroutines
// for any long-running work.
//
func StartKVServer(servers []*labrpc.ClientEnd, me int, persister *raft.Persister, maxraftstate int) *KVServer {
// call labgob.Register on structures you want
// Go's RPC library to marshall/unmarshall.
labgob.Register(Op{})
kv := new(KVServer)
kv.me = me
kv.maxraftstate = maxraftstate
// You may need initialization code here.
kv.applyCh = make(chan raft.ApplyMsg)
kv.rf = raft.Make(servers, me, persister, kv.applyCh)
// You may need initialization code here.
return kv
}
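// Editor-added sketch, not part of the handout: StartKVServer() must return
// quickly, so long-running work belongs in a goroutine such as the one below,
// which drains applyCh until the server is killed. It assumes raft.ApplyMsg
// exposes CommandValid and Command as in the course's Raft lab; how a command
// updates the key/value state is left entirely to your design.
func (kv *KVServer) applyLoopSketch() {
	for !kv.killed() {
		msg := <-kv.applyCh
		if msg.CommandValid {
			kv.mu.Lock()
			// apply msg.Command (an Op) to the in-memory key/value state here,
			// then wake any RPC handler waiting on this log index.
			kv.mu.Unlock()
		}
	}
}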

src/kvraft/test_test.go Normal file

@@ -0,0 +1,716 @@
package kvraft
import "6.824/porcupine"
import "6.824/models"
import "testing"
import "strconv"
import "time"
import "math/rand"
import "strings"
import "sync"
import "sync/atomic"
import "fmt"
import "io/ioutil"
// The tester generously allows solutions to complete elections in one second
// (much more than the paper's range of timeouts).
const electionTimeout = 1 * time.Second
const linearizabilityCheckTimeout = 1 * time.Second
type OpLog struct {
operations []porcupine.Operation
sync.Mutex
}
func (log *OpLog) Append(op porcupine.Operation) {
log.Lock()
defer log.Unlock()
log.operations = append(log.operations, op)
}
func (log *OpLog) Read() []porcupine.Operation {
log.Lock()
defer log.Unlock()
ops := make([]porcupine.Operation, len(log.operations))
copy(ops, log.operations)
return ops
}
// get/put/putappend that keep counts
func Get(cfg *config, ck *Clerk, key string, log *OpLog, cli int) string {
start := time.Now().UnixNano()
v := ck.Get(key)
end := time.Now().UnixNano()
cfg.op()
if log != nil {
log.Append(porcupine.Operation{
Input: models.KvInput{Op: 0, Key: key},
Output: models.KvOutput{Value: v},
Call: start,
Return: end,
ClientId: cli,
})
}
return v
}
func Put(cfg *config, ck *Clerk, key string, value string, log *OpLog, cli int) {
start := time.Now().UnixNano()
ck.Put(key, value)
end := time.Now().UnixNano()
cfg.op()
if log != nil {
log.Append(porcupine.Operation{
Input: models.KvInput{Op: 1, Key: key, Value: value},
Output: models.KvOutput{},
Call: start,
Return: end,
ClientId: cli,
})
}
}
func Append(cfg *config, ck *Clerk, key string, value string, log *OpLog, cli int) {
start := time.Now().UnixNano()
ck.Append(key, value)
end := time.Now().UnixNano()
cfg.op()
if log != nil {
log.Append(porcupine.Operation{
Input: models.KvInput{Op: 2, Key: key, Value: value},
Output: models.KvOutput{},
Call: start,
Return: end,
ClientId: cli,
})
}
}
func check(cfg *config, t *testing.T, ck *Clerk, key string, value string) {
v := Get(cfg, ck, key, nil, -1)
if v != value {
t.Fatalf("Get(%v): expected:\n%v\nreceived:\n%v", key, value, v)
}
}
// a client runs the function f and then signals it is done
func run_client(t *testing.T, cfg *config, me int, ca chan bool, fn func(me int, ck *Clerk, t *testing.T)) {
ok := false
defer func() { ca <- ok }()
ck := cfg.makeClient(cfg.All())
fn(me, ck, t)
ok = true
cfg.deleteClient(ck)
}
// spawn ncli clients and wait until they are all done
func spawn_clients_and_wait(t *testing.T, cfg *config, ncli int, fn func(me int, ck *Clerk, t *testing.T)) {
ca := make([]chan bool, ncli)
for cli := 0; cli < ncli; cli++ {
ca[cli] = make(chan bool)
go run_client(t, cfg, cli, ca[cli], fn)
}
// log.Printf("spawn_clients_and_wait: waiting for clients")
for cli := 0; cli < ncli; cli++ {
ok := <-ca[cli]
// log.Printf("spawn_clients_and_wait: client %d is done\n", cli)
if ok == false {
t.Fatalf("failure")
}
}
}
// predict effect of Append(k, val) if old value is prev.
func NextValue(prev string, val string) string {
return prev + val
}
// check that for a specific client all known appends are present in a value,
// and in order
func checkClntAppends(t *testing.T, clnt int, v string, count int) {
lastoff := -1
for j := 0; j < count; j++ {
wanted := "x " + strconv.Itoa(clnt) + " " + strconv.Itoa(j) + " y"
off := strings.Index(v, wanted)
if off < 0 {
t.Fatalf("%v missing element %v in Append result %v", clnt, wanted, v)
}
off1 := strings.LastIndex(v, wanted)
if off1 != off {
t.Fatalf("duplicate element %v in Append result", wanted)
}
if off <= lastoff {
t.Fatalf("wrong order for element %v in Append result", wanted)
}
lastoff = off
}
}
// check that all known appends are present in a value,
// and are in order for each concurrent client.
func checkConcurrentAppends(t *testing.T, v string, counts []int) {
nclients := len(counts)
for i := 0; i < nclients; i++ {
lastoff := -1
for j := 0; j < counts[i]; j++ {
wanted := "x " + strconv.Itoa(i) + " " + strconv.Itoa(j) + " y"
off := strings.Index(v, wanted)
if off < 0 {
t.Fatalf("%v missing element %v in Append result %v", i, wanted, v)
}
off1 := strings.LastIndex(v, wanted)
if off1 != off {
t.Fatalf("duplicate element %v in Append result", wanted)
}
if off <= lastoff {
t.Fatalf("wrong order for element %v in Append result", wanted)
}
lastoff = off
}
}
}
// repartition the servers periodically
func partitioner(t *testing.T, cfg *config, ch chan bool, done *int32) {
defer func() { ch <- true }()
for atomic.LoadInt32(done) == 0 {
a := make([]int, cfg.n)
for i := 0; i < cfg.n; i++ {
a[i] = (rand.Int() % 2)
}
pa := make([][]int, 2)
for i := 0; i < 2; i++ {
pa[i] = make([]int, 0)
for j := 0; j < cfg.n; j++ {
if a[j] == i {
pa[i] = append(pa[i], j)
}
}
}
cfg.partition(pa[0], pa[1])
time.Sleep(electionTimeout + time.Duration(rand.Int63()%200)*time.Millisecond)
}
}
// Basic test is as follows: one or more clients submitting Append/Get
// operations to set of servers for some period of time. After the period is
// over, test checks that all appended values are present and in order for a
// particular key. If unreliable is set, RPCs may fail. If crash is set, the
// servers crash after the period is over and restart. If partitions is set,
// the test repartitions the network concurrently with the clients and servers. If
// maxraftstate is a positive number, the size of the state for Raft (i.e., log
// size) shouldn't exceed 8*maxraftstate. If maxraftstate is negative,
// snapshots shouldn't be used.
func GenericTest(t *testing.T, part string, nclients int, nservers int, unreliable bool, crash bool, partitions bool, maxraftstate int, randomkeys bool) {
title := "Test: "
if unreliable {
// the network drops RPC requests and replies.
title = title + "unreliable net, "
}
if crash {
// peers re-start, and thus persistence must work.
title = title + "restarts, "
}
if partitions {
// the network may partition
title = title + "partitions, "
}
if maxraftstate != -1 {
title = title + "snapshots, "
}
if randomkeys {
title = title + "random keys, "
}
if nclients > 1 {
title = title + "many clients"
} else {
title = title + "one client"
}
title = title + " (" + part + ")" // 3A or 3B
cfg := make_config(t, nservers, unreliable, maxraftstate)
defer cfg.cleanup()
cfg.begin(title)
opLog := &OpLog{}
ck := cfg.makeClient(cfg.All())
done_partitioner := int32(0)
done_clients := int32(0)
ch_partitioner := make(chan bool)
clnts := make([]chan int, nclients)
for i := 0; i < nclients; i++ {
clnts[i] = make(chan int)
}
for i := 0; i < 3; i++ {
// log.Printf("Iteration %v\n", i)
atomic.StoreInt32(&done_clients, 0)
atomic.StoreInt32(&done_partitioner, 0)
go spawn_clients_and_wait(t, cfg, nclients, func(cli int, myck *Clerk, t *testing.T) {
j := 0
defer func() {
clnts[cli] <- j
}()
last := "" // only used when not randomkeys
if !randomkeys {
Put(cfg, myck, strconv.Itoa(cli), last, opLog, cli)
}
for atomic.LoadInt32(&done_clients) == 0 {
var key string
if randomkeys {
key = strconv.Itoa(rand.Intn(nclients))
} else {
key = strconv.Itoa(cli)
}
nv := "x " + strconv.Itoa(cli) + " " + strconv.Itoa(j) + " y"
if (rand.Int() % 1000) < 500 {
// log.Printf("%d: client new append %v\n", cli, nv)
Append(cfg, myck, key, nv, opLog, cli)
if !randomkeys {
last = NextValue(last, nv)
}
j++
} else if randomkeys && (rand.Int()%1000) < 100 {
// we only do this when using random keys, because it would break the
// check done after Get() operations
Put(cfg, myck, key, nv, opLog, cli)
j++
} else {
// log.Printf("%d: client new get %v\n", cli, key)
v := Get(cfg, myck, key, opLog, cli)
// the following check only makes sense when we're not using random keys
if !randomkeys && v != last {
t.Fatalf("get wrong value, key %v, wanted:\n%v\n, got\n%v\n", key, last, v)
}
}
}
})
if partitions {
// Allow the clients to perform some operations without interruption
time.Sleep(1 * time.Second)
go partitioner(t, cfg, ch_partitioner, &done_partitioner)
}
time.Sleep(5 * time.Second)
atomic.StoreInt32(&done_clients, 1) // tell clients to quit
atomic.StoreInt32(&done_partitioner, 1) // tell partitioner to quit
if partitions {
// log.Printf("wait for partitioner\n")
<-ch_partitioner
// reconnect network and submit a request. A client may
// have submitted a request in a minority. That request
// won't return until that server discovers a new term
// has started.
cfg.ConnectAll()
// wait for a while so that we have a new term
time.Sleep(electionTimeout)
}
if crash {
// log.Printf("shutdown servers\n")
for i := 0; i < nservers; i++ {
cfg.ShutdownServer(i)
}
// Wait for a while for servers to shutdown, since
// shutdown isn't a real crash and isn't instantaneous
time.Sleep(electionTimeout)
// log.Printf("restart servers\n")
// crash and re-start all
for i := 0; i < nservers; i++ {
cfg.StartServer(i)
}
cfg.ConnectAll()
}
// log.Printf("wait for clients\n")
for i := 0; i < nclients; i++ {
// log.Printf("read from clients %d\n", i)
j := <-clnts[i]
// if j < 10 {
// log.Printf("Warning: client %d managed to perform only %d put operations in 1 sec?\n", i, j)
// }
key := strconv.Itoa(i)
// log.Printf("Check %v for client %d\n", j, i)
v := Get(cfg, ck, key, opLog, 0)
if !randomkeys {
checkClntAppends(t, i, v, j)
}
}
if maxraftstate > 0 {
// Check maximum after the servers have processed all client
// requests and had time to checkpoint.
sz := cfg.LogSize()
if sz > 8*maxraftstate {
t.Fatalf("logs were not trimmed (%v > 8*%v)", sz, maxraftstate)
}
}
if maxraftstate < 0 {
// Check that snapshots are not used
ssz := cfg.SnapshotSize()
if ssz > 0 {
t.Fatalf("snapshot too large (%v), should not be used when maxraftstate = %d", ssz, maxraftstate)
}
}
}
res, info := porcupine.CheckOperationsVerbose(models.KvModel, opLog.Read(), linearizabilityCheckTimeout)
if res == porcupine.Illegal {
file, err := ioutil.TempFile("", "*.html")
if err != nil {
fmt.Printf("info: failed to create temp file for visualization")
} else {
err = porcupine.Visualize(models.KvModel, info, file)
if err != nil {
fmt.Printf("info: failed to write history visualization to %s\n", file.Name())
} else {
fmt.Printf("info: wrote history visualization to %s\n", file.Name())
}
}
t.Fatal("history is not linearizable")
} else if res == porcupine.Unknown {
fmt.Println("info: linearizability check timed out, assuming history is ok")
}
cfg.end()
}
// Check that ops are committed fast enough, better than 1 per heartbeat interval
func GenericTestSpeed(t *testing.T, part string, maxraftstate int) {
const nservers = 3
const numOps = 1000
cfg := make_config(t, nservers, false, maxraftstate)
defer cfg.cleanup()
ck := cfg.makeClient(cfg.All())
cfg.begin(fmt.Sprintf("Test: ops complete fast enough (%s)", part))
// wait until first op completes, so we know a leader is elected
// and KV servers are ready to process client requests
ck.Get("x")
start := time.Now()
for i := 0; i < numOps; i++ {
ck.Append("x", "x 0 "+strconv.Itoa(i)+" y")
}
dur := time.Since(start)
v := ck.Get("x")
checkClntAppends(t, 0, v, numOps)
// heartbeat interval should be ~ 100 ms; require at least 3 ops per
// heartbeat interval, i.e. at most ~33 ms per op (~33 s total for numOps=1000)
const heartbeatInterval = 100 * time.Millisecond
const opsPerInterval = 3
const timePerOp = heartbeatInterval / opsPerInterval
if dur > numOps*timePerOp {
t.Fatalf("Operations completed too slowly %v/op > %v/op\n", dur/numOps, timePerOp)
}
cfg.end()
}
func TestBasic3A(t *testing.T) {
// Test: one client (3A) ...
GenericTest(t, "3A", 1, 5, false, false, false, -1, false)
}
func TestSpeed3A(t *testing.T) {
GenericTestSpeed(t, "3A", -1)
}
func TestConcurrent3A(t *testing.T) {
// Test: many clients (3A) ...
GenericTest(t, "3A", 5, 5, false, false, false, -1, false)
}
func TestUnreliable3A(t *testing.T) {
// Test: unreliable net, many clients (3A) ...
GenericTest(t, "3A", 5, 5, true, false, false, -1, false)
}
func TestUnreliableOneKey3A(t *testing.T) {
const nservers = 3
cfg := make_config(t, nservers, true, -1)
defer cfg.cleanup()
ck := cfg.makeClient(cfg.All())
cfg.begin("Test: concurrent append to same key, unreliable (3A)")
Put(cfg, ck, "k", "", nil, -1)
const nclient = 5
const upto = 10
spawn_clients_and_wait(t, cfg, nclient, func(me int, myck *Clerk, t *testing.T) {
n := 0
for n < upto {
Append(cfg, myck, "k", "x "+strconv.Itoa(me)+" "+strconv.Itoa(n)+" y", nil, -1)
n++
}
})
var counts []int
for i := 0; i < nclient; i++ {
counts = append(counts, upto)
}
vx := Get(cfg, ck, "k", nil, -1)
checkConcurrentAppends(t, vx, counts)
cfg.end()
}
// Submit a request in the minority partition and check that the request
// doesn't go through until the partition heals. The leader in the original
// network ends up in the minority partition.
func TestOnePartition3A(t *testing.T) {
const nservers = 5
cfg := make_config(t, nservers, false, -1)
defer cfg.cleanup()
ck := cfg.makeClient(cfg.All())
Put(cfg, ck, "1", "13", nil, -1)
cfg.begin("Test: progress in majority (3A)")
p1, p2 := cfg.make_partition()
cfg.partition(p1, p2)
ckp1 := cfg.makeClient(p1) // connect ckp1 to p1
ckp2a := cfg.makeClient(p2) // connect ckp2a to p2
ckp2b := cfg.makeClient(p2) // connect ckp2b to p2
Put(cfg, ckp1, "1", "14", nil, -1)
check(cfg, t, ckp1, "1", "14")
cfg.end()
done0 := make(chan bool)
done1 := make(chan bool)
cfg.begin("Test: no progress in minority (3A)")
go func() {
Put(cfg, ckp2a, "1", "15", nil, -1)
done0 <- true
}()
go func() {
Get(cfg, ckp2b, "1", nil, -1) // different clerk in p2
done1 <- true
}()
select {
case <-done0:
t.Fatalf("Put in minority completed")
case <-done1:
t.Fatalf("Get in minority completed")
case <-time.After(time.Second):
}
check(cfg, t, ckp1, "1", "14")
Put(cfg, ckp1, "1", "16", nil, -1)
check(cfg, t, ckp1, "1", "16")
cfg.end()
cfg.begin("Test: completion after heal (3A)")
cfg.ConnectAll()
cfg.ConnectClient(ckp2a, cfg.All())
cfg.ConnectClient(ckp2b, cfg.All())
time.Sleep(electionTimeout)
select {
case <-done0:
case <-time.After(30 * 100 * time.Millisecond):
t.Fatalf("Put did not complete")
}
select {
case <-done1:
case <-time.After(30 * 100 * time.Millisecond):
t.Fatalf("Get did not complete")
default:
}
check(cfg, t, ck, "1", "15")
cfg.end()
}
func TestManyPartitionsOneClient3A(t *testing.T) {
// Test: partitions, one client (3A) ...
GenericTest(t, "3A", 1, 5, false, false, true, -1, false)
}
func TestManyPartitionsManyClients3A(t *testing.T) {
// Test: partitions, many clients (3A) ...
GenericTest(t, "3A", 5, 5, false, false, true, -1, false)
}
func TestPersistOneClient3A(t *testing.T) {
// Test: restarts, one client (3A) ...
GenericTest(t, "3A", 1, 5, false, true, false, -1, false)
}
func TestPersistConcurrent3A(t *testing.T) {
// Test: restarts, many clients (3A) ...
GenericTest(t, "3A", 5, 5, false, true, false, -1, false)
}
func TestPersistConcurrentUnreliable3A(t *testing.T) {
// Test: unreliable net, restarts, many clients (3A) ...
GenericTest(t, "3A", 5, 5, true, true, false, -1, false)
}
func TestPersistPartition3A(t *testing.T) {
// Test: restarts, partitions, many clients (3A) ...
GenericTest(t, "3A", 5, 5, false, true, true, -1, false)
}
func TestPersistPartitionUnreliable3A(t *testing.T) {
// Test: unreliable net, restarts, partitions, many clients (3A) ...
GenericTest(t, "3A", 5, 5, true, true, true, -1, false)
}
func TestPersistPartitionUnreliableLinearizable3A(t *testing.T) {
// Test: unreliable net, restarts, partitions, random keys, many clients (3A) ...
GenericTest(t, "3A", 15, 7, true, true, true, -1, true)
}
//
// if one server falls behind, then rejoins, does it
// recover by using the InstallSnapshot RPC?
// also checks that majority discards committed log entries
// even if minority doesn't respond.
//
func TestSnapshotRPC3B(t *testing.T) {
const nservers = 3
maxraftstate := 1000
cfg := make_config(t, nservers, false, maxraftstate)
defer cfg.cleanup()
ck := cfg.makeClient(cfg.All())
cfg.begin("Test: InstallSnapshot RPC (3B)")
Put(cfg, ck, "a", "A", nil, -1)
check(cfg, t, ck, "a", "A")
// a bunch of puts into the majority partition.
cfg.partition([]int{0, 1}, []int{2})
{
ck1 := cfg.makeClient([]int{0, 1})
for i := 0; i < 50; i++ {
Put(cfg, ck1, strconv.Itoa(i), strconv.Itoa(i), nil, -1)
}
time.Sleep(electionTimeout)
Put(cfg, ck1, "b", "B", nil, -1)
}
// check that the majority partition has thrown away
// most of its log entries.
sz := cfg.LogSize()
if sz > 8*maxraftstate {
t.Fatalf("logs were not trimmed (%v > 8*%v)", sz, maxraftstate)
}
// now make group that requires participation of
// lagging server, so that it has to catch up.
cfg.partition([]int{0, 2}, []int{1})
{
ck1 := cfg.makeClient([]int{0, 2})
Put(cfg, ck1, "c", "C", nil, -1)
Put(cfg, ck1, "d", "D", nil, -1)
check(cfg, t, ck1, "a", "A")
check(cfg, t, ck1, "b", "B")
check(cfg, t, ck1, "1", "1")
check(cfg, t, ck1, "49", "49")
}
// now everybody
cfg.partition([]int{0, 1, 2}, []int{})
Put(cfg, ck, "e", "E", nil, -1)
check(cfg, t, ck, "c", "C")
check(cfg, t, ck, "e", "E")
check(cfg, t, ck, "1", "1")
cfg.end()
}
// are the snapshots not too huge? 500 bytes is a generous bound for the
// operations we're doing here.
func TestSnapshotSize3B(t *testing.T) {
const nservers = 3
maxraftstate := 1000
maxsnapshotstate := 500
cfg := make_config(t, nservers, false, maxraftstate)
defer cfg.cleanup()
ck := cfg.makeClient(cfg.All())
cfg.begin("Test: snapshot size is reasonable (3B)")
for i := 0; i < 200; i++ {
Put(cfg, ck, "x", "0", nil, -1)
check(cfg, t, ck, "x", "0")
Put(cfg, ck, "x", "1", nil, -1)
check(cfg, t, ck, "x", "1")
}
// check that servers have thrown away most of their log entries
sz := cfg.LogSize()
if sz > 8*maxraftstate {
t.Fatalf("logs were not trimmed (%v > 8*%v)", sz, maxraftstate)
}
// check that the snapshots are not unreasonably large
ssz := cfg.SnapshotSize()
if ssz > maxsnapshotstate {
t.Fatalf("snapshot too large (%v > %v)", ssz, maxsnapshotstate)
}
cfg.end()
}
func TestSpeed3B(t *testing.T) {
GenericTestSpeed(t, "3B", 1000)
}
func TestSnapshotRecover3B(t *testing.T) {
// Test: restarts, snapshots, one client (3B) ...
GenericTest(t, "3B", 1, 5, false, true, false, 1000, false)
}
func TestSnapshotRecoverManyClients3B(t *testing.T) {
// Test: restarts, snapshots, many clients (3B) ...
GenericTest(t, "3B", 20, 5, false, true, false, 1000, false)
}
func TestSnapshotUnreliable3B(t *testing.T) {
// Test: unreliable net, snapshots, many clients (3B) ...
GenericTest(t, "3B", 5, 5, true, false, false, 1000, false)
}
func TestSnapshotUnreliableRecover3B(t *testing.T) {
// Test: unreliable net, restarts, snapshots, many clients (3B) ...
GenericTest(t, "3B", 5, 5, true, true, false, 1000, false)
}
func TestSnapshotUnreliableRecoverConcurrentPartition3B(t *testing.T) {
// Test: unreliable net, restarts, partitions, snapshots, many clients (3B) ...
GenericTest(t, "3B", 5, 5, true, true, true, 1000, false)
}
func TestSnapshotUnreliableRecoverConcurrentPartitionLinearizable3B(t *testing.T) {
// Test: unreliable net, restarts, partitions, snapshots, random keys, many clients (3B) ...
GenericTest(t, "3B", 15, 7, true, true, true, 1000, true)
}

src/labgob/labgob.go Normal file

@@ -0,0 +1,177 @@
package labgob
//
// trying to send non-capitalized fields over RPC produces a range of
// misbehavior, including both mysterious incorrect computation and
// outright crashes. so this wrapper around Go's encoding/gob warns
// about non-capitalized field names.
//
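// for example (editor-added illustration, not part of the handout):
//
//	type Args struct {
//		Term  int // exported: encoded normally
//		votes int // unexported: gob drops it silently; labgob warns instead
//	}
//	labgob.Register(Args{}) // prints the "labgob error: lower-case field ..." warning
//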
import "encoding/gob"
import "io"
import "reflect"
import "fmt"
import "sync"
import "unicode"
import "unicode/utf8"
var mu sync.Mutex
var errorCount int // for TestCapital
var checked map[reflect.Type]bool
type LabEncoder struct {
gob *gob.Encoder
}
func NewEncoder(w io.Writer) *LabEncoder {
enc := &LabEncoder{}
enc.gob = gob.NewEncoder(w)
return enc
}
func (enc *LabEncoder) Encode(e interface{}) error {
checkValue(e)
return enc.gob.Encode(e)
}
func (enc *LabEncoder) EncodeValue(value reflect.Value) error {
checkValue(value.Interface())
return enc.gob.EncodeValue(value)
}
type LabDecoder struct {
gob *gob.Decoder
}
func NewDecoder(r io.Reader) *LabDecoder {
dec := &LabDecoder{}
dec.gob = gob.NewDecoder(r)
return dec
}
func (dec *LabDecoder) Decode(e interface{}) error {
checkValue(e)
checkDefault(e)
return dec.gob.Decode(e)
}
func Register(value interface{}) {
checkValue(value)
gob.Register(value)
}
func RegisterName(name string, value interface{}) {
checkValue(value)
gob.RegisterName(name, value)
}
func checkValue(value interface{}) {
checkType(reflect.TypeOf(value))
}
func checkType(t reflect.Type) {
k := t.Kind()
mu.Lock()
// only complain once, and avoid recursion.
if checked == nil {
checked = map[reflect.Type]bool{}
}
if checked[t] {
mu.Unlock()
return
}
checked[t] = true
mu.Unlock()
switch k {
case reflect.Struct:
for i := 0; i < t.NumField(); i++ {
f := t.Field(i)
rune, _ := utf8.DecodeRuneInString(f.Name)
if unicode.IsUpper(rune) == false {
// ta da
fmt.Printf("labgob error: lower-case field %v of %v in RPC or persist/snapshot will break your Raft\n",
f.Name, t.Name())
mu.Lock()
errorCount += 1
mu.Unlock()
}
checkType(f.Type)
}
return
case reflect.Slice, reflect.Array, reflect.Ptr:
checkType(t.Elem())
return
case reflect.Map:
checkType(t.Elem())
checkType(t.Key())
return
default:
return
}
}
//
// warn if the value contains non-default values,
// as it would if one sent an RPC but the reply
// struct was already modified. if the RPC reply
// contains default values, GOB won't overwrite
// the non-default value.
//
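// for example (editor-added, not part of the handout), this caller-side
// pattern is what typically triggers the warning:
//
//	var reply SomeReply                       // zero-valued: safe to decode into
//	end.Call("Service.Method", &args, &reply) // fine the first time
//	end.Call("Service.Method", &args, &reply) // risky: reply now holds old data,
//	                                          // and gob keeps stale fields that
//	                                          // arrive as defaults
//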
func checkDefault(value interface{}) {
if value == nil {
return
}
checkDefault1(reflect.ValueOf(value), 1, "")
}
func checkDefault1(value reflect.Value, depth int, name string) {
if depth > 3 {
return
}
t := value.Type()
k := t.Kind()
switch k {
case reflect.Struct:
for i := 0; i < t.NumField(); i++ {
vv := value.Field(i)
name1 := t.Field(i).Name
if name != "" {
name1 = name + "." + name1
}
checkDefault1(vv, depth+1, name1)
}
return
case reflect.Ptr:
if value.IsNil() {
return
}
checkDefault1(value.Elem(), depth+1, name)
return
case reflect.Bool,
reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64,
reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64,
reflect.Uintptr, reflect.Float32, reflect.Float64,
reflect.String:
if reflect.DeepEqual(reflect.Zero(t).Interface(), value.Interface()) == false {
mu.Lock()
if errorCount < 1 {
what := name
if what == "" {
what = t.Name()
}
// this warning typically arises if code re-uses the same RPC reply
// variable for multiple RPC calls, or if code restores persisted
// state into a variable that already has non-default values.
fmt.Printf("labgob warning: Decoding into a non-default variable/field %v may not work\n",
what)
}
errorCount += 1
mu.Unlock()
}
return
}
}

src/labgob/test_test.go Normal file

@@ -0,0 +1,172 @@
package labgob
import "testing"
import "bytes"
type T1 struct {
T1int0 int
T1int1 int
T1string0 string
T1string1 string
}
type T2 struct {
T2slice []T1
T2map map[int]*T1
T2t3 interface{}
}
type T3 struct {
T3int999 int
}
//
// test that we didn't break GOB.
//
func TestGOB(t *testing.T) {
e0 := errorCount
w := new(bytes.Buffer)
Register(T3{})
{
x0 := 0
x1 := 1
t1 := T1{}
t1.T1int1 = 1
t1.T1string1 = "6.824"
t2 := T2{}
t2.T2slice = []T1{T1{}, t1}
t2.T2map = map[int]*T1{}
t2.T2map[99] = &T1{1, 2, "x", "y"}
t2.T2t3 = T3{999}
e := NewEncoder(w)
e.Encode(x0)
e.Encode(x1)
e.Encode(t1)
e.Encode(t2)
}
data := w.Bytes()
{
var x0 int
var x1 int
var t1 T1
var t2 T2
r := bytes.NewBuffer(data)
d := NewDecoder(r)
if d.Decode(&x0) != nil ||
d.Decode(&x1) != nil ||
d.Decode(&t1) != nil ||
d.Decode(&t2) != nil {
t.Fatalf("Decode failed")
}
if x0 != 0 {
t.Fatalf("wrong x0 %v\n", x0)
}
if x1 != 1 {
t.Fatalf("wrong x1 %v\n", x1)
}
if t1.T1int0 != 0 {
t.Fatalf("wrong t1.T1int0 %v\n", t1.T1int0)
}
if t1.T1int1 != 1 {
t.Fatalf("wrong t1.T1int1 %v\n", t1.T1int1)
}
if t1.T1string0 != "" {
t.Fatalf("wrong t1.T1string0 %v\n", t1.T1string0)
}
if t1.T1string1 != "6.824" {
t.Fatalf("wrong t1.T1string1 %v\n", t1.T1string1)
}
if len(t2.T2slice) != 2 {
t.Fatalf("wrong t2.T2slice len %v\n", len(t2.T2slice))
}
if t2.T2slice[1].T1int1 != 1 {
t.Fatalf("wrong slice value\n")
}
if len(t2.T2map) != 1 {
t.Fatalf("wrong t2.T2map len %v\n", len(t2.T2map))
}
if t2.T2map[99].T1string1 != "y" {
t.Fatalf("wrong map value\n")
}
t3 := (t2.T2t3).(T3)
if t3.T3int999 != 999 {
t.Fatalf("wrong t2.T2t3.T3int999\n")
}
}
if errorCount != e0 {
t.Fatalf("there were errors, but should not have been")
}
}
type T4 struct {
Yes int
no int
}
//
// make sure we check capitalization
// labgob prints one warning during this test.
//
func TestCapital(t *testing.T) {
e0 := errorCount
v := []map[*T4]int{}
w := new(bytes.Buffer)
e := NewEncoder(w)
e.Encode(v)
data := w.Bytes()
var v1 []map[T4]int
r := bytes.NewBuffer(data)
d := NewDecoder(r)
d.Decode(&v1)
if errorCount != e0+1 {
t.Fatalf("failed to warn about lower-case field")
}
}
//
// check that we warn when someone sends a default value over
// RPC but the target into which we're decoding holds a non-default
// value, which GOB seems not to overwrite as you'd expect.
//
// labgob does not print a warning.
//
func TestDefault(t *testing.T) {
e0 := errorCount
type DD struct {
X int
}
// send a default value...
dd1 := DD{}
w := new(bytes.Buffer)
e := NewEncoder(w)
e.Encode(dd1)
data := w.Bytes()
// and receive it into memory that already
// holds non-default values.
reply := DD{99}
r := bytes.NewBuffer(data)
d := NewDecoder(r)
d.Decode(&reply)
if errorCount != e0+1 {
t.Fatalf("failed to warn about decoding into non-default value")
}
}

src/labrpc/labrpc.go Normal file

@@ -0,0 +1,513 @@
package labrpc
//
// channel-based RPC, for 824 labs.
//
// simulates a network that can lose requests, lose replies,
// delay messages, and entirely disconnect particular hosts.
//
// we will use the original labrpc.go to test your code for grading.
// so, while you can modify this code to help you debug, please
// test against the original before submitting.
//
// adapted from Go net/rpc/server.go.
//
// sends labgob-encoded values to ensure that RPCs
// don't include references to program objects.
//
// net := MakeNetwork() -- holds network, clients, servers.
// end := net.MakeEnd(endname) -- create a client end-point, to talk to one server.
// net.AddServer(servername, server) -- adds a named server to network.
// net.DeleteServer(servername) -- eliminate the named server.
// net.Connect(endname, servername) -- connect a client to a server.
// net.Enable(endname, enabled) -- enable/disable a client.
// net.Reliable(bool) -- false means drop/delay messages
//
// end.Call("Raft.AppendEntries", &args, &reply) -- send an RPC, wait for reply.
// the "Raft" is the name of the server struct to be called.
// the "AppendEntries" is the name of the method to be called.
// Call() returns true to indicate that the server executed the request
// and the reply is valid.
// Call() returns false if the network lost the request or reply
// or the server is down.
// It is OK to have multiple Call()s in progress at the same time on the
// same ClientEnd.
// Concurrent calls to Call() may be delivered to the server out of order,
// since the network may re-order messages.
// Call() is guaranteed to return (perhaps after a delay) *except* if the
// handler function on the server side does not return.
// the server RPC handler function must declare its args and reply arguments
// as pointers, so that their types exactly match the types of the arguments
// to Call().
//
// srv := MakeServer()
// srv.AddService(svc) -- a server can have multiple services, e.g. Raft and k/v
// pass srv to net.AddServer()
//
// svc := MakeService(receiverObject) -- obj's methods will handle RPCs
// much like Go's rpc.Register()
// pass svc to srv.AddService()
//
import "6.824/labgob"
import "bytes"
import "reflect"
import "sync"
import "log"
import "strings"
import "math/rand"
import "time"
import "sync/atomic"
type reqMsg struct {
endname interface{} // name of sending ClientEnd
svcMeth string // e.g. "Raft.AppendEntries"
argsType reflect.Type
args []byte
replyCh chan replyMsg
}
type replyMsg struct {
ok bool
reply []byte
}
type ClientEnd struct {
endname interface{} // this end-point's name
ch chan reqMsg // copy of Network.endCh
done chan struct{} // closed when Network is cleaned up
}
// send an RPC, wait for the reply.
// the return value indicates success; false means that
// no reply was received from the server.
func (e *ClientEnd) Call(svcMeth string, args interface{}, reply interface{}) bool {
req := reqMsg{}
req.endname = e.endname
req.svcMeth = svcMeth
req.argsType = reflect.TypeOf(args)
req.replyCh = make(chan replyMsg)
qb := new(bytes.Buffer)
qe := labgob.NewEncoder(qb)
if err := qe.Encode(args); err != nil {
panic(err)
}
req.args = qb.Bytes()
//
// send the request.
//
select {
case e.ch <- req:
// the request has been sent.
case <-e.done:
// entire Network has been destroyed.
return false
}
//
// wait for the reply.
//
rep := <-req.replyCh
if rep.ok {
rb := bytes.NewBuffer(rep.reply)
rd := labgob.NewDecoder(rb)
if err := rd.Decode(reply); err != nil {
log.Fatalf("ClientEnd.Call(): decode reply: %v\n", err)
}
return true
} else {
return false
}
}
type Network struct {
mu sync.Mutex
reliable bool
longDelays bool // pause a long time on send on disabled connection
longReordering bool // sometimes delay replies a long time
ends map[interface{}]*ClientEnd // ends, by name
enabled map[interface{}]bool // by end name
servers map[interface{}]*Server // servers, by name
connections map[interface{}]interface{} // endname -> servername
endCh chan reqMsg
done chan struct{} // closed when Network is cleaned up
count int32 // total RPC count, for statistics
bytes int64 // total bytes sent, for statistics
}
func MakeNetwork() *Network {
rn := &Network{}
rn.reliable = true
rn.ends = map[interface{}]*ClientEnd{}
rn.enabled = map[interface{}]bool{}
rn.servers = map[interface{}]*Server{}
rn.connections = map[interface{}](interface{}){}
rn.endCh = make(chan reqMsg)
rn.done = make(chan struct{})
// single goroutine to handle all ClientEnd.Call()s
go func() {
for {
select {
case xreq := <-rn.endCh:
atomic.AddInt32(&rn.count, 1)
atomic.AddInt64(&rn.bytes, int64(len(xreq.args)))
go rn.processReq(xreq)
case <-rn.done:
return
}
}
}()
return rn
}
func (rn *Network) Cleanup() {
close(rn.done)
}
func (rn *Network) Reliable(yes bool) {
rn.mu.Lock()
defer rn.mu.Unlock()
rn.reliable = yes
}
func (rn *Network) LongReordering(yes bool) {
rn.mu.Lock()
defer rn.mu.Unlock()
rn.longReordering = yes
}
func (rn *Network) LongDelays(yes bool) {
rn.mu.Lock()
defer rn.mu.Unlock()
rn.longDelays = yes
}
func (rn *Network) readEndnameInfo(endname interface{}) (enabled bool,
servername interface{}, server *Server, reliable bool, longreordering bool,
) {
rn.mu.Lock()
defer rn.mu.Unlock()
enabled = rn.enabled[endname]
servername = rn.connections[endname]
if servername != nil {
server = rn.servers[servername]
}
reliable = rn.reliable
longreordering = rn.longReordering
return
}
func (rn *Network) isServerDead(endname interface{}, servername interface{}, server *Server) bool {
rn.mu.Lock()
defer rn.mu.Unlock()
if rn.enabled[endname] == false || rn.servers[servername] != server {
return true
}
return false
}
func (rn *Network) processReq(req reqMsg) {
enabled, servername, server, reliable, longreordering := rn.readEndnameInfo(req.endname)
if enabled && servername != nil && server != nil {
if reliable == false {
// short delay
ms := (rand.Int() % 27)
time.Sleep(time.Duration(ms) * time.Millisecond)
}
if reliable == false && (rand.Int()%1000) < 100 {
// drop the request, return as if timeout
req.replyCh <- replyMsg{false, nil}
return
}
// execute the request (call the RPC handler).
// in a separate thread so that we can periodically check
// if the server has been killed and the RPC should get a
// failure reply.
ech := make(chan replyMsg)
go func() {
r := server.dispatch(req)
ech <- r
}()
// wait for handler to return,
// but stop waiting if DeleteServer() has been called,
// and return an error.
var reply replyMsg
replyOK := false
serverDead := false
for replyOK == false && serverDead == false {
select {
case reply = <-ech:
replyOK = true
case <-time.After(100 * time.Millisecond):
serverDead = rn.isServerDead(req.endname, servername, server)
if serverDead {
go func() {
<-ech // drain channel to let the goroutine created earlier terminate
}()
}
}
}
// do not reply if DeleteServer() has been called, i.e.
// the server has been killed. this is needed to avoid
// situation in which a client gets a positive reply
// to an Append, but the server persisted the update
// into the old Persister. config.go is careful to call
// DeleteServer() before superseding the Persister.
serverDead = rn.isServerDead(req.endname, servername, server)
if replyOK == false || serverDead == true {
// server was killed while we were waiting; return error.
req.replyCh <- replyMsg{false, nil}
} else if reliable == false && (rand.Int()%1000) < 100 {
// drop the reply, return as if timeout
req.replyCh <- replyMsg{false, nil}
} else if longreordering == true && rand.Intn(900) < 600 {
// delay the response for a while
ms := 200 + rand.Intn(1+rand.Intn(2000))
// Russ points out that this timer arrangement will decrease
// the number of goroutines, so that the race
// detector is less likely to get upset.
time.AfterFunc(time.Duration(ms)*time.Millisecond, func() {
atomic.AddInt64(&rn.bytes, int64(len(reply.reply)))
req.replyCh <- reply
})
} else {
atomic.AddInt64(&rn.bytes, int64(len(reply.reply)))
req.replyCh <- reply
}
} else {
// simulate no reply and eventual timeout.
ms := 0
if rn.longDelays {
// let Raft tests check that leader doesn't send
// RPCs synchronously.
ms = (rand.Int() % 7000)
} else {
// many kv tests require the client to try each
// server in fairly rapid succession.
ms = (rand.Int() % 100)
}
time.AfterFunc(time.Duration(ms)*time.Millisecond, func() {
req.replyCh <- replyMsg{false, nil}
})
}
}
// create a client end-point.
// start the thread that listens and delivers.
func (rn *Network) MakeEnd(endname interface{}) *ClientEnd {
rn.mu.Lock()
defer rn.mu.Unlock()
if _, ok := rn.ends[endname]; ok {
log.Fatalf("MakeEnd: %v already exists\n", endname)
}
e := &ClientEnd{}
e.endname = endname
e.ch = rn.endCh
e.done = rn.done
rn.ends[endname] = e
rn.enabled[endname] = false
rn.connections[endname] = nil
return e
}
func (rn *Network) AddServer(servername interface{}, rs *Server) {
rn.mu.Lock()
defer rn.mu.Unlock()
rn.servers[servername] = rs
}
func (rn *Network) DeleteServer(servername interface{}) {
rn.mu.Lock()
defer rn.mu.Unlock()
rn.servers[servername] = nil
}
// connect a ClientEnd to a server.
// a ClientEnd can only be connected once in its lifetime.
func (rn *Network) Connect(endname interface{}, servername interface{}) {
rn.mu.Lock()
defer rn.mu.Unlock()
rn.connections[endname] = servername
}
// enable/disable a ClientEnd.
func (rn *Network) Enable(endname interface{}, enabled bool) {
rn.mu.Lock()
defer rn.mu.Unlock()
rn.enabled[endname] = enabled
}
// get a server's count of incoming RPCs.
func (rn *Network) GetCount(servername interface{}) int {
rn.mu.Lock()
defer rn.mu.Unlock()
svr := rn.servers[servername]
return svr.GetCount()
}
func (rn *Network) GetTotalCount() int {
x := atomic.LoadInt32(&rn.count)
return int(x)
}
func (rn *Network) GetTotalBytes() int64 {
x := atomic.LoadInt64(&rn.bytes)
return x
}
//
// a server is a collection of services, all sharing
// the same rpc dispatcher. so that e.g. both a Raft
// and a k/v server can listen to the same rpc endpoint.
//
type Server struct {
mu sync.Mutex
services map[string]*Service
count int // incoming RPCs
}
func MakeServer() *Server {
rs := &Server{}
rs.services = map[string]*Service{}
return rs
}
func (rs *Server) AddService(svc *Service) {
rs.mu.Lock()
defer rs.mu.Unlock()
rs.services[svc.name] = svc
}
func (rs *Server) dispatch(req reqMsg) replyMsg {
rs.mu.Lock()
rs.count += 1
// split Raft.AppendEntries into service and method
dot := strings.LastIndex(req.svcMeth, ".")
serviceName := req.svcMeth[:dot]
methodName := req.svcMeth[dot+1:]
service, ok := rs.services[serviceName]
rs.mu.Unlock()
if ok {
return service.dispatch(methodName, req)
} else {
choices := []string{}
for k := range rs.services {
choices = append(choices, k)
}
log.Fatalf("labrpc.Server.dispatch(): unknown service %v in %v.%v; expecting one of %v\n",
serviceName, serviceName, methodName, choices)
return replyMsg{false, nil}
}
}
func (rs *Server) GetCount() int {
rs.mu.Lock()
defer rs.mu.Unlock()
return rs.count
}
// an object with methods that can be called via RPC.
// a single server may have more than one Service.
type Service struct {
name string
rcvr reflect.Value
typ reflect.Type
methods map[string]reflect.Method
}
func MakeService(rcvr interface{}) *Service {
svc := &Service{}
svc.typ = reflect.TypeOf(rcvr)
svc.rcvr = reflect.ValueOf(rcvr)
svc.name = reflect.Indirect(svc.rcvr).Type().Name()
svc.methods = map[string]reflect.Method{}
for m := 0; m < svc.typ.NumMethod(); m++ {
method := svc.typ.Method(m)
mtype := method.Type
mname := method.Name
//fmt.Printf("%v pp %v ni %v 1k %v 2k %v no %v\n",
// mname, method.PkgPath, mtype.NumIn(), mtype.In(1).Kind(), mtype.In(2).Kind(), mtype.NumOut())
if method.PkgPath != "" || // capitalized?
mtype.NumIn() != 3 ||
//mtype.In(1).Kind() != reflect.Ptr ||
mtype.In(2).Kind() != reflect.Ptr ||
mtype.NumOut() != 0 {
// the method is not suitable for a handler
//fmt.Printf("bad method: %v\n", mname)
} else {
// the method looks like a handler
svc.methods[mname] = method
}
}
return svc
}
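// For reference, a method survives the checks above only if it looks like a
// handler: exported name, exactly two arguments after the receiver, a pointer
// reply, and no return values. A hypothetical example (KVServer, GetArgs and
// GetReply are illustrative names, not labrpc types):
//
//	func (kv *KVServer) Get(args *GetArgs, reply *GetReply)
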
func (svc *Service) dispatch(methname string, req reqMsg) replyMsg {
if method, ok := svc.methods[methname]; ok {
// prepare space into which to read the argument.
// the Value's type will be a pointer to req.argsType.
args := reflect.New(req.argsType)
// decode the argument.
ab := bytes.NewBuffer(req.args)
ad := labgob.NewDecoder(ab)
ad.Decode(args.Interface())
// allocate space for the reply.
replyType := method.Type.In(2)
replyType = replyType.Elem()
replyv := reflect.New(replyType)
// call the method.
function := method.Func
function.Call([]reflect.Value{svc.rcvr, args.Elem(), replyv})
// encode the reply.
rb := new(bytes.Buffer)
re := labgob.NewEncoder(rb)
re.EncodeValue(replyv)
return replyMsg{true, rb.Bytes()}
} else {
choices := []string{}
for k := range svc.methods {
choices = append(choices, k)
}
log.Fatalf("labrpc.Service.dispatch(): unknown method %v in %v; expecting one of %v\n",
methname, req.svcMeth, choices)
return replyMsg{false, nil}
}
}
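// End-to-end sketch of one RPC through this package: ClientEnd.Call encodes
// the arguments with labgob and sends a reqMsg whose svcMeth is
// "Service.Method"; Server.dispatch splits that name on the last "." to pick
// the Service; Service.dispatch (above) decodes the arguments, invokes the
// handler via reflection, and ships back the labgob-encoded reply.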

597
src/labrpc/test_test.go Normal file
View File

@ -0,0 +1,597 @@
package labrpc
import "testing"
import "strconv"
import "sync"
import "runtime"
import "time"
import "fmt"
type JunkArgs struct {
X int
}
type JunkReply struct {
X string
}
type JunkServer struct {
mu sync.Mutex
log1 []string
log2 []int
}
func (js *JunkServer) Handler1(args string, reply *int) {
js.mu.Lock()
defer js.mu.Unlock()
js.log1 = append(js.log1, args)
*reply, _ = strconv.Atoi(args)
}
func (js *JunkServer) Handler2(args int, reply *string) {
js.mu.Lock()
defer js.mu.Unlock()
js.log2 = append(js.log2, args)
*reply = "handler2-" + strconv.Itoa(args)
}
func (js *JunkServer) Handler3(args int, reply *int) {
js.mu.Lock()
defer js.mu.Unlock()
time.Sleep(20 * time.Second)
*reply = -args
}
// args is a pointer
func (js *JunkServer) Handler4(args *JunkArgs, reply *JunkReply) {
reply.X = "pointer"
}
// args is not a pointer
func (js *JunkServer) Handler5(args JunkArgs, reply *JunkReply) {
reply.X = "no pointer"
}
func (js *JunkServer) Handler6(args string, reply *int) {
js.mu.Lock()
defer js.mu.Unlock()
*reply = len(args)
}
func (js *JunkServer) Handler7(args int, reply *string) {
js.mu.Lock()
defer js.mu.Unlock()
*reply = ""
for i := 0; i < args; i++ {
*reply = *reply + "y"
}
}
func TestBasic(t *testing.T) {
runtime.GOMAXPROCS(4)
rn := MakeNetwork()
defer rn.Cleanup()
e := rn.MakeEnd("end1-99")
js := &JunkServer{}
svc := MakeService(js)
rs := MakeServer()
rs.AddService(svc)
rn.AddServer("server99", rs)
rn.Connect("end1-99", "server99")
rn.Enable("end1-99", true)
{
reply := ""
e.Call("JunkServer.Handler2", 111, &reply)
if reply != "handler2-111" {
t.Fatalf("wrong reply from Handler2")
}
}
{
reply := 0
e.Call("JunkServer.Handler1", "9099", &reply)
if reply != 9099 {
t.Fatalf("wrong reply from Handler1")
}
}
}
func TestTypes(t *testing.T) {
runtime.GOMAXPROCS(4)
rn := MakeNetwork()
defer rn.Cleanup()
e := rn.MakeEnd("end1-99")
js := &JunkServer{}
svc := MakeService(js)
rs := MakeServer()
rs.AddService(svc)
rn.AddServer("server99", rs)
rn.Connect("end1-99", "server99")
rn.Enable("end1-99", true)
{
var args JunkArgs
var reply JunkReply
// args must match type (pointer or not) of handler.
e.Call("JunkServer.Handler4", &args, &reply)
if reply.X != "pointer" {
t.Fatalf("wrong reply from Handler4")
}
}
{
var args JunkArgs
var reply JunkReply
// args must match type (pointer or not) of handler.
e.Call("JunkServer.Handler5", args, &reply)
if reply.X != "no pointer" {
t.Fatalf("wrong reply from Handler5")
}
}
}
//
// does net.Enable(endname, false) really disconnect a client?
//
func TestDisconnect(t *testing.T) {
runtime.GOMAXPROCS(4)
rn := MakeNetwork()
defer rn.Cleanup()
e := rn.MakeEnd("end1-99")
js := &JunkServer{}
svc := MakeService(js)
rs := MakeServer()
rs.AddService(svc)
rn.AddServer("server99", rs)
rn.Connect("end1-99", "server99")
{
reply := ""
e.Call("JunkServer.Handler2", 111, &reply)
if reply != "" {
t.Fatalf("unexpected reply from Handler2")
}
}
rn.Enable("end1-99", true)
{
reply := 0
e.Call("JunkServer.Handler1", "9099", &reply)
if reply != 9099 {
t.Fatalf("wrong reply from Handler1")
}
}
}
//
// test net.GetCount()
//
func TestCounts(t *testing.T) {
runtime.GOMAXPROCS(4)
rn := MakeNetwork()
defer rn.Cleanup()
e := rn.MakeEnd("end1-99")
js := &JunkServer{}
svc := MakeService(js)
rs := MakeServer()
rs.AddService(svc)
rn.AddServer(99, rs)
rn.Connect("end1-99", 99)
rn.Enable("end1-99", true)
for i := 0; i < 17; i++ {
reply := ""
e.Call("JunkServer.Handler2", i, &reply)
wanted := "handler2-" + strconv.Itoa(i)
if reply != wanted {
t.Fatalf("wrong reply %v from Handler1, expecting %v", reply, wanted)
}
}
n := rn.GetCount(99)
if n != 17 {
t.Fatalf("wrong GetCount() %v, expected 17\n", n)
}
}
//
// test net.GetTotalBytes()
//
func TestBytes(t *testing.T) {
runtime.GOMAXPROCS(4)
rn := MakeNetwork()
defer rn.Cleanup()
e := rn.MakeEnd("end1-99")
js := &JunkServer{}
svc := MakeService(js)
rs := MakeServer()
rs.AddService(svc)
rn.AddServer(99, rs)
rn.Connect("end1-99", 99)
rn.Enable("end1-99", true)
for i := 0; i < 17; i++ {
args := "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
args = args + args
args = args + args
reply := 0
e.Call("JunkServer.Handler6", args, &reply)
wanted := len(args)
if reply != wanted {
t.Fatalf("wrong reply %v from Handler6, expecting %v", reply, wanted)
}
}
n := rn.GetTotalBytes()
if n < 4828 || n > 6000 {
t.Fatalf("wrong GetTotalBytes() %v, expected about 5000\n", n)
}
for i := 0; i < 17; i++ {
args := 107
reply := ""
e.Call("JunkServer.Handler7", args, &reply)
wanted := args
if len(reply) != wanted {
t.Fatalf("wrong reply len=%v from Handler6, expecting %v", len(reply), wanted)
}
}
nn := rn.GetTotalBytes() - n
if nn < 1800 || nn > 2500 {
t.Fatalf("wrong GetTotalBytes() %v, expected about 2000\n", nn)
}
}
//
// test RPCs from concurrent ClientEnds
//
func TestConcurrentMany(t *testing.T) {
runtime.GOMAXPROCS(4)
rn := MakeNetwork()
defer rn.Cleanup()
js := &JunkServer{}
svc := MakeService(js)
rs := MakeServer()
rs.AddService(svc)
rn.AddServer(1000, rs)
ch := make(chan int)
nclients := 20
nrpcs := 10
for ii := 0; ii < nclients; ii++ {
go func(i int) {
n := 0
defer func() { ch <- n }()
e := rn.MakeEnd(i)
rn.Connect(i, 1000)
rn.Enable(i, true)
for j := 0; j < nrpcs; j++ {
arg := i*100 + j
reply := ""
e.Call("JunkServer.Handler2", arg, &reply)
wanted := "handler2-" + strconv.Itoa(arg)
if reply != wanted {
t.Fatalf("wrong reply %v from Handler1, expecting %v", reply, wanted)
}
n += 1
}
}(ii)
}
total := 0
for ii := 0; ii < nclients; ii++ {
x := <-ch
total += x
}
if total != nclients*nrpcs {
t.Fatalf("wrong number of RPCs completed, got %v, expected %v", total, nclients*nrpcs)
}
n := rn.GetCount(1000)
if n != total {
t.Fatalf("wrong GetCount() %v, expected %v\n", n, total)
}
}
//
// test unreliable
//
func TestUnreliable(t *testing.T) {
runtime.GOMAXPROCS(4)
rn := MakeNetwork()
defer rn.Cleanup()
rn.Reliable(false)
js := &JunkServer{}
svc := MakeService(js)
rs := MakeServer()
rs.AddService(svc)
rn.AddServer(1000, rs)
ch := make(chan int)
nclients := 300
for ii := 0; ii < nclients; ii++ {
go func(i int) {
n := 0
defer func() { ch <- n }()
e := rn.MakeEnd(i)
rn.Connect(i, 1000)
rn.Enable(i, true)
arg := i * 100
reply := ""
ok := e.Call("JunkServer.Handler2", arg, &reply)
if ok {
wanted := "handler2-" + strconv.Itoa(arg)
if reply != wanted {
t.Fatalf("wrong reply %v from Handler1, expecting %v", reply, wanted)
}
n += 1
}
}(ii)
}
total := 0
for ii := 0; ii < nclients; ii++ {
x := <-ch
total += x
}
if total == nclients || total == 0 {
t.Fatalf("all RPCs succeeded despite unreliable")
}
}
//
// test concurrent RPCs from a single ClientEnd
//
func TestConcurrentOne(t *testing.T) {
runtime.GOMAXPROCS(4)
rn := MakeNetwork()
defer rn.Cleanup()
js := &JunkServer{}
svc := MakeService(js)
rs := MakeServer()
rs.AddService(svc)
rn.AddServer(1000, rs)
e := rn.MakeEnd("c")
rn.Connect("c", 1000)
rn.Enable("c", true)
ch := make(chan int)
nrpcs := 20
for ii := 0; ii < nrpcs; ii++ {
go func(i int) {
n := 0
defer func() { ch <- n }()
arg := 100 + i
reply := ""
e.Call("JunkServer.Handler2", arg, &reply)
wanted := "handler2-" + strconv.Itoa(arg)
if reply != wanted {
t.Fatalf("wrong reply %v from Handler2, expecting %v", reply, wanted)
}
n += 1
}(ii)
}
total := 0
for ii := 0; ii < nrpcs; ii++ {
x := <-ch
total += x
}
if total != nrpcs {
t.Fatalf("wrong number of RPCs completed, got %v, expected %v", total, nrpcs)
}
js.mu.Lock()
defer js.mu.Unlock()
if len(js.log2) != nrpcs {
t.Fatalf("wrong number of RPCs delivered")
}
n := rn.GetCount(1000)
if n != total {
t.Fatalf("wrong GetCount() %v, expected %v\n", n, total)
}
}
//
// regression: an RPC that's delayed during Enabled=false
// should not delay subsequent RPCs (e.g. after Enabled=true).
//
func TestRegression1(t *testing.T) {
runtime.GOMAXPROCS(4)
rn := MakeNetwork()
defer rn.Cleanup()
js := &JunkServer{}
svc := MakeService(js)
rs := MakeServer()
rs.AddService(svc)
rn.AddServer(1000, rs)
e := rn.MakeEnd("c")
rn.Connect("c", 1000)
// start some RPCs while the ClientEnd is disabled.
// they'll be delayed.
rn.Enable("c", false)
ch := make(chan bool)
nrpcs := 20
for ii := 0; ii < nrpcs; ii++ {
go func(i int) {
ok := false
defer func() { ch <- ok }()
arg := 100 + i
reply := ""
// this call ought to return false.
e.Call("JunkServer.Handler2", arg, &reply)
ok = true
}(ii)
}
time.Sleep(100 * time.Millisecond)
// now enable the ClientEnd and check that an RPC completes quickly.
t0 := time.Now()
rn.Enable("c", true)
{
arg := 99
reply := ""
e.Call("JunkServer.Handler2", arg, &reply)
wanted := "handler2-" + strconv.Itoa(arg)
if reply != wanted {
t.Fatalf("wrong reply %v from Handler2, expecting %v", reply, wanted)
}
}
dur := time.Since(t0).Seconds()
if dur > 0.03 {
t.Fatalf("RPC took too long (%v) after Enable", dur)
}
for ii := 0; ii < nrpcs; ii++ {
<-ch
}
js.mu.Lock()
defer js.mu.Unlock()
if len(js.log2) != 1 {
t.Fatalf("wrong number (%v) of RPCs delivered, expected 1", len(js.log2))
}
n := rn.GetCount(1000)
if n != 1 {
t.Fatalf("wrong GetCount() %v, expected %v\n", n, 1)
}
}
//
// if an RPC is stuck in a server, and the server
// is killed with DeleteServer(), does the RPC
// get un-stuck?
//
func TestKilled(t *testing.T) {
runtime.GOMAXPROCS(4)
rn := MakeNetwork()
defer rn.Cleanup()
e := rn.MakeEnd("end1-99")
js := &JunkServer{}
svc := MakeService(js)
rs := MakeServer()
rs.AddService(svc)
rn.AddServer("server99", rs)
rn.Connect("end1-99", "server99")
rn.Enable("end1-99", true)
doneCh := make(chan bool)
go func() {
reply := 0
ok := e.Call("JunkServer.Handler3", 99, &reply)
doneCh <- ok
}()
time.Sleep(1000 * time.Millisecond)
select {
case <-doneCh:
t.Fatalf("Handler3 should not have returned yet")
case <-time.After(100 * time.Millisecond):
}
rn.DeleteServer("server99")
select {
case x := <-doneCh:
if x != false {
t.Fatalf("Handler3 returned successfully despite DeleteServer()")
}
case <-time.After(100 * time.Millisecond):
t.Fatalf("Handler3 should return after DeleteServer()")
}
}
func TestBenchmark(t *testing.T) {
runtime.GOMAXPROCS(4)
rn := MakeNetwork()
defer rn.Cleanup()
e := rn.MakeEnd("end1-99")
js := &JunkServer{}
svc := MakeService(js)
rs := MakeServer()
rs.AddService(svc)
rn.AddServer("server99", rs)
rn.Connect("end1-99", "server99")
rn.Enable("end1-99", true)
t0 := time.Now()
n := 100000
for iters := 0; iters < n; iters++ {
reply := ""
e.Call("JunkServer.Handler2", 111, &reply)
if reply != "handler2-111" {
t.Fatalf("wrong reply from Handler2")
}
}
fmt.Printf("%v for %v\n", time.Since(t0), n)
// march 2016, rtm laptop, 22 microseconds per RPC
}

74
src/main/diskvd.go Normal file
View File

@ -0,0 +1,74 @@
package main
//
// start a diskvd server. it's a member of some replica
// group, which has other members, and it needs to know
// how to talk to the members of the shardmaster service.
// used by ../diskv/test_test.go
//
// arguments:
// -g groupid
// -m masterport1 -m masterport2 ...
// -s replicaport1 -s replicaport2 ...
// -i my-index-in-server-port-list
// -u unreliable
// -d directory
// -r restart
import "time"
import "6.824/diskv"
import "os"
import "fmt"
import "strconv"
import "runtime"
func usage() {
fmt.Printf("Usage: diskvd -g gid -m master... -s server... -i my-index -d dir\n")
os.Exit(1)
}
func main() {
var gid int64 = -1 // my replica group ID
masters := []string{} // ports of shardmasters
replicas := []string{} // ports of servers in my replica group
me := -1 // my index in replicas[]
unreliable := false
dir := "" // store persistent data here
restart := false
for i := 1; i+1 < len(os.Args); i += 2 {
a0 := os.Args[i]
a1 := os.Args[i+1]
if a0 == "-g" {
gid, _ = strconv.ParseInt(a1, 10, 64)
} else if a0 == "-m" {
masters = append(masters, a1)
} else if a0 == "-s" {
replicas = append(replicas, a1)
} else if a0 == "-i" {
me, _ = strconv.Atoi(a1)
} else if a0 == "-u" {
unreliable, _ = strconv.ParseBool(a1)
} else if a0 == "-d" {
dir = a1
} else if a0 == "-r" {
restart, _ = strconv.ParseBool(a1)
} else {
usage()
}
}
if gid < 0 || me < 0 || len(masters) < 1 || me >= len(replicas) || dir == "" {
usage()
}
runtime.GOMAXPROCS(4)
srv := diskv.StartServer(gid, masters, replicas, me, dir, restart)
srv.Setunreliable(unreliable)
// for safety, force quit after 10 minutes.
time.Sleep(10 * 60 * time.Second)
mep, _ := os.FindProcess(os.Getpid())
mep.Kill()
}

31
src/main/lockc.go Normal file
View File

@ -0,0 +1,31 @@
package main
//
// see comments in lockd.go
//
import "6.824/lockservice"
import "os"
import "fmt"
func usage() {
fmt.Printf("Usage: lockc -l|-u primaryport backupport lockname\n")
os.Exit(1)
}
func main() {
if len(os.Args) == 5 {
ck := lockservice.MakeClerk(os.Args[2], os.Args[3])
var ok bool
if os.Args[1] == "-l" {
ok = ck.Lock(os.Args[4])
} else if os.Args[1] == "-u" {
ok = ck.Unlock(os.Args[4])
} else {
usage()
}
fmt.Printf("reply: %v\n", ok)
} else {
usage()
}
}

31
src/main/lockd.go Normal file
View File

@ -0,0 +1,31 @@
package main
// export GOPATH=~/6.824
// go build lockd.go
// go build lockc.go
// ./lockd -p a b &
// ./lockd -b a b &
// ./lockc -l a b lx
// ./lockc -u a b lx
//
// on Athena, use /tmp/myname-a and /tmp/myname-b
// instead of a and b.
import "time"
import "6.824/lockservice"
import "os"
import "fmt"
func main() {
if len(os.Args) == 4 && os.Args[1] == "-p" {
lockservice.StartServer(os.Args[2], os.Args[3], true)
} else if len(os.Args) == 4 && os.Args[1] == "-b" {
lockservice.StartServer(os.Args[2], os.Args[3], false)
} else {
fmt.Printf("Usage: lockd -p|-b primaryport backupport\n")
os.Exit(1)
}
for {
time.Sleep(100 * time.Second)
}
}

29
src/main/mrcoordinator.go Normal file
View File

@ -0,0 +1,29 @@
package main
//
// start the coordinator process, which is implemented
// in ../mr/coordinator.go
//
// go run mrcoordinator.go pg*.txt
//
// Please do not change this file.
//
import "6.824/mr"
import "time"
import "os"
import "fmt"
func main() {
if len(os.Args) < 2 {
fmt.Fprintf(os.Stderr, "Usage: mrcoordinator inputfiles...\n")
os.Exit(1)
}
m := mr.MakeCoordinator(os.Args[1:], 10)
for m.Done() == false {
time.Sleep(time.Second)
}
time.Sleep(time.Second)
}

110
src/main/mrsequential.go Normal file
View File

@ -0,0 +1,110 @@
package main
//
// simple sequential MapReduce.
//
// go run mrsequential.go wc.so pg*.txt
//
import "fmt"
import "6.824/mr"
import "plugin"
import "os"
import "log"
import "io/ioutil"
import "sort"
// for sorting by key.
type ByKey []mr.KeyValue
// for sorting by key.
func (a ByKey) Len() int { return len(a) }
func (a ByKey) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
func (a ByKey) Less(i, j int) bool { return a[i].Key < a[j].Key }
func main() {
if len(os.Args) < 3 {
fmt.Fprintf(os.Stderr, "Usage: mrsequential xxx.so inputfiles...\n")
os.Exit(1)
}
mapf, reducef := loadPlugin(os.Args[1])
//
// read each input file,
// pass it to Map,
// accumulate the intermediate Map output.
//
intermediate := []mr.KeyValue{}
for _, filename := range os.Args[2:] {
file, err := os.Open(filename)
if err != nil {
log.Fatalf("cannot open %v", filename)
}
content, err := ioutil.ReadAll(file)
if err != nil {
log.Fatalf("cannot read %v", filename)
}
file.Close()
kva := mapf(filename, string(content))
intermediate = append(intermediate, kva...)
}
//
// a big difference from real MapReduce is that all the
// intermediate data is in one place, intermediate[],
// rather than being partitioned into NxM buckets.
//
sort.Sort(ByKey(intermediate))
oname := "mr-out-0"
ofile, _ := os.Create(oname)
//
// call Reduce on each distinct key in intermediate[],
// and print the result to mr-out-0.
//
i := 0
for i < len(intermediate) {
j := i + 1
for j < len(intermediate) && intermediate[j].Key == intermediate[i].Key {
j++
}
values := []string{}
for k := i; k < j; k++ {
values = append(values, intermediate[k].Value)
}
output := reducef(intermediate[i].Key, values)
// this is the correct format for each line of Reduce output.
fmt.Fprintf(ofile, "%v %v\n", intermediate[i].Key, output)
i = j
}
ofile.Close()
}
//
// load the application Map and Reduce functions
// from a plugin file, e.g. ../mrapps/wc.so
//
func loadPlugin(filename string) (func(string, string) []mr.KeyValue, func(string, []string) string) {
p, err := plugin.Open(filename)
if err != nil {
log.Fatalf("cannot load plugin %v", filename)
}
xmapf, err := p.Lookup("Map")
if err != nil {
log.Fatalf("cannot find Map in %v", filename)
}
mapf := xmapf.(func(string, string) []mr.KeyValue)
xreducef, err := p.Lookup("Reduce")
if err != nil {
log.Fatalf("cannot find Reduce in %v", filename)
}
reducef := xreducef.(func(string, []string) string)
return mapf, reducef
}

51
src/main/mrworker.go Normal file
View File

@ -0,0 +1,51 @@
package main
//
// start a worker process, which is implemented
// in ../mr/worker.go. typically there will be
// multiple worker processes, talking to one coordinator.
//
// go run mrworker.go wc.so
//
// Please do not change this file.
//
import "6.824/mr"
import "plugin"
import "os"
import "fmt"
import "log"
func main() {
if len(os.Args) != 2 {
fmt.Fprintf(os.Stderr, "Usage: mrworker xxx.so\n")
os.Exit(1)
}
mapf, reducef := loadPlugin(os.Args[1])
mr.Worker(mapf, reducef)
}
//
// load the application Map and Reduce functions
// from a plugin file, e.g. ../mrapps/wc.so
//
func loadPlugin(filename string) (func(string, string) []mr.KeyValue, func(string, []string) string) {
p, err := plugin.Open(filename)
if err != nil {
log.Fatalf("cannot load plugin %v", filename)
}
xmapf, err := p.Lookup("Map")
if err != nil {
log.Fatalf("cannot find Map in %v", filename)
}
mapf := xmapf.(func(string, string) []mr.KeyValue)
xreducef, err := p.Lookup("Reduce")
if err != nil {
log.Fatalf("cannot find Reduce in %v", filename)
}
reducef := xreducef.(func(string, []string) string)
return mapf, reducef
}

44
src/main/pbc.go Normal file
View File

@ -0,0 +1,44 @@
package main
//
// pbservice client application
//
// export GOPATH=~/6.824
// go build viewd.go
// go build pbd.go
// go build pbc.go
// ./viewd /tmp/rtm-v &
// ./pbd /tmp/rtm-v /tmp/rtm-1 &
// ./pbd /tmp/rtm-v /tmp/rtm-2 &
// ./pbc /tmp/rtm-v key1 value1
// ./pbc /tmp/rtm-v key1
//
// change "rtm" to your user name.
// start the pbd programs in separate windows and kill
// and restart them to exercise fault tolerance.
//
import "6.824/pbservice"
import "os"
import "fmt"
func usage() {
fmt.Printf("Usage: pbc viewport key\n")
fmt.Printf(" pbc viewport key value\n")
os.Exit(1)
}
func main() {
if len(os.Args) == 3 {
// get
ck := pbservice.MakeClerk(os.Args[1], "")
v := ck.Get(os.Args[2])
fmt.Printf("%v\n", v)
} else if len(os.Args) == 4 {
// put
ck := pbservice.MakeClerk(os.Args[1], "")
ck.Put(os.Args[2], os.Args[3])
} else {
usage()
}
}

23
src/main/pbd.go Normal file
View File

@ -0,0 +1,23 @@
package main
//
// see directions in pbc.go
//
import "time"
import "6.824/pbservice"
import "os"
import "fmt"
func main() {
if len(os.Args) != 3 {
fmt.Printf("Usage: pbd viewport myport\n")
os.Exit(1)
}
pbservice.StartServer(os.Args[1], os.Args[2])
for {
time.Sleep(100 * time.Second)
}
}

3495
src/main/pg-being_ernest.txt Normal file

File diff suppressed because it is too large Load Diff

8904
src/main/pg-dorian_gray.txt Normal file

File diff suppressed because it is too large Load Diff

7653
src/main/pg-frankenstein.txt Normal file

File diff suppressed because it is too large Load Diff

9569
src/main/pg-grimm.txt Normal file

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

9206
src/main/pg-tom_sawyer.txt Normal file

File diff suppressed because it is too large Load Diff

23
src/main/test-mr-many.sh Normal file
View File

@ -0,0 +1,23 @@
#!/usr/bin/env bash
if [ $# -ne 1 ]; then
echo "Usage: $0 numTrials"
exit 1
fi
trap 'kill -INT -$pid; exit 1' INT
# Note: because the socketID is based on the current userID,
# ./test-mr.sh cannot be run in parallel
runs=$1
chmod +x test-mr.sh
for i in $(seq 1 $runs); do
timeout -k 2s 900s ./test-mr.sh &
pid=$!
if ! wait $pid; then
echo '***' FAILED TESTS IN TRIAL $i
exit 1
fi
done
echo '***' PASSED ALL $i TESTING TRIALS

278
src/main/test-mr.sh Normal file
View File

@ -0,0 +1,278 @@
#!/usr/bin/env bash
#
# basic map-reduce test
#
#RACE=
# comment this to run the tests without the Go race detector.
RACE=-race
# run the test in a fresh sub-directory.
rm -rf mr-tmp
mkdir mr-tmp || exit 1
cd mr-tmp || exit 1
rm -f mr-*
# make sure software is freshly built.
(cd ../../mrapps && go build $RACE -buildmode=plugin wc.go) || exit 1
(cd ../../mrapps && go build $RACE -buildmode=plugin indexer.go) || exit 1
(cd ../../mrapps && go build $RACE -buildmode=plugin mtiming.go) || exit 1
(cd ../../mrapps && go build $RACE -buildmode=plugin rtiming.go) || exit 1
(cd ../../mrapps && go build $RACE -buildmode=plugin jobcount.go) || exit 1
(cd ../../mrapps && go build $RACE -buildmode=plugin early_exit.go) || exit 1
(cd ../../mrapps && go build $RACE -buildmode=plugin crash.go) || exit 1
(cd ../../mrapps && go build $RACE -buildmode=plugin nocrash.go) || exit 1
(cd .. && go build $RACE mrcoordinator.go) || exit 1
(cd .. && go build $RACE mrworker.go) || exit 1
(cd .. && go build $RACE mrsequential.go) || exit 1
failed_any=0
#########################################################
# first word-count
# generate the correct output
../mrsequential ../../mrapps/wc.so ../pg*txt || exit 1
sort mr-out-0 > mr-correct-wc.txt
rm -f mr-out*
echo '***' Starting wc test.
timeout -k 2s 180s ../mrcoordinator ../pg*txt &
pid=$!
# give the coordinator time to create the sockets.
sleep 1
# start multiple workers.
timeout -k 2s 180s ../mrworker ../../mrapps/wc.so &
timeout -k 2s 180s ../mrworker ../../mrapps/wc.so &
timeout -k 2s 180s ../mrworker ../../mrapps/wc.so &
# wait for the coordinator to exit.
wait $pid
# since workers are required to exit when a job is completely finished,
# and not before, that means the job has finished.
sort mr-out* | grep . > mr-wc-all
if cmp mr-wc-all mr-correct-wc.txt
then
echo '---' wc test: PASS
else
echo '---' wc output is not the same as mr-correct-wc.txt
echo '---' wc test: FAIL
failed_any=1
fi
# wait for remaining workers and coordinator to exit.
wait
#########################################################
# now indexer
rm -f mr-*
# generate the correct output
../mrsequential ../../mrapps/indexer.so ../pg*txt || exit 1
sort mr-out-0 > mr-correct-indexer.txt
rm -f mr-out*
echo '***' Starting indexer test.
timeout -k 2s 180s ../mrcoordinator ../pg*txt &
sleep 1
# start multiple workers
timeout -k 2s 180s ../mrworker ../../mrapps/indexer.so &
timeout -k 2s 180s ../mrworker ../../mrapps/indexer.so
sort mr-out* | grep . > mr-indexer-all
if cmp mr-indexer-all mr-correct-indexer.txt
then
echo '---' indexer test: PASS
else
echo '---' indexer output is not the same as mr-correct-indexer.txt
echo '---' indexer test: FAIL
failed_any=1
fi
wait
#########################################################
echo '***' Starting map parallelism test.
rm -f mr-*
timeout -k 2s 180s ../mrcoordinator ../pg*txt &
sleep 1
timeout -k 2s 180s ../mrworker ../../mrapps/mtiming.so &
timeout -k 2s 180s ../mrworker ../../mrapps/mtiming.so
NT=`cat mr-out* | grep '^times-' | wc -l | sed 's/ //g'`
if [ "$NT" != "2" ]
then
echo '---' saw "$NT" workers rather than 2
echo '---' map parallelism test: FAIL
failed_any=1
fi
if cat mr-out* | grep '^parallel.* 2' > /dev/null
then
echo '---' map parallelism test: PASS
else
echo '---' map workers did not run in parallel
echo '---' map parallelism test: FAIL
failed_any=1
fi
wait
#########################################################
echo '***' Starting reduce parallelism test.
rm -f mr-*
timeout -k 2s 180s ../mrcoordinator ../pg*txt &
sleep 1
timeout -k 2s 180s ../mrworker ../../mrapps/rtiming.so &
timeout -k 2s 180s ../mrworker ../../mrapps/rtiming.so
NT=`cat mr-out* | grep '^[a-z] 2' | wc -l | sed 's/ //g'`
if [ "$NT" -lt "2" ]
then
echo '---' too few parallel reduces.
echo '---' reduce parallelism test: FAIL
failed_any=1
else
echo '---' reduce parallelism test: PASS
fi
wait
#########################################################
echo '***' Starting job count test.
rm -f mr-*
timeout -k 2s 180s ../mrcoordinator ../pg*txt &
sleep 1
timeout -k 2s 180s ../mrworker ../../mrapps/jobcount.so &
timeout -k 2s 180s ../mrworker ../../mrapps/jobcount.so
timeout -k 2s 180s ../mrworker ../../mrapps/jobcount.so &
timeout -k 2s 180s ../mrworker ../../mrapps/jobcount.so
NT=`cat mr-out* | awk '{print $2}'`
if [ "$NT" -ne "8" ]
then
echo '---' map jobs ran incorrect number of times "($NT != 8)"
echo '---' job count test: FAIL
failed_any=1
else
echo '---' job count test: PASS
fi
wait
#########################################################
# test whether any worker or coordinator exits before the
# task has completed (i.e., all output files have been finalized)
rm -f mr-*
echo '***' Starting early exit test.
timeout -k 2s 180s ../mrcoordinator ../pg*txt &
# give the coordinator time to create the sockets.
sleep 1
# start multiple workers.
timeout -k 2s 180s ../mrworker ../../mrapps/early_exit.so &
timeout -k 2s 180s ../mrworker ../../mrapps/early_exit.so &
timeout -k 2s 180s ../mrworker ../../mrapps/early_exit.so &
# wait for any of the coord or workers to exit
# `jobs` ensures that any completed old processes from other tests
# are not waited upon
jobs &> /dev/null
wait -n
# a process has exited. this means that the output should be finalized
# otherwise, either a worker or the coordinator exited early
sort mr-out* | grep . > mr-wc-all-initial
# wait for remaining workers and coordinator to exit.
wait
# compare initial and final outputs
sort mr-out* | grep . > mr-wc-all-final
if cmp mr-wc-all-final mr-wc-all-initial
then
echo '---' early exit test: PASS
else
echo '---' output changed after first worker exited
echo '---' early exit test: FAIL
failed_any=1
fi
rm -f mr-*
#########################################################
echo '***' Starting crash test.
# generate the correct output
../mrsequential ../../mrapps/nocrash.so ../pg*txt || exit 1
sort mr-out-0 > mr-correct-crash.txt
rm -f mr-out*
rm -f mr-done
(timeout -k 2s 180s ../mrcoordinator ../pg*txt ; touch mr-done ) &
sleep 1
# start multiple workers
timeout -k 2s 180s ../mrworker ../../mrapps/crash.so &
# mimic rpc.go's coordinatorSock()
SOCKNAME=/var/tmp/824-mr-`id -u`
( while [ -e $SOCKNAME -a ! -f mr-done ]
do
timeout -k 2s 180s ../mrworker ../../mrapps/crash.so
sleep 1
done ) &
( while [ -e $SOCKNAME -a ! -f mr-done ]
do
timeout -k 2s 180s ../mrworker ../../mrapps/crash.so
sleep 1
done ) &
while [ -e $SOCKNAME -a ! -f mr-done ]
do
timeout -k 2s 180s ../mrworker ../../mrapps/crash.so
sleep 1
done
wait
rm $SOCKNAME
sort mr-out* | grep . > mr-crash-all
if cmp mr-crash-all mr-correct-crash.txt
then
echo '---' crash test: PASS
else
echo '---' crash output is not the same as mr-correct-crash.txt
echo '---' crash test: FAIL
failed_any=1
fi
#########################################################
if [ $failed_any -eq 0 ]; then
echo '***' PASSED ALL TESTS
else
echo '***' FAILED SOME TESTS
exit 1
fi

23
src/main/viewd.go Normal file
View File

@ -0,0 +1,23 @@
package main
//
// see directions in pbc.go
//
import "time"
import "6.824/viewservice"
import "os"
import "fmt"
func main() {
if len(os.Args) != 2 {
fmt.Printf("Usage: viewd port\n")
os.Exit(1)
}
viewservice.StartServer(os.Args[1])
for {
time.Sleep(100 * time.Second)
}
}

69
src/models/kv.go Normal file
View File

@ -0,0 +1,69 @@
package models
import "6.824/porcupine"
import "fmt"
import "sort"
type KvInput struct {
Op uint8 // 0 => get, 1 => put, 2 => append
Key string
Value string
}
type KvOutput struct {
Value string
}
var KvModel = porcupine.Model{
Partition: func(history []porcupine.Operation) [][]porcupine.Operation {
m := make(map[string][]porcupine.Operation)
for _, v := range history {
key := v.Input.(KvInput).Key
m[key] = append(m[key], v)
}
keys := make([]string, 0, len(m))
for k := range m {
keys = append(keys, k)
}
sort.Strings(keys)
ret := make([][]porcupine.Operation, 0, len(keys))
for _, k := range keys {
ret = append(ret, m[k])
}
return ret
},
Init: func() interface{} {
// note: we are modeling a single key's value here;
// we're partitioning by key, so this is okay
return ""
},
Step: func(state, input, output interface{}) (bool, interface{}) {
inp := input.(KvInput)
out := output.(KvOutput)
st := state.(string)
if inp.Op == 0 {
// get
return out.Value == st, state
} else if inp.Op == 1 {
// put
return true, inp.Value
} else {
// append
return true, (st + inp.Value)
}
},
DescribeOperation: func(input, output interface{}) string {
inp := input.(KvInput)
out := output.(KvOutput)
switch inp.Op {
case 0:
return fmt.Sprintf("get('%s') -> '%s'", inp.Key, out.Value)
case 1:
return fmt.Sprintf("put('%s', '%s')", inp.Key, inp.Value)
case 2:
return fmt.Sprintf("append('%s', '%s')", inp.Key, inp.Value)
default:
return "<invalid>"
}
},
}
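// A minimal sketch (not used by the tests) of how this model plugs into the
// porcupine checker: a put that finishes before a get starts must be observed
// by that get. The history below is invented for illustration.
func exampleKvCheck() bool {
	history := []porcupine.Operation{
		// client 0: put("x", "y") during [0, 10]
		{ClientId: 0, Input: KvInput{Op: 1, Key: "x", Value: "y"}, Call: 0, Output: KvOutput{}, Return: 10},
		// client 1: get("x") -> "y" during [20, 30]
		{ClientId: 1, Input: KvInput{Op: 0, Key: "x"}, Call: 20, Output: KvOutput{Value: "y"}, Return: 30},
	}
	return porcupine.CheckOperations(KvModel, history) // true: the history is linearizable
}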

70
src/mr/coordinator.go Normal file
View File

@ -0,0 +1,70 @@
package mr
import "log"
import "net"
import "os"
import "net/rpc"
import "net/http"
type Coordinator struct {
// Your definitions here.
}
// Your code here -- RPC handlers for the worker to call.
//
// an example RPC handler.
//
// the RPC argument and reply types are defined in rpc.go.
//
func (c *Coordinator) Example(args *ExampleArgs, reply *ExampleReply) error {
reply.Y = args.X + 1
return nil
}
//
// start a thread that listens for RPCs from worker.go
//
func (c *Coordinator) server() {
rpc.Register(c)
rpc.HandleHTTP()
//l, e := net.Listen("tcp", ":1234")
sockname := coordinatorSock()
os.Remove(sockname)
l, e := net.Listen("unix", sockname)
if e != nil {
log.Fatal("listen error:", e)
}
go http.Serve(l, nil)
}
//
// main/mrcoordinator.go calls Done() periodically to find out
// if the entire job has finished.
//
func (c *Coordinator) Done() bool {
ret := false
// Your code here.
return ret
}
//
// create a Coordinator.
// main/mrcoordinator.go calls this function.
// nReduce is the number of reduce tasks to use.
//
func MakeCoordinator(files []string, nReduce int) *Coordinator {
c := Coordinator{}
// Your code here.
c.server()
return &c
}

36
src/mr/rpc.go Normal file
View File

@ -0,0 +1,36 @@
package mr
//
// RPC definitions.
//
// remember to capitalize all names.
//
import "os"
import "strconv"
//
// example to show how to declare the arguments
// and reply for an RPC.
//
type ExampleArgs struct {
X int
}
type ExampleReply struct {
Y int
}
// Add your RPC definitions here.
// Cook up a unique-ish UNIX-domain socket name
// in /var/tmp, for the coordinator.
// Can't use the current directory since
// Athena AFS doesn't support UNIX-domain sockets.
func coordinatorSock() string {
s := "/var/tmp/824-mr-"
s += strconv.Itoa(os.Getuid())
return s
}
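// For a user whose uid is 1000, coordinatorSock() returns
// "/var/tmp/824-mr-1000" (illustrative value; the uid varies per machine).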

85
src/mr/worker.go Normal file
View File

@ -0,0 +1,85 @@
package mr
import "fmt"
import "log"
import "net/rpc"
import "hash/fnv"
//
// Map functions return a slice of KeyValue.
//
type KeyValue struct {
Key string
Value string
}
//
// use ihash(key) % NReduce to choose the reduce
// task number for each KeyValue emitted by Map.
//
func ihash(key string) int {
h := fnv.New32a()
h.Write([]byte(key))
return int(h.Sum32() & 0x7fffffff)
}
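// For example, an intermediate KeyValue kv produced by Map belongs in reduce
// bucket ihash(kv.Key) % nReduce, where nReduce is the reduce-task count
// handed out by the coordinator (a sketch; nReduce is not defined here).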
//
// main/mrworker.go calls this function.
//
func Worker(mapf func(string, string) []KeyValue,
reducef func(string, []string) string) {
// Your worker implementation here.
// uncomment to send the Example RPC to the coordinator.
// CallExample()
}
//
// example function to show how to make an RPC call to the coordinator.
//
// the RPC argument and reply types are defined in rpc.go.
//
func CallExample() {
// declare an argument structure.
args := ExampleArgs{}
// fill in the argument(s).
args.X = 99
// declare a reply structure.
reply := ExampleReply{}
// send the RPC request, wait for the reply.
call("Coordinator.Example", &args, &reply)
// reply.Y should be 100.
fmt.Printf("reply.Y %v\n", reply.Y)
}
//
// send an RPC request to the coordinator, wait for the response.
// usually returns true.
// returns false if something goes wrong.
//
func call(rpcname string, args interface{}, reply interface{}) bool {
// c, err := rpc.DialHTTP("tcp", "127.0.0.1"+":1234")
sockname := coordinatorSock()
c, err := rpc.DialHTTP("unix", sockname)
if err != nil {
log.Fatal("dialing:", err)
}
defer c.Close()
err = c.Call(rpcname, args, reply)
if err == nil {
return true
}
fmt.Println(err)
return false
}

55
src/mrapps/crash.go Normal file
View File

@ -0,0 +1,55 @@
package main
//
// a MapReduce pseudo-application that sometimes crashes,
// and sometimes takes a long time,
// to test MapReduce's ability to recover.
//
// go build -buildmode=plugin crash.go
//
import "6.824/mr"
import crand "crypto/rand"
import "math/big"
import "strings"
import "os"
import "sort"
import "strconv"
import "time"
func maybeCrash() {
max := big.NewInt(1000)
rr, _ := crand.Int(crand.Reader, max)
if rr.Int64() < 330 {
// crash!
os.Exit(1)
} else if rr.Int64() < 660 {
// delay for a while.
maxms := big.NewInt(10 * 1000)
ms, _ := crand.Int(crand.Reader, maxms)
time.Sleep(time.Duration(ms.Int64()) * time.Millisecond)
}
}
func Map(filename string, contents string) []mr.KeyValue {
maybeCrash()
kva := []mr.KeyValue{}
kva = append(kva, mr.KeyValue{"a", filename})
kva = append(kva, mr.KeyValue{"b", strconv.Itoa(len(filename))})
kva = append(kva, mr.KeyValue{"c", strconv.Itoa(len(contents))})
kva = append(kva, mr.KeyValue{"d", "xyzzy"})
return kva
}
func Reduce(key string, values []string) string {
maybeCrash()
// sort values to ensure deterministic output.
vv := make([]string, len(values))
copy(vv, values)
sort.Strings(vv)
val := strings.Join(vv, " ")
return val
}

40
src/mrapps/early_exit.go Normal file
View File

@ -0,0 +1,40 @@
package main
//
// a MapReduce pseudo-application "plugin" whose reduce tasks sleep for a while,
// to test that no worker or coordinator exits before the whole job has finished.
//
// go build -buildmode=plugin early_exit.go
//
import (
"strconv"
"strings"
"time"
"6.824/mr"
)
//
// The map function is called once for each file of input.
// This map function just returns 1 for each file
//
func Map(filename string, contents string) []mr.KeyValue {
kva := []mr.KeyValue{}
kva = append(kva, mr.KeyValue{filename, "1"})
return kva
}
//
// The reduce function is called once for each key generated by the
// map tasks, with a list of all the values created for that key by
// any map task.
//
func Reduce(key string, values []string) string {
// some reduce tasks sleep for a long time, to check whether a worker
// (or the coordinator) accidentally exits before the whole job is done
if strings.Contains(key, "sherlock") || strings.Contains(key, "tom") {
time.Sleep(time.Duration(3 * time.Second))
}
// return the number of occurrences of this file.
return strconv.Itoa(len(values))
}

39
src/mrapps/indexer.go Normal file
View File

@ -0,0 +1,39 @@
package main
//
// an indexing application "plugin" for MapReduce.
//
// go build -buildmode=plugin indexer.go
//
import "fmt"
import "6.824/mr"
import "strings"
import "unicode"
import "sort"
// The mapping function is called once for each piece of the input.
// In this framework, the key is the name of the file that is being processed,
// and the value is the file's contents. The return value should be a slice of
// key/value pairs, each represented by a mr.KeyValue.
func Map(document string, value string) (res []mr.KeyValue) {
m := make(map[string]bool)
words := strings.FieldsFunc(value, func(x rune) bool { return !unicode.IsLetter(x) })
for _, w := range words {
m[w] = true
}
for w := range m {
kv := mr.KeyValue{w, document}
res = append(res, kv)
}
return
}
// The reduce function is called once for each key generated by Map, with a
// list of that key's string value (merged across all inputs). The return value
// should be a single output value for that key.
func Reduce(key string, values []string) string {
sort.Strings(values)
return fmt.Sprintf("%d %s", len(values), strings.Join(values, ","))
}

46
src/mrapps/jobcount.go Normal file
View File

@ -0,0 +1,46 @@
package main
//
// a MapReduce pseudo-application that counts the number of times map/reduce
// tasks are run, to test whether jobs are assigned multiple times even when
// there is no failure.
//
// go build -buildmode=plugin jobcount.go
//
import "6.824/mr"
import "math/rand"
import "strings"
import "strconv"
import "time"
import "fmt"
import "os"
import "io/ioutil"
var count int
func Map(filename string, contents string) []mr.KeyValue {
me := os.Getpid()
f := fmt.Sprintf("mr-worker-jobcount-%d-%d", me, count)
count++
err := ioutil.WriteFile(f, []byte("x"), 0666)
if err != nil {
panic(err)
}
time.Sleep(time.Duration(2000+rand.Intn(3000)) * time.Millisecond)
return []mr.KeyValue{mr.KeyValue{"a", "x"}}
}
func Reduce(key string, values []string) string {
files, err := ioutil.ReadDir(".")
if err != nil {
panic(err)
}
invocations := 0
for _, f := range files {
if strings.HasPrefix(f.Name(), "mr-worker-jobcount") {
invocations++
}
}
return strconv.Itoa(invocations)
}

91
src/mrapps/mtiming.go Normal file
View File

@ -0,0 +1,91 @@
package main
//
// a MapReduce pseudo-application to test that workers
// execute map tasks in parallel.
//
// go build -buildmode=plugin mtiming.go
//
import "6.824/mr"
import "strings"
import "fmt"
import "os"
import "syscall"
import "time"
import "sort"
import "io/ioutil"
func nparallel(phase string) int {
// create a file so that other workers will see that
// we're running at the same time as them.
pid := os.Getpid()
myfilename := fmt.Sprintf("mr-worker-%s-%d", phase, pid)
err := ioutil.WriteFile(myfilename, []byte("x"), 0666)
if err != nil {
panic(err)
}
// are any other workers running?
// find their PIDs by scanning directory for mr-worker-XXX files.
dd, err := os.Open(".")
if err != nil {
panic(err)
}
names, err := dd.Readdirnames(1000000)
if err != nil {
panic(err)
}
ret := 0
for _, name := range names {
var xpid int
pat := fmt.Sprintf("mr-worker-%s-%%d", phase)
n, err := fmt.Sscanf(name, pat, &xpid)
if n == 1 && err == nil {
err := syscall.Kill(xpid, 0)
if err == nil {
// if err == nil, xpid is alive.
ret += 1
}
}
}
dd.Close()
time.Sleep(1 * time.Second)
err = os.Remove(myfilename)
if err != nil {
panic(err)
}
return ret
}
func Map(filename string, contents string) []mr.KeyValue {
t0 := time.Now()
ts := float64(t0.Unix()) + (float64(t0.Nanosecond()) / 1000000000.0)
pid := os.Getpid()
n := nparallel("map")
kva := []mr.KeyValue{}
kva = append(kva, mr.KeyValue{
fmt.Sprintf("times-%v", pid),
fmt.Sprintf("%.1f", ts)})
kva = append(kva, mr.KeyValue{
fmt.Sprintf("parallel-%v", pid),
fmt.Sprintf("%d", n)})
return kva
}
func Reduce(key string, values []string) string {
//n := nparallel("reduce")
// sort values to ensure deterministic output.
vv := make([]string, len(values))
copy(vv, values)
sort.Strings(vv)
val := strings.Join(vv, " ")
return val
}

47
src/mrapps/nocrash.go Normal file
View File

@ -0,0 +1,47 @@
package main
//
// same as crash.go but doesn't actually crash.
//
// go build -buildmode=plugin nocrash.go
//
import "6.824/mr"
import crand "crypto/rand"
import "math/big"
import "strings"
import "os"
import "sort"
import "strconv"
func maybeCrash() {
max := big.NewInt(1000)
rr, _ := crand.Int(crand.Reader, max)
if false && rr.Int64() < 500 {
// crash!
os.Exit(1)
}
}
func Map(filename string, contents string) []mr.KeyValue {
maybeCrash()
kva := []mr.KeyValue{}
kva = append(kva, mr.KeyValue{"a", filename})
kva = append(kva, mr.KeyValue{"b", strconv.Itoa(len(filename))})
kva = append(kva, mr.KeyValue{"c", strconv.Itoa(len(contents))})
kva = append(kva, mr.KeyValue{"d", "xyzzy"})
return kva
}
func Reduce(key string, values []string) string {
maybeCrash()
// sort values to ensure deterministic output.
vv := make([]string, len(values))
copy(vv, values)
sort.Strings(vv)
val := strings.Join(vv, " ")
return val
}

84
src/mrapps/rtiming.go Normal file
View File

@ -0,0 +1,84 @@
package main
//
// a MapReduce pseudo-application to test that workers
// execute reduce tasks in parallel.
//
// go build -buildmode=plugin rtiming.go
//
import "6.824/mr"
import "fmt"
import "os"
import "syscall"
import "time"
import "io/ioutil"
func nparallel(phase string) int {
// create a file so that other workers will see that
// we're running at the same time as them.
pid := os.Getpid()
myfilename := fmt.Sprintf("mr-worker-%s-%d", phase, pid)
err := ioutil.WriteFile(myfilename, []byte("x"), 0666)
if err != nil {
panic(err)
}
// are any other workers running?
// find their PIDs by scanning directory for mr-worker-XXX files.
dd, err := os.Open(".")
if err != nil {
panic(err)
}
names, err := dd.Readdirnames(1000000)
if err != nil {
panic(err)
}
ret := 0
for _, name := range names {
var xpid int
pat := fmt.Sprintf("mr-worker-%s-%%d", phase)
n, err := fmt.Sscanf(name, pat, &xpid)
if n == 1 && err == nil {
err := syscall.Kill(xpid, 0)
if err == nil {
// if err == nil, xpid is alive.
ret += 1
}
}
}
dd.Close()
time.Sleep(1 * time.Second)
err = os.Remove(myfilename)
if err != nil {
panic(err)
}
return ret
}
func Map(filename string, contents string) []mr.KeyValue {
kva := []mr.KeyValue{}
kva = append(kva, mr.KeyValue{"a", "1"})
kva = append(kva, mr.KeyValue{"b", "1"})
kva = append(kva, mr.KeyValue{"c", "1"})
kva = append(kva, mr.KeyValue{"d", "1"})
kva = append(kva, mr.KeyValue{"e", "1"})
kva = append(kva, mr.KeyValue{"f", "1"})
kva = append(kva, mr.KeyValue{"g", "1"})
kva = append(kva, mr.KeyValue{"h", "1"})
kva = append(kva, mr.KeyValue{"i", "1"})
kva = append(kva, mr.KeyValue{"j", "1"})
return kva
}
func Reduce(key string, values []string) string {
n := nparallel("reduce")
val := fmt.Sprintf("%d", n)
return val
}

44
src/mrapps/wc.go Normal file
View File

@ -0,0 +1,44 @@
package main
//
// a word-count application "plugin" for MapReduce.
//
// go build -buildmode=plugin wc.go
//
import "6.824/mr"
import "unicode"
import "strings"
import "strconv"
//
// The map function is called once for each file of input. The first
// argument is the name of the input file, and the second is the
// file's complete contents. You should ignore the input file name,
// and look only at the contents argument. The return value is a slice
// of key/value pairs.
//
func Map(filename string, contents string) []mr.KeyValue {
// function to detect word separators.
ff := func(r rune) bool { return !unicode.IsLetter(r) }
// split contents into an array of words.
words := strings.FieldsFunc(contents, ff)
kva := []mr.KeyValue{}
for _, w := range words {
kv := mr.KeyValue{w, "1"}
kva = append(kva, kv)
}
return kva
}
//
// The reduce function is called once for each key generated by the
// map tasks, with a list of all the values created for that key by
// any map task.
//
func Reduce(key string, values []string) string {
// return the number of occurrences of this word.
return strconv.Itoa(len(values))
}

72
src/porcupine/bitset.go Normal file
View File

@ -0,0 +1,72 @@
package porcupine
import "math/bits"
type bitset []uint64
// data layout:
// bits 0-63 are in data[0], the next are in data[1], etc.
func newBitset(bits uint) bitset {
extra := uint(0)
if bits%64 != 0 {
extra = 1
}
chunks := bits/64 + extra
return bitset(make([]uint64, chunks))
}
func (b bitset) clone() bitset {
dataCopy := make([]uint64, len(b))
copy(dataCopy, b)
return bitset(dataCopy)
}
func bitsetIndex(pos uint) (uint, uint) {
return pos / 64, pos % 64
}
func (b bitset) set(pos uint) bitset {
major, minor := bitsetIndex(pos)
b[major] |= (1 << minor)
return b
}
func (b bitset) clear(pos uint) bitset {
major, minor := bitsetIndex(pos)
b[major] &^= (1 << minor)
return b
}
func (b bitset) get(pos uint) bool {
major, minor := bitsetIndex(pos)
return b[major]&(1<<minor) != 0
}
func (b bitset) popcnt() uint {
total := 0
for _, v := range b {
total += bits.OnesCount64(v)
}
return uint(total)
}
func (b bitset) hash() uint64 {
hash := uint64(b.popcnt())
for _, v := range b {
hash ^= v
}
return hash
}
func (b bitset) equals(b2 bitset) bool {
if len(b) != len(b2) {
return false
}
for i := range b {
if b[i] != b2[i] {
return false
}
}
return true
}
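// A small sketch (unused by the checker itself) of how these helpers combine:
// mark operation ids as linearized, backtrack, and count what remains.
func exampleBitsetUsage() (bool, uint) {
	b := newBitset(128)          // 128 bits -> two uint64 chunks
	b.set(3).set(64)             // mark ids 3 and 64 as linearized
	b.clear(3)                   // un-mark id 3, as the checker does when backtracking
	return b.get(64), b.popcnt() // true, 1
}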

373
src/porcupine/checker.go Normal file
View File

@ -0,0 +1,373 @@
package porcupine
import (
"sort"
"sync/atomic"
"time"
)
type entryKind bool
const (
callEntry entryKind = false
returnEntry = true
)
type entry struct {
kind entryKind
value interface{}
id int
time int64
clientId int
}
type linearizationInfo struct {
history [][]entry // for each partition, a list of entries
partialLinearizations [][][]int // for each partition, a set of histories (list of ids)
}
type byTime []entry
func (a byTime) Len() int {
return len(a)
}
func (a byTime) Swap(i, j int) {
a[i], a[j] = a[j], a[i]
}
func (a byTime) Less(i, j int) bool {
if a[i].time != a[j].time {
return a[i].time < a[j].time
}
// if the timestamps are the same, we need to make sure we order calls
// before returns
return a[i].kind == callEntry && a[j].kind == returnEntry
}
func makeEntries(history []Operation) []entry {
var entries []entry = nil
id := 0
for _, elem := range history {
entries = append(entries, entry{
callEntry, elem.Input, id, elem.Call, elem.ClientId})
entries = append(entries, entry{
returnEntry, elem.Output, id, elem.Return, elem.ClientId})
id++
}
sort.Sort(byTime(entries))
return entries
}
type node struct {
value interface{}
match *node // call if match is nil, otherwise return
id int
next *node
prev *node
}
func insertBefore(n *node, mark *node) *node {
if mark != nil {
beforeMark := mark.prev
mark.prev = n
n.next = mark
if beforeMark != nil {
n.prev = beforeMark
beforeMark.next = n
}
}
return n
}
func length(n *node) int {
l := 0
for n != nil {
n = n.next
l++
}
return l
}
func renumber(events []Event) []Event {
var e []Event
m := make(map[int]int) // renumbering
id := 0
for _, v := range events {
if r, ok := m[v.Id]; ok {
e = append(e, Event{v.ClientId, v.Kind, v.Value, r})
} else {
e = append(e, Event{v.ClientId, v.Kind, v.Value, id})
m[v.Id] = id
id++
}
}
return e
}
func convertEntries(events []Event) []entry {
var entries []entry
for i, elem := range events {
kind := callEntry
if elem.Kind == ReturnEvent {
kind = returnEntry
}
// use index as "time"
entries = append(entries, entry{kind, elem.Value, elem.Id, int64(i), elem.ClientId})
}
return entries
}
func makeLinkedEntries(entries []entry) *node {
var root *node = nil
match := make(map[int]*node)
for i := len(entries) - 1; i >= 0; i-- {
elem := entries[i]
if elem.kind == returnEntry {
entry := &node{value: elem.value, match: nil, id: elem.id}
match[elem.id] = entry
insertBefore(entry, root)
root = entry
} else {
entry := &node{value: elem.value, match: match[elem.id], id: elem.id}
insertBefore(entry, root)
root = entry
}
}
return root
}
type cacheEntry struct {
linearized bitset
state interface{}
}
func cacheContains(model Model, cache map[uint64][]cacheEntry, entry cacheEntry) bool {
for _, elem := range cache[entry.linearized.hash()] {
if entry.linearized.equals(elem.linearized) && model.Equal(entry.state, elem.state) {
return true
}
}
return false
}
type callsEntry struct {
entry *node
state interface{}
}
func lift(entry *node) {
entry.prev.next = entry.next
entry.next.prev = entry.prev
match := entry.match
match.prev.next = match.next
if match.next != nil {
match.next.prev = match.prev
}
}
func unlift(entry *node) {
match := entry.match
match.prev.next = match
if match.next != nil {
match.next.prev = match
}
entry.prev.next = entry
entry.next.prev = entry
}
func checkSingle(model Model, history []entry, computePartial bool, kill *int32) (bool, []*[]int) {
entry := makeLinkedEntries(history)
n := length(entry) / 2
linearized := newBitset(uint(n))
cache := make(map[uint64][]cacheEntry) // map from hash to cache entry
var calls []callsEntry
// longest linearizable prefix that includes the given entry
longest := make([]*[]int, n)
state := model.Init()
headEntry := insertBefore(&node{value: nil, match: nil, id: -1}, entry)
for headEntry.next != nil {
if atomic.LoadInt32(kill) != 0 {
return false, longest
}
if entry.match != nil {
matching := entry.match // the return entry
ok, newState := model.Step(state, entry.value, matching.value)
if ok {
newLinearized := linearized.clone().set(uint(entry.id))
newCacheEntry := cacheEntry{newLinearized, newState}
if !cacheContains(model, cache, newCacheEntry) {
hash := newLinearized.hash()
cache[hash] = append(cache[hash], newCacheEntry)
calls = append(calls, callsEntry{entry, state})
state = newState
linearized.set(uint(entry.id))
lift(entry)
entry = headEntry.next
} else {
entry = entry.next
}
} else {
entry = entry.next
}
} else {
if len(calls) == 0 {
return false, longest
}
// record the longest partial linearizations seen so far, then backtrack
if computePartial {
callsLen := len(calls)
var seq []int = nil
for _, v := range calls {
if longest[v.entry.id] == nil || callsLen > len(*longest[v.entry.id]) {
// create seq lazily
if seq == nil {
seq = make([]int, len(calls))
for i, v := range calls {
seq[i] = v.entry.id
}
}
longest[v.entry.id] = &seq
}
}
}
callsTop := calls[len(calls)-1]
entry = callsTop.entry
state = callsTop.state
linearized.clear(uint(entry.id))
calls = calls[:len(calls)-1]
unlift(entry)
entry = entry.next
}
}
// longest linearization is the complete linearization, which is calls
seq := make([]int, len(calls))
for i, v := range calls {
seq[i] = v.entry.id
}
for i := 0; i < n; i++ {
longest[i] = &seq
}
return true, longest
}
func fillDefault(model Model) Model {
if model.Partition == nil {
model.Partition = NoPartition
}
if model.PartitionEvent == nil {
model.PartitionEvent = NoPartitionEvent
}
if model.Equal == nil {
model.Equal = ShallowEqual
}
if model.DescribeOperation == nil {
model.DescribeOperation = DefaultDescribeOperation
}
if model.DescribeState == nil {
model.DescribeState = DefaultDescribeState
}
return model
}
func checkParallel(model Model, history [][]entry, computeInfo bool, timeout time.Duration) (CheckResult, linearizationInfo) {
ok := true
timedOut := false
results := make(chan bool, len(history))
longest := make([][]*[]int, len(history))
kill := int32(0)
for i, subhistory := range history {
go func(i int, subhistory []entry) {
ok, l := checkSingle(model, subhistory, computeInfo, &kill)
longest[i] = l
results <- ok
}(i, subhistory)
}
var timeoutChan <-chan time.Time
if timeout > 0 {
timeoutChan = time.After(timeout)
}
count := 0
loop:
for {
select {
case result := <-results:
count++
ok = ok && result
if !ok && !computeInfo {
atomic.StoreInt32(&kill, 1)
break loop
}
if count >= len(history) {
break loop
}
case <-timeoutChan:
timedOut = true
atomic.StoreInt32(&kill, 1)
break loop // if we time out, we might get a false positive
}
}
var info linearizationInfo
if computeInfo {
// make sure we've waited for all goroutines to finish,
// otherwise we might race on access to longest[]
for count < len(history) {
<-results
count++
}
// return longest linearizable prefixes that include each history element
partialLinearizations := make([][][]int, len(history))
for i := 0; i < len(history); i++ {
var partials [][]int
// turn longest into a set of unique linearizations
set := make(map[*[]int]struct{})
for _, v := range longest[i] {
if v != nil {
set[v] = struct{}{}
}
}
for k := range set {
arr := make([]int, len(*k))
for i, v := range *k {
arr[i] = v
}
partials = append(partials, arr)
}
partialLinearizations[i] = partials
}
info.history = history
info.partialLinearizations = partialLinearizations
}
var result CheckResult
if !ok {
result = Illegal
} else {
if timedOut {
result = Unknown
} else {
result = Ok
}
}
return result, info
}
func checkEvents(model Model, history []Event, verbose bool, timeout time.Duration) (CheckResult, linearizationInfo) {
model = fillDefault(model)
partitions := model.PartitionEvent(history)
l := make([][]entry, len(partitions))
for i, subhistory := range partitions {
l[i] = convertEntries(renumber(subhistory))
}
return checkParallel(model, l, verbose, timeout)
}
func checkOperations(model Model, history []Operation, verbose bool, timeout time.Duration) (CheckResult, linearizationInfo) {
model = fillDefault(model)
partitions := model.Partition(history)
l := make([][]entry, len(partitions))
for i, subhistory := range partitions {
l[i] = makeEntries(subhistory)
}
return checkParallel(model, l, verbose, timeout)
}

77
src/porcupine/model.go Normal file
View File

@ -0,0 +1,77 @@
package porcupine
import "fmt"
type Operation struct {
ClientId int // optional, unless you want a visualization; zero-indexed
Input interface{}
Call int64 // invocation time
Output interface{}
Return int64 // response time
}
type EventKind bool
const (
CallEvent EventKind = false
ReturnEvent EventKind = true
)
type Event struct {
ClientId int // optional, unless you want a visualization; zero-indexed
Kind EventKind
Value interface{}
Id int
}
type Model struct {
// Partition functions, such that a history is linearizable if and only
// if each partition is linearizable. If you don't want to implement
// this, you can always use the `NoPartition` functions implemented
// below.
Partition func(history []Operation) [][]Operation
PartitionEvent func(history []Event) [][]Event
// Initial state of the system.
Init func() interface{}
// Step function for the system. Returns whether or not the system
// could take this step with the given inputs and outputs and also
// returns the new state. This should not mutate the existing state.
Step func(state interface{}, input interface{}, output interface{}) (bool, interface{})
// Equality on states. If you are using a simple data type for states,
// you can use the `ShallowEqual` function implemented below.
Equal func(state1, state2 interface{}) bool
// For visualization, describe an operation as a string.
// For example, "Get('x') -> 'y'".
DescribeOperation func(input interface{}, output interface{}) string
// For visualization purposes, describe a state as a string.
// For example, "{'x' -> 'y', 'z' -> 'w'}"
DescribeState func(state interface{}) string
}
func NoPartition(history []Operation) [][]Operation {
return [][]Operation{history}
}
func NoPartitionEvent(history []Event) [][]Event {
return [][]Event{history}
}
func ShallowEqual(state1, state2 interface{}) bool {
return state1 == state2
}
func DefaultDescribeOperation(input interface{}, output interface{}) string {
return fmt.Sprintf("%v -> %v", input, output)
}
func DefaultDescribeState(state interface{}) string {
return fmt.Sprintf("%v", state)
}
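// A minimal sketch of a Model for a single read/write register, showing how
// Init/Step/Equal fit together; registerInput and registerModel are
// illustrative names that nothing else in this package uses.
type registerInput struct {
	write bool
	value int
}

var registerModel = Model{
	Init: func() interface{} { return 0 },
	Step: func(state, input, output interface{}) (bool, interface{}) {
		in := input.(registerInput)
		if in.write {
			return true, in.value // a write always succeeds and becomes the new state
		}
		// in this sketch a read's output is the int value it observed
		return output.(int) == state.(int), state
	},
	Equal: ShallowEqual,
}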
type CheckResult string
const (
Unknown CheckResult = "Unknown" // timed out
Ok = "Ok"
Illegal = "Illegal"
)

View File

@ -0,0 +1,39 @@
package porcupine
import "time"
func CheckOperations(model Model, history []Operation) bool {
res, _ := checkOperations(model, history, false, 0)
return res == Ok
}
// timeout = 0 means no timeout
// if this operation times out, then a false positive is possible
func CheckOperationsTimeout(model Model, history []Operation, timeout time.Duration) CheckResult {
res, _ := checkOperations(model, history, false, timeout)
return res
}
// timeout = 0 means no timeout
// if this operation times out, then a false positive is possible
func CheckOperationsVerbose(model Model, history []Operation, timeout time.Duration) (CheckResult, linearizationInfo) {
return checkOperations(model, history, true, timeout)
}
func CheckEvents(model Model, history []Event) bool {
res, _ := checkEvents(model, history, false, 0)
return res == Ok
}
// timeout = 0 means no timeout
// if this operation times out, then a false positive is possible
func CheckEventsTimeout(model Model, history []Event, timeout time.Duration) CheckResult {
res, _ := checkEvents(model, history, false, timeout)
return res
}
// timeout = 0 means no timeout
// if this operation times out, then a false positive is possible
func CheckEventsVerbose(model Model, history []Event, timeout time.Duration) (CheckResult, linearizationInfo) {
return checkEvents(model, history, true, timeout)
}
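// Usage sketch (hedged): callers that want a failure artifact typically use a
// Verbose variant so the resulting linearizationInfo can be rendered with
// Visualize/VisualizePath (defined in this package's visualization file):
//
//	res, info := CheckOperationsVerbose(model, history, time.Minute)
//	if res == Illegal {
//		VisualizePath(model, info, "/tmp/linearizability.html") // path is an arbitrary example
//	}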

View File

@ -0,0 +1,897 @@
package porcupine
import (
"encoding/json"
"fmt"
"io"
"os"
"sort"
)
type historyElement struct {
ClientId int
Start int64
End int64
Description string
}
type linearizationStep struct {
Index int
StateDescription string
}
type partialLinearization = []linearizationStep
type partitionVisualizationData struct {
History []historyElement
PartialLinearizations []partialLinearization
Largest map[int]int
}
type visualizationData = []partitionVisualizationData
func computeVisualizationData(model Model, info linearizationInfo) visualizationData {
model = fillDefault(model)
data := make(visualizationData, len(info.history))
for partition := 0; partition < len(info.history); partition++ {
// history
n := len(info.history[partition]) / 2
history := make([]historyElement, n)
callValue := make(map[int]interface{})
returnValue := make(map[int]interface{})
for _, elem := range info.history[partition] {
switch elem.kind {
case callEntry:
history[elem.id].ClientId = elem.clientId
history[elem.id].Start = elem.time
callValue[elem.id] = elem.value
case returnEntry:
history[elem.id].End = elem.time
history[elem.id].Description = model.DescribeOperation(callValue[elem.id], elem.value)
returnValue[elem.id] = elem.value
}
}
// partial linearizations
largestIndex := make(map[int]int)
largestSize := make(map[int]int)
linearizations := make([]partialLinearization, len(info.partialLinearizations[partition]))
partials := info.partialLinearizations[partition]
sort.Slice(partials, func(i, j int) bool {
return len(partials[i]) > len(partials[j])
})
for i, partial := range partials {
linearization := make(partialLinearization, len(partial))
state := model.Init()
for j, histId := range partial {
var ok bool
ok, state = model.Step(state, callValue[histId], returnValue[histId])
if ok != true {
panic("valid partial linearization returned non-ok result from model step")
}
stateDesc := model.DescribeState(state)
linearization[j] = linearizationStep{histId, stateDesc}
if largestSize[histId] < len(partial) {
largestSize[histId] = len(partial)
largestIndex[histId] = i
}
}
linearizations[i] = linearization
}
data[partition] = partitionVisualizationData{
History: history,
PartialLinearizations: linearizations,
Largest: largestIndex,
}
}
return data
}
func Visualize(model Model, info linearizationInfo, output io.Writer) error {
data := computeVisualizationData(model, info)
jsonData, err := json.Marshal(data)
if err != nil {
return err
}
_, err = fmt.Fprintf(output, html, jsonData)
if err != nil {
return err
}
return nil
}
func VisualizePath(model Model, info linearizationInfo, path string) error {
f, err := os.Create(path)
if err != nil {
return err
}
defer f.Close()
return Visualize(model, info, f)
}
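// Editor's note: an illustrative sketch, not part of the original commit, showing how
// the verbose checker and the visualizer above fit together; the model and history
// arguments and the output file name are up to the caller.
func exampleVisualize(model Model, history []Operation) error {
	res, info := CheckOperationsVerbose(model, history, 0)
	if res == Illegal {
		// write an interactive HTML timeline showing where linearization fails
		return VisualizePath(model, info, "porcupine.html")
	}
	return nil
}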
const html = `
<!DOCTYPE html>
<html>
<head><title>Porcupine</title>
<style>
html {
font-family: Helvetica, Arial, sans-serif;
font-size: 16px;
}
text {
dominant-baseline: middle;
}
#legend {
position: fixed;
left: 10px;
top: 10px;
background-color: rgba(255, 255, 255, 0.5);
backdrop-filter: blur(3px);
padding: 5px 2px 1px 2px;
border-radius: 4px;
}
#canvas {
margin-top: 45px;
}
#calc {
width: 0;
height: 0;
visibility: hidden;
}
.bg {
fill: transparent;
}
.divider {
stroke: #ccc;
stroke-width: 1;
}
.history-rect {
stroke: #888;
stroke-width: 1;
fill: #42d1f5;
}
.link {
fill: #206475;
cursor: pointer;
}
.selected {
stroke-width: 5;
}
.target-rect {
opacity: 0;
}
.history-text {
font-size: 0.9rem;
font-family: Menlo, Courier New, monospace;
}
.hidden {
opacity: 0.2;
}
.hidden line {
opacity: 0.5; /* note: this is multiplicative */
}
.linearization {
stroke: rgba(0, 0, 0, 0.5);
}
.linearization-invalid {
stroke: rgba(255, 0, 0, 0.5);
}
.linearization-point {
stroke-width: 5;
}
.linearization-line {
stroke-width: 2;
}
.tooltip {
position: absolute;
opacity: 0;
border: 1px solid #ccc;
background: white;
border-radius: 4px;
padding: 5px;
font-size: 0.8rem;
}
.inactive {
display: none;
}
</style>
</head>
<body>
<div id="legend">
<svg xmlns="http://www.w3.org/2000/svg" width="660" height="20">
<text x="0" y="10">Clients</text>
<line x1="50" y1="0" x2="70" y2="20" stroke="#000" stroke-width="1"></line>
<text x="70" y="10">Time</text>
<line x1="110" y1="10" x2="200" y2="10" stroke="#000" stroke-width="2"></line>
<polygon points="200,5 200,15, 210,10" fill="#000"></polygon>
<rect x="300" y="5" width="10" height="10" fill="rgba(0, 0, 0, 0.5)"></rect>
<text x="315" y="10">Valid LP</text>
<rect x="400" y="5" width="10" height="10" fill="rgba(255, 0, 0, 0.5)"></rect>
<text x="415" y="10">Invalid LP</text>
<text x="520" y="10" id="jump-link" class="link">[ jump to first error ]</text>
</svg>
</div>
<div id="canvas">
</div>
<div id="calc">
</div>
<script>
'use strict'
const SVG_NS = 'http://www.w3.org/2000/svg'
function svgnew(tag, attrs) {
const el = document.createElementNS(SVG_NS, tag)
svgattr(el, attrs)
return el
}
function svgattr(el, attrs) {
if (attrs != null) {
for (var k in attrs) {
if (Object.prototype.hasOwnProperty.call(attrs, k)) {
el.setAttributeNS(null, k, attrs[k])
}
}
}
}
function svgattach(parent, child) {
return parent.appendChild(child)
}
function svgadd(el, tag, attrs) {
return svgattach(el, svgnew(tag, attrs))
}
function newArray(n, fn) {
const arr = new Array(n)
for (let i = 0; i < n; i++) {
arr[i] = fn(i)
}
return arr
}
function arrayEq(a, b) {
if (a === b) {
return true
}
if (a == null || b == null) {
return false
}
if (a.length != b.length) {
return false
}
for (let i = 0; i < a.length; i++) {
if (a[i] !== b[i]) {
return false
}
}
return true
}
function render(data) {
const PADDING = 10
const BOX_HEIGHT = 30
const BOX_SPACE = 15
const XOFF = 20
const EPSILON = 20
const LINE_BLEED = 5
const BOX_GAP = 20
const BOX_TEXT_PADDING = 10
const HISTORY_RECT_RADIUS = 4
let maxClient = -1
data.forEach(partition => {
partition['History'].forEach(el => {
maxClient = Math.max(maxClient, el['ClientId'])
})
})
const nClient = maxClient + 1
// Prepare some useful data to be used later:
// - Add a GID to each event
// - Create a mapping from GIDs back to events
// - Create a set of all timestamps
// - Create a set of all start timestamps
const allTimestamps = new Set()
const startTimestamps = new Set()
let gid = 0
const byGid = {}
data.forEach(partition => {
partition['History'].forEach(el => {
allTimestamps.add(el['Start'])
startTimestamps.add(el['Start'])
allTimestamps.add(el['End'])
// give elements GIDs
el['Gid'] = gid
byGid[gid] = el
gid++
})
})
let sortedTimestamps = Array.from(allTimestamps).sort((a, b) => a - b)
// This should not happen with "real" histories, but for certain edge
// cases, we need to deal with having multiple events share a start/end
// time. We solve this by tweaking the events that share the end time,
// updating the time to end+epsilon. In practice, rather than having to
// choose an epsilon, we choose to average the value with the next largest
// timestamp.
const nextTs = {}
for (let i = 0; i < sortedTimestamps.length-1; i++) {
nextTs[sortedTimestamps[i]] = sortedTimestamps[i+1]
}
data.forEach(partition => {
partition['History'].forEach(el => {
let end = el['End']
el['OriginalEnd'] = end // for display purposes
if (startTimestamps.has(end)) {
if (Object.prototype.hasOwnProperty.call(nextTs, end)) {
const tweaked = (end + nextTs[end])/2
el['End'] = tweaked
allTimestamps.add(tweaked)
}
}
})
})
// Update sortedTimestamps, because we created some new timestamps.
sortedTimestamps = Array.from(allTimestamps).sort((a, b) => a - b)
// Compute layout.
//
// We warp time to make it easier to see what's going on. We can think
// of there being a monotonically increasing mapping from timestamps to
// x-positions. This mapping should satisfy some criteria to make the
// visualization interpretable:
//
// - distinguishability: there should be some minimum distance between
// unequal timestamps
// - visible text: history boxes should be wide enough to fit the text
// they contain
// - enough space for LPs: history boxes should be wide enough to fit
// all linearization points that go through them, while maintaining
// readability of linearizations (where each LP in a sequence is spaced
// some minimum distance away from the previous one)
//
// Originally, I thought about this as a linear program:
//
// - variables for every unique timestamp, x_i = warp(timestamp_i)
// - objective: minimize sum x_i
// - constraint: non-negative
// - constraint: ordering + distinguishability, timestamp_i < timestamp_j -> x_i + EPS < x_j
// - constraint: visible text, size_text_j < x_{timestamp_j_end} - x_{timestamp_j_start}
// - constraint: linearization lines have points that fit within box, ...
//
// This used to actually be implemented using an LP solver (without the
// linearization point part, though that should be doable too), but
// then I realized it's possible to solve optimally using a greedy
// left-to-right scan in linear time.
//
// So that is what we do here. We optimally solve the above, and while
// doing so, also compute some useful information (e.g. x-positions of
// linearization points) that is useful later.
const xPos = {}
// Compute some information about history elements, sorted by end time;
// the most important information here is box width.
const byEnd = data.flatMap(partition =>
partition['History'].map(el => {
// compute width of the text inside the history element by actually
// drawing it (in a hidden div)
const scratch = document.getElementById('calc')
scratch.innerHTML = ''
const svg = svgadd(scratch, 'svg')
const text = svgadd(svg, 'text', {
'text-anchor': 'middle',
'class': 'history-text',
})
text.textContent = el['Description']
const bbox = text.getBBox()
const width = bbox.width + 2*BOX_TEXT_PADDING
return {
'start': el['Start'],
'end': el['End'],
'width': width,
'gid': el['Gid']
}
})
).sort((a, b) => a.end - b.end)
// Some preprocessing for linearization points and illegal next
// linearizations. We need to figure out where exactly LPs end up
// as we go, so we can make sure event boxes are wide enough.
const eventToLinearizations = newArray(gid, () => []) // event -> [{index, position}]
const eventIllegalLast = newArray(gid, () => []) // event -> [index]
const allLinearizations = []
let lgid = 0
data.forEach(partition => {
partition['PartialLinearizations'].forEach(lin => {
const globalized = [] // linearization with global indexes instead of partition-local ones
const included = new Set() // for figuring out illegal next LPs
lin.forEach((id, position) => {
included.add(id['Index'])
const gid = partition['History'][id['Index']]['Gid']
globalized.push(gid)
eventToLinearizations[gid].push({'index': lgid, 'position': position})
})
allLinearizations.push(globalized)
let minEnd = Infinity
partition['History'].forEach((el, index) => {
if (!included.has(index)) {
minEnd = Math.min(minEnd, el['End'])
}
})
partition['History'].forEach((el, index) => {
if (!included.has(index) && el['Start'] < minEnd) {
eventIllegalLast[el['Gid']].push(lgid)
}
})
lgid++
})
})
const linearizationPositions = newArray(lgid, () => []) // [[xpos]]
// Okay, now we're ready to do the left-to-right scan.
// Solve timestamp -> xPos.
let eventIndex = 0
xPos[sortedTimestamps[0]] = 0 // positions start at 0
for (let i = 1; i < sortedTimestamps.length; i++) {
// left-to-right scan, finding minimum time we can use
const ts = sortedTimestamps[i]
// ensure some gap from last timestamp
let pos = xPos[sortedTimestamps[i-1]] + BOX_GAP
// ensure that text fits in boxes
while (eventIndex < byEnd.length && byEnd[eventIndex].end <= ts) {
// push our position as far as necessary to accommodate text in box
const event = byEnd[eventIndex]
const textEndPos = xPos[event.start] + event.width
pos = Math.max(pos, textEndPos)
// Ensure that LPs fit in box.
//
// When placing the end of an event, for all partial linearizations
// that include that event, for the prefix that comes before that event,
// all their start points must have been placed already, so we can figure
// out the minimum width that the box needs to be to accommodate the LP.
eventToLinearizations[event.gid]
.concat(eventIllegalLast[event.gid].map(index => {
return {
'index': index,
'position': allLinearizations[index].length-1,
}
}))
.forEach(li => {
const {index, position} = li
for (let i = linearizationPositions[index].length; i <= position; i++) {
// determine past points
let prev = null
if (linearizationPositions[index].length != 0) {
prev = linearizationPositions[index][i-1]
}
const nextGid = allLinearizations[index][i]
let nextPos
if (prev === null) {
nextPos = xPos[byGid[nextGid]['Start']]
} else {
nextPos = Math.max(xPos[byGid[nextGid]['Start']], prev + EPSILON)
}
linearizationPositions[index].push(nextPos)
}
// this next line only really makes sense for the ones in
// eventToLinearizations, not the ones from eventIllegalLast,
// but it's safe to do it for all points, so we don't bother to
// distinguish.
pos = Math.max(pos, linearizationPositions[index][position])
})
// ensure that illegal next LPs fit in box too
eventIllegalLast[event.gid].forEach(li => {
const lin = linearizationPositions[li]
const prev = lin[lin.length-1]
pos = Math.max(pos, prev + EPSILON)
})
eventIndex++
}
xPos[ts] = pos
}
// Solved, now draw UI.
let selected = false
let selectedIndex = [-1, -1]
const height = 2*PADDING + BOX_HEIGHT * nClient + BOX_SPACE * (nClient - 1)
const width = 2*PADDING + XOFF + xPos[sortedTimestamps[sortedTimestamps.length-1]]
const svg = svgadd(document.getElementById('canvas'), 'svg', {
'width': width,
'height': height,
})
// draw background, etc.
const bg = svgadd(svg, 'g')
const bgRect = svgadd(bg, 'rect', {
'height': height,
'width': width,
'x': 0,
'y': 0,
'class': 'bg',
})
bgRect.onclick = handleBgClick
for (let i = 0; i < nClient; i++) {
const text = svgadd(bg, 'text', {
'x': XOFF/2,
'y': PADDING + BOX_HEIGHT/2 + i * (BOX_HEIGHT + BOX_SPACE),
'text-anchor': 'middle',
})
text.textContent = i
}
svgadd(bg, 'line', {
'x1': PADDING + XOFF,
'y1': PADDING,
'x2': PADDING + XOFF,
'y2': height - PADDING,
'class': 'divider'
})
// draw history
const historyLayers = []
const historyRects = []
const targetRects = svgnew('g')
data.forEach((partition, partitionIndex) => {
const l = svgadd(svg, 'g')
historyLayers.push(l)
const rects = []
partition['History'].forEach((el, elIndex) => {
const g = svgadd(l, 'g')
const rx = xPos[el['Start']]
const width = xPos[el['End']] - rx
const x = rx + XOFF + PADDING
const y = PADDING + el['ClientId'] * (BOX_HEIGHT + BOX_SPACE)
rects.push(svgadd(g, 'rect', {
'height': BOX_HEIGHT,
'width': width,
'x': x,
'y': y,
'rx': HISTORY_RECT_RADIUS,
'ry': HISTORY_RECT_RADIUS,
'class': 'history-rect'
}))
const text = svgadd(g, 'text', {
'x': x + width/2,
'y': y + BOX_HEIGHT/2,
'text-anchor': 'middle',
'class': 'history-text',
})
text.textContent = el['Description']
// we don't add mouseTarget to g, but to targetRects, because we
// want to layer this on top of everything at the end; otherwise, the
// LPs and lines will be over the target, which will create holes
// where hover etc. won't work
const mouseTarget = svgadd(targetRects, 'rect', {
'height': BOX_HEIGHT,
'width': width,
'x': x,
'y': y,
'class': 'target-rect',
'data-partition': partitionIndex,
'data-index': elIndex,
})
mouseTarget.onmouseover = handleMouseOver
mouseTarget.onmousemove = handleMouseMove
mouseTarget.onmouseout = handleMouseOut
mouseTarget.onclick = handleClick
})
historyRects.push(rects)
})
// draw partial linearizations
const illegalLast = data.map(partition => {
return partition['PartialLinearizations'].map(() => new Set())
})
const largestIllegal = data.map(() => {return {}})
const largestIllegalLength = data.map(() => {return {}})
const partialLayers = []
const errorPoints = []
data.forEach((partition, partitionIndex) => {
const l = []
partialLayers.push(l)
partition['PartialLinearizations'].forEach((lin, linIndex) => {
const g = svgadd(svg, 'g')
l.push(g)
let prevX = null
let prevY = null
let prevEl = null
const included = new Set()
lin.forEach(id => {
const el = partition['History'][id['Index']]
const hereX = PADDING + XOFF + xPos[el['Start']]
const x = prevX !== null ? Math.max(hereX, prevX + EPSILON) : hereX
const y = PADDING + el['ClientId'] * (BOX_HEIGHT + BOX_SPACE) - LINE_BLEED
// line from previous
if (prevEl !== null) {
svgadd(g, 'line', {
'x1': prevX,
'x2': x,
'y1': prevEl['ClientId'] >= el['ClientId'] ? prevY : prevY + BOX_HEIGHT + 2*LINE_BLEED,
'y2': prevEl['ClientId'] <= el['ClientId'] ? y : y + BOX_HEIGHT + 2*LINE_BLEED,
'class': 'linearization linearization-line',
})
}
// current line
svgadd(g, 'line', {
'x1': x,
'x2': x,
'y1': y,
'y2': y + BOX_HEIGHT + 2*LINE_BLEED,
'class': 'linearization linearization-point'
})
prevX = x
prevY = y
prevEl = el
included.add(id['Index'])
})
// show possible but illegal next linearizations
// a history element is a possible next try
// if no other history element must be linearized earlier
// i.e. forall others, this.start < other.end
let minEnd = Infinity
partition['History'].forEach((el, index) => {
if (!included.has(index)) {
minEnd = Math.min(minEnd, el['End'])
}
})
partition['History'].forEach((el, index) => {
if (!included.has(index) && el['Start'] < minEnd) {
const hereX = PADDING + XOFF + xPos[el['Start']]
const x = prevX !== null ? Math.max(hereX, prevX + EPSILON) : hereX
const y = PADDING + el['ClientId'] * (BOX_HEIGHT + BOX_SPACE) - LINE_BLEED
// line from previous
svgadd(g, 'line', {
'x1': prevX,
'x2': x,
'y1': prevEl['ClientId'] >= el['ClientId'] ? prevY : prevY + BOX_HEIGHT + 2*LINE_BLEED,
'y2': prevEl['ClientId'] <= el['ClientId'] ? y : y + BOX_HEIGHT + 2*LINE_BLEED,
'class': 'linearization-invalid linearization-line',
})
// current line
const point = svgadd(g, 'line', {
'x1': x,
'x2': x,
'y1': y,
'y2': y + BOX_HEIGHT + 2*LINE_BLEED,
'class': 'linearization-invalid linearization-point',
})
errorPoints.push({
x: x,
partition: partitionIndex,
index: lin[lin.length-1]['Index'], // NOTE not index
element: point
})
illegalLast[partitionIndex][linIndex].add(index)
if (!Object.prototype.hasOwnProperty.call(largestIllegalLength[partitionIndex], index) || largestIllegalLength[partitionIndex][index] < lin.length) {
largestIllegalLength[partitionIndex][index] = lin.length
largestIllegal[partitionIndex][index] = linIndex
}
}
})
})
})
errorPoints.sort((a, b) => a.x - b.x)
// attach targetRects
svgattach(svg, targetRects)
// tooltip
const tooltip = document.getElementById('canvas').appendChild(document.createElement('div'))
tooltip.setAttribute('class', 'tooltip')
function handleMouseOver() {
if (!selected) {
const partition = parseInt(this.dataset['partition'])
const index = parseInt(this.dataset['index'])
highlight(partition, index)
}
tooltip.style.opacity = 1
}
function linearizationIndex(partition, index) {
// show this linearization
if (Object.prototype.hasOwnProperty.call(data[partition]['Largest'], index)) {
return data[partition]['Largest'][index]
} else if (Object.prototype.hasOwnProperty.call(largestIllegal[partition], index)) {
return largestIllegal[partition][index]
}
return null
}
function highlight(partition, index) {
// hide all but this partition
historyLayers.forEach((layer, i) => {
if (i === partition) {
layer.classList.remove('hidden')
} else {
layer.classList.add('hidden')
}
})
// hide all but the relevant linearization
partialLayers.forEach(layer => {
layer.forEach(g => {
g.classList.add('hidden')
})
})
// show this linearization
const maxIndex = linearizationIndex(partition, index)
if (maxIndex !== null) {
partialLayers[partition][maxIndex].classList.remove('hidden')
}
updateJump()
}
let lastTooltip = [null, null, null, null, null]
function handleMouseMove() {
const partition = parseInt(this.dataset['partition'])
const index = parseInt(this.dataset['index'])
const [sPartition, sIndex] = selectedIndex
const thisTooltip = [partition, index, selected, sPartition, sIndex]
if (!arrayEq(lastTooltip, thisTooltip)) {
let maxIndex
if (!selected) {
maxIndex = linearizationIndex(partition, index)
} else {
// if selected, show info relevant to the selected linearization
maxIndex = linearizationIndex(sPartition, sIndex)
}
if (selected && sPartition !== partition) {
tooltip.innerHTML = 'Not part of selected partition.'
} else if (maxIndex === null) {
if (!selected) {
tooltip.innerHTML = 'Not part of any partial linearization.'
} else {
tooltip.innerHTML = 'Selected element is not part of any partial linearization.'
}
} else {
const lin = data[partition]['PartialLinearizations'][maxIndex]
let prev = null, curr = null
let found = false
for (let i = 0; i < lin.length; i++) {
prev = curr
curr = lin[i]
if (curr['Index'] === index) {
found = true
break
}
}
let call = data[partition]['History'][index]['Start']
let ret = data[partition]['History'][index]['OriginalEnd']
let msg = ''
if (found) {
// part of linearization
if (prev !== null) {
msg = '<strong>Previous state:</strong><br>' + prev['StateDescription'] + '<br><br>'
}
msg += '<strong>New state:</strong><br>' + curr['StateDescription'] +
'<br><br>Call: ' + call +
'<br><br>Return: ' + ret
} else if (illegalLast[partition][maxIndex].has(index)) {
// illegal next one
msg = '<strong>Previous state:</strong><br>' + lin[lin.length-1]['StateDescription'] +
'<br><br><strong>New state:</strong><br>&langle;invalid op&rangle;' +
'<br><br>Call: ' + call +
'<br><br>Return: ' + ret
} else {
// not part of this one
msg = 'Not part of selected element\'s partial linearization.'
}
tooltip.innerHTML = msg
}
lastTooltip = thisTooltip
}
tooltip.style.left = (event.pageX+20) + 'px'
tooltip.style.top = (event.pageY+20) + 'px'
}
function handleMouseOut() {
if (!selected) {
resetHighlight()
}
tooltip.style.opacity = 0
lastTooltip = [null, null, null, null, null]
}
function resetHighlight() {
// show all layers
historyLayers.forEach(layer => {
layer.classList.remove('hidden')
})
// show longest linearizations, which are first
partialLayers.forEach(layers => {
layers.forEach((l, i) => {
if (i === 0) {
l.classList.remove('hidden')
} else {
l.classList.add('hidden')
}
})
})
updateJump()
}
function updateJump() {
const jump = document.getElementById('jump-link')
// find first non-hidden point
// feels a little hacky, but it works
const point = errorPoints.find(pt => !pt.element.parentElement.classList.contains('hidden'))
if (point) {
jump.classList.remove('inactive')
jump.onclick = () => {
point.element.scrollIntoView({behavior: 'smooth', inline: 'center', block: 'center'})
if (!selected) {
select(point.partition, point.index)
}
}
} else {
jump.classList.add('inactive')
}
}
function handleClick() {
const partition = parseInt(this.dataset['partition'])
const index = parseInt(this.dataset['index'])
if (selected) {
const [sPartition, sIndex] = selectedIndex
if (partition === sPartition && index === sIndex) {
deselect()
return
} else {
historyRects[sPartition][sIndex].classList.remove('selected')
}
}
select(partition, index)
}
function handleBgClick() {
deselect()
}
function select(partition, index) {
selected = true
selectedIndex = [partition, index]
highlight(partition, index)
historyRects[partition][index].classList.add('selected')
}
function deselect() {
if (!selected) {
return
}
selected = false
resetHighlight()
const [partition, index] = selectedIndex
historyRects[partition][index].classList.remove('selected')
}
handleMouseOut() // initialize, same as mouse out
}
const data = %s
render(data)
</script>
</body>
</html>
`

591
src/raft/config.go Normal file

@ -0,0 +1,591 @@
package raft
//
// support for Raft tester.
//
// we will use the original config.go to test your code for grading.
// so, while you can modify this code to help you debug, please
// test with the original before submitting.
//
import "6.824/labgob"
import "6.824/labrpc"
import "bytes"
import "log"
import "sync"
import "testing"
import "runtime"
import "math/rand"
import crand "crypto/rand"
import "math/big"
import "encoding/base64"
import "time"
import "fmt"
func randstring(n int) string {
b := make([]byte, 2*n)
crand.Read(b)
s := base64.URLEncoding.EncodeToString(b)
return s[0:n]
}
func makeSeed() int64 {
max := big.NewInt(int64(1) << 62)
bigx, _ := crand.Int(crand.Reader, max)
x := bigx.Int64()
return x
}
type config struct {
mu sync.Mutex
t *testing.T
net *labrpc.Network
n int
rafts []*Raft
applyErr []string // from apply channel readers
connected []bool // whether each server is on the net
saved []*Persister
endnames [][]string // the port file names each sends to
logs []map[int]interface{} // copy of each server's committed entries
start time.Time // time at which make_config() was called
// begin()/end() statistics
t0 time.Time // time at which test_test.go called cfg.begin()
rpcs0 int // rpcTotal() at start of test
cmds0 int // number of agreements
bytes0 int64
maxIndex int
maxIndex0 int
}
var ncpu_once sync.Once
func make_config(t *testing.T, n int, unreliable bool, snapshot bool) *config {
ncpu_once.Do(func() {
if runtime.NumCPU() < 2 {
fmt.Printf("warning: only one CPU, which may conceal locking bugs\n")
}
rand.Seed(makeSeed())
})
runtime.GOMAXPROCS(4)
cfg := &config{}
cfg.t = t
cfg.net = labrpc.MakeNetwork()
cfg.n = n
cfg.applyErr = make([]string, cfg.n)
cfg.rafts = make([]*Raft, cfg.n)
cfg.connected = make([]bool, cfg.n)
cfg.saved = make([]*Persister, cfg.n)
cfg.endnames = make([][]string, cfg.n)
cfg.logs = make([]map[int]interface{}, cfg.n)
cfg.start = time.Now()
cfg.setunreliable(unreliable)
cfg.net.LongDelays(true)
applier := cfg.applier
if snapshot {
applier = cfg.applierSnap
}
// create a full set of Rafts.
for i := 0; i < cfg.n; i++ {
cfg.logs[i] = map[int]interface{}{}
cfg.start1(i, applier)
}
// connect everyone
for i := 0; i < cfg.n; i++ {
cfg.connect(i)
}
return cfg
}
// shut down a Raft server but save its persistent state.
func (cfg *config) crash1(i int) {
cfg.disconnect(i)
cfg.net.DeleteServer(i) // disable client connections to the server.
cfg.mu.Lock()
defer cfg.mu.Unlock()
// a fresh persister, in case old instance
// continues to update the Persister.
// but copy old persister's content so that we always
// pass Make() the last persisted state.
if cfg.saved[i] != nil {
cfg.saved[i] = cfg.saved[i].Copy()
}
rf := cfg.rafts[i]
if rf != nil {
cfg.mu.Unlock()
rf.Kill()
cfg.mu.Lock()
cfg.rafts[i] = nil
}
if cfg.saved[i] != nil {
raftlog := cfg.saved[i].ReadRaftState()
snapshot := cfg.saved[i].ReadSnapshot()
cfg.saved[i] = &Persister{}
cfg.saved[i].SaveStateAndSnapshot(raftlog, snapshot)
}
}
func (cfg *config) checkLogs(i int, m ApplyMsg) (string, bool) {
err_msg := ""
v := m.Command
for j := 0; j < len(cfg.logs); j++ {
if old, oldok := cfg.logs[j][m.CommandIndex]; oldok && old != v {
log.Printf("%v: log %v; server %v\n", i, cfg.logs[i], cfg.logs[j])
// some server has already committed a different value for this entry!
err_msg = fmt.Sprintf("commit index=%v server=%v %v != server=%v %v",
m.CommandIndex, i, m.Command, j, old)
}
}
_, prevok := cfg.logs[i][m.CommandIndex-1]
cfg.logs[i][m.CommandIndex] = v
if m.CommandIndex > cfg.maxIndex {
cfg.maxIndex = m.CommandIndex
}
return err_msg, prevok
}
// applier reads messages from applyCh and checks that they match the log
// contents
func (cfg *config) applier(i int, applyCh chan ApplyMsg) {
for m := range applyCh {
if m.CommandValid == false {
// ignore other types of ApplyMsg
} else {
cfg.mu.Lock()
err_msg, prevok := cfg.checkLogs(i, m)
cfg.mu.Unlock()
if m.CommandIndex > 1 && prevok == false {
err_msg = fmt.Sprintf("server %v apply out of order %v", i, m.CommandIndex)
}
if err_msg != "" {
log.Fatalf("apply error: %v\n", err_msg)
cfg.applyErr[i] = err_msg
// keep reading after error so that Raft doesn't block
// holding locks...
}
}
}
}
const SnapShotInterval = 10
// periodically snapshot raft state
func (cfg *config) applierSnap(i int, applyCh chan ApplyMsg) {
lastApplied := 0
for m := range applyCh {
if m.SnapshotValid {
//DPrintf("Installsnapshot %v %v\n", m.SnapshotIndex, lastApplied)
cfg.mu.Lock()
if cfg.rafts[i].CondInstallSnapshot(m.SnapshotTerm,
m.SnapshotIndex, m.Snapshot) {
cfg.logs[i] = make(map[int]interface{})
r := bytes.NewBuffer(m.Snapshot)
d := labgob.NewDecoder(r)
var v int
if d.Decode(&v) != nil {
log.Fatalf("decode error\n")
}
cfg.logs[i][m.SnapshotIndex] = v
lastApplied = m.SnapshotIndex
}
cfg.mu.Unlock()
} else if m.CommandValid && m.CommandIndex > lastApplied {
//DPrintf("apply %v lastApplied %v\n", m.CommandIndex, lastApplied)
cfg.mu.Lock()
err_msg, prevok := cfg.checkLogs(i, m)
cfg.mu.Unlock()
if m.CommandIndex > 1 && prevok == false {
err_msg = fmt.Sprintf("server %v apply out of order %v", i, m.CommandIndex)
}
if err_msg != "" {
log.Fatalf("apply error: %v\n", err_msg)
cfg.applyErr[i] = err_msg
// keep reading after error so that Raft doesn't block
// holding locks...
}
lastApplied = m.CommandIndex
if (m.CommandIndex+1)%SnapShotInterval == 0 {
w := new(bytes.Buffer)
e := labgob.NewEncoder(w)
v := m.Command
e.Encode(v)
cfg.rafts[i].Snapshot(m.CommandIndex, w.Bytes())
}
} else {
// Ignore other types of ApplyMsg or old
// commands. Old commands may never happen,
// depending on the Raft implementation, but
// just in case.
// DPrintf("Ignore: Index %v lastApplied %v\n", m.CommandIndex, lastApplied)
}
}
}
//
// start or re-start a Raft.
// if one already exists, "kill" it first.
// allocate new outgoing port file names, and a new
// state persister, to isolate the previous instance of
// this server, since we cannot really kill it.
//
func (cfg *config) start1(i int, applier func(int, chan ApplyMsg)) {
cfg.crash1(i)
// a fresh set of outgoing ClientEnd names.
// so that old crashed instance's ClientEnds can't send.
cfg.endnames[i] = make([]string, cfg.n)
for j := 0; j < cfg.n; j++ {
cfg.endnames[i][j] = randstring(20)
}
// a fresh set of ClientEnds.
ends := make([]*labrpc.ClientEnd, cfg.n)
for j := 0; j < cfg.n; j++ {
ends[j] = cfg.net.MakeEnd(cfg.endnames[i][j])
cfg.net.Connect(cfg.endnames[i][j], j)
}
cfg.mu.Lock()
// a fresh persister, so old instance doesn't overwrite
// new instance's persisted state.
// but copy old persister's content so that we always
// pass Make() the last persisted state.
if cfg.saved[i] != nil {
cfg.saved[i] = cfg.saved[i].Copy()
} else {
cfg.saved[i] = MakePersister()
}
cfg.mu.Unlock()
applyCh := make(chan ApplyMsg)
rf := Make(ends, i, cfg.saved[i], applyCh)
cfg.mu.Lock()
cfg.rafts[i] = rf
cfg.mu.Unlock()
go applier(i, applyCh)
svc := labrpc.MakeService(rf)
srv := labrpc.MakeServer()
srv.AddService(svc)
cfg.net.AddServer(i, srv)
}
func (cfg *config) checkTimeout() {
// enforce a two minute real-time limit on each test
if !cfg.t.Failed() && time.Since(cfg.start) > 120*time.Second {
cfg.t.Fatal("test took longer than 120 seconds")
}
}
func (cfg *config) cleanup() {
for i := 0; i < len(cfg.rafts); i++ {
if cfg.rafts[i] != nil {
cfg.rafts[i].Kill()
}
}
cfg.net.Cleanup()
cfg.checkTimeout()
}
// attach server i to the net.
func (cfg *config) connect(i int) {
// fmt.Printf("connect(%d)\n", i)
cfg.connected[i] = true
// outgoing ClientEnds
for j := 0; j < cfg.n; j++ {
if cfg.connected[j] {
endname := cfg.endnames[i][j]
cfg.net.Enable(endname, true)
}
}
// incoming ClientEnds
for j := 0; j < cfg.n; j++ {
if cfg.connected[j] {
endname := cfg.endnames[j][i]
cfg.net.Enable(endname, true)
}
}
}
// detach server i from the net.
func (cfg *config) disconnect(i int) {
// fmt.Printf("disconnect(%d)\n", i)
cfg.connected[i] = false
// outgoing ClientEnds
for j := 0; j < cfg.n; j++ {
if cfg.endnames[i] != nil {
endname := cfg.endnames[i][j]
cfg.net.Enable(endname, false)
}
}
// incoming ClientEnds
for j := 0; j < cfg.n; j++ {
if cfg.endnames[j] != nil {
endname := cfg.endnames[j][i]
cfg.net.Enable(endname, false)
}
}
}
func (cfg *config) rpcCount(server int) int {
return cfg.net.GetCount(server)
}
func (cfg *config) rpcTotal() int {
return cfg.net.GetTotalCount()
}
func (cfg *config) setunreliable(unrel bool) {
cfg.net.Reliable(!unrel)
}
func (cfg *config) bytesTotal() int64 {
return cfg.net.GetTotalBytes()
}
func (cfg *config) setlongreordering(longrel bool) {
cfg.net.LongReordering(longrel)
}
// check that there's exactly one leader.
// try a few times in case re-elections are needed.
func (cfg *config) checkOneLeader() int {
for iters := 0; iters < 10; iters++ {
ms := 450 + (rand.Int63() % 100)
time.Sleep(time.Duration(ms) * time.Millisecond)
leaders := make(map[int][]int)
for i := 0; i < cfg.n; i++ {
if cfg.connected[i] {
if term, leader := cfg.rafts[i].GetState(); leader {
leaders[term] = append(leaders[term], i)
}
}
}
lastTermWithLeader := -1
for term, leaders := range leaders {
if len(leaders) > 1 {
cfg.t.Fatalf("term %d has %d (>1) leaders", term, len(leaders))
}
if term > lastTermWithLeader {
lastTermWithLeader = term
}
}
if len(leaders) != 0 {
return leaders[lastTermWithLeader][0]
}
}
cfg.t.Fatalf("expected one leader, got none")
return -1
}
// check that everyone agrees on the term.
func (cfg *config) checkTerms() int {
term := -1
for i := 0; i < cfg.n; i++ {
if cfg.connected[i] {
xterm, _ := cfg.rafts[i].GetState()
if term == -1 {
term = xterm
} else if term != xterm {
cfg.t.Fatalf("servers disagree on term")
}
}
}
return term
}
// check that there's no leader
func (cfg *config) checkNoLeader() {
for i := 0; i < cfg.n; i++ {
if cfg.connected[i] {
_, is_leader := cfg.rafts[i].GetState()
if is_leader {
cfg.t.Fatalf("expected no leader, but %v claims to be leader", i)
}
}
}
}
// how many servers think a log entry is committed?
func (cfg *config) nCommitted(index int) (int, interface{}) {
count := 0
var cmd interface{} = nil
for i := 0; i < len(cfg.rafts); i++ {
if cfg.applyErr[i] != "" {
cfg.t.Fatal(cfg.applyErr[i])
}
cfg.mu.Lock()
cmd1, ok := cfg.logs[i][index]
cfg.mu.Unlock()
if ok {
if count > 0 && cmd != cmd1 {
cfg.t.Fatalf("committed values do not match: index %v, %v, %v\n",
index, cmd, cmd1)
}
count += 1
cmd = cmd1
}
}
return count, cmd
}
// wait for at least n servers to commit.
// but don't wait forever.
func (cfg *config) wait(index int, n int, startTerm int) interface{} {
to := 10 * time.Millisecond
for iters := 0; iters < 30; iters++ {
nd, _ := cfg.nCommitted(index)
if nd >= n {
break
}
time.Sleep(to)
if to < time.Second {
to *= 2
}
if startTerm > -1 {
for _, r := range cfg.rafts {
if t, _ := r.GetState(); t > startTerm {
// someone has moved on
// can no longer guarantee that we'll "win"
return -1
}
}
}
}
nd, cmd := cfg.nCommitted(index)
if nd < n {
cfg.t.Fatalf("only %d decided for index %d; wanted %d\n",
nd, index, n)
}
return cmd
}
// do a complete agreement.
// it might choose the wrong leader initially,
// and have to re-submit after giving up.
// entirely gives up after about 10 seconds.
// indirectly checks that the servers agree on the
// same value, since nCommitted() checks this,
// as do the threads that read from applyCh.
// returns index.
// if retry==true, may submit the command multiple
// times, in case a leader fails just after Start().
// if retry==false, calls Start() only once, in order
// to simplify the early Lab 2B tests.
func (cfg *config) one(cmd interface{}, expectedServers int, retry bool) int {
t0 := time.Now()
starts := 0
for time.Since(t0).Seconds() < 10 {
// try all the servers, maybe one is the leader.
index := -1
for si := 0; si < cfg.n; si++ {
starts = (starts + 1) % cfg.n
var rf *Raft
cfg.mu.Lock()
if cfg.connected[starts] {
rf = cfg.rafts[starts]
}
cfg.mu.Unlock()
if rf != nil {
index1, _, ok := rf.Start(cmd)
if ok {
index = index1
break
}
}
}
if index != -1 {
// somebody claimed to be the leader and to have
// submitted our command; wait a while for agreement.
t1 := time.Now()
for time.Since(t1).Seconds() < 2 {
nd, cmd1 := cfg.nCommitted(index)
if nd > 0 && nd >= expectedServers {
// committed
if cmd1 == cmd {
// and it was the command we submitted.
return index
}
}
time.Sleep(20 * time.Millisecond)
}
if retry == false {
cfg.t.Fatalf("one(%v) failed to reach agreement", cmd)
}
} else {
time.Sleep(50 * time.Millisecond)
}
}
cfg.t.Fatalf("one(%v) failed to reach agreement", cmd)
return -1
}
// start a Test.
// print the Test message.
// e.g. cfg.begin("Test (2B): RPC counts aren't too high")
func (cfg *config) begin(description string) {
fmt.Printf("%s ...\n", description)
cfg.t0 = time.Now()
cfg.rpcs0 = cfg.rpcTotal()
cfg.bytes0 = cfg.bytesTotal()
cfg.cmds0 = 0
cfg.maxIndex0 = cfg.maxIndex
}
// end a Test -- the fact that we got here means there
// was no failure.
// print the Passed message,
// and some performance numbers.
func (cfg *config) end() {
cfg.checkTimeout()
if cfg.t.Failed() == false {
cfg.mu.Lock()
t := time.Since(cfg.t0).Seconds() // real time
npeers := cfg.n // number of Raft peers
nrpc := cfg.rpcTotal() - cfg.rpcs0 // number of RPC sends
nbytes := cfg.bytesTotal() - cfg.bytes0 // number of bytes
ncmds := cfg.maxIndex - cfg.maxIndex0 // number of Raft agreements reported
cfg.mu.Unlock()
fmt.Printf(" ... Passed --")
fmt.Printf(" %4.1f %d %4d %7d %4d\n", t, npeers, nrpc, nbytes, ncmds)
}
}
// Maximum log size across all servers
func (cfg *config) LogSize() int {
logsize := 0
for i := 0; i < cfg.n; i++ {
n := cfg.saved[i].RaftStateSize()
if n > logsize {
logsize = n
}
}
return logsize
}
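// Editor's note: an illustrative sketch, not part of the original commit, of how the
// tests in test_test.go typically drive this harness; the command values and the
// description string are arbitrary.
func exampleAgreement(t *testing.T) {
	servers := 3
	cfg := make_config(t, servers, false, false)
	defer cfg.cleanup()

	cfg.begin("Test: example agreement sketch")
	for i := 1; i <= 3; i++ {
		// submit one command and require all three servers to commit it
		cfg.one(100*i, servers, true)
	}
	cfg.end()
}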

76
src/raft/persister.go Normal file

@ -0,0 +1,76 @@
package raft
//
// support for Raft and kvraft to save persistent
// Raft state (log &c) and k/v server snapshots.
//
// we will use the original persister.go to test your code for grading.
// so, while you can modify this code to help you debug, please
// test with the original before submitting.
//
import "sync"
type Persister struct {
mu sync.Mutex
raftstate []byte
snapshot []byte
}
func MakePersister() *Persister {
return &Persister{}
}
func clone(orig []byte) []byte {
x := make([]byte, len(orig))
copy(x, orig)
return x
}
func (ps *Persister) Copy() *Persister {
ps.mu.Lock()
defer ps.mu.Unlock()
np := MakePersister()
np.raftstate = ps.raftstate
np.snapshot = ps.snapshot
return np
}
func (ps *Persister) SaveRaftState(state []byte) {
ps.mu.Lock()
defer ps.mu.Unlock()
ps.raftstate = clone(state)
}
func (ps *Persister) ReadRaftState() []byte {
ps.mu.Lock()
defer ps.mu.Unlock()
return clone(ps.raftstate)
}
func (ps *Persister) RaftStateSize() int {
ps.mu.Lock()
defer ps.mu.Unlock()
return len(ps.raftstate)
}
// Save both Raft state and K/V snapshot as a single atomic action,
// to help avoid them getting out of sync.
func (ps *Persister) SaveStateAndSnapshot(state []byte, snapshot []byte) {
ps.mu.Lock()
defer ps.mu.Unlock()
ps.raftstate = clone(state)
ps.snapshot = clone(snapshot)
}
func (ps *Persister) ReadSnapshot() []byte {
ps.mu.Lock()
defer ps.mu.Unlock()
return clone(ps.snapshot)
}
func (ps *Persister) SnapshotSize() int {
ps.mu.Lock()
defer ps.mu.Unlock()
return len(ps.snapshot)
}
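// Editor's note: an illustrative sketch, not part of the original commit, of how Raft
// code typically uses this Persister together with labgob; the currentTerm, votedFor,
// and logEntries fields are assumptions about a student implementation.
//
//	w := new(bytes.Buffer)
//	e := labgob.NewEncoder(w)
//	e.Encode(rf.currentTerm)
//	e.Encode(rf.votedFor)
//	e.Encode(rf.logEntries)
//	state := w.Bytes()
//	// save Raft state and the service snapshot in one atomic step
//	rf.persister.SaveStateAndSnapshot(state, snapshot)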

284
src/raft/raft.go Normal file

@ -0,0 +1,284 @@
package raft
//
// this is an outline of the API that raft must expose to
// the service (or tester). see comments below for
// each of these functions for more details.
//
// rf = Make(...)
// create a new Raft server.
// rf.Start(command interface{}) (index, term, isleader)
// start agreement on a new log entry
// rf.GetState() (term, isLeader)
// ask a Raft for its current term, and whether it thinks it is leader
// ApplyMsg
// each time a new entry is committed to the log, each Raft peer
// should send an ApplyMsg to the service (or tester)
// in the same server.
//
import (
// "bytes"
"sync"
"sync/atomic"
// "6.824/labgob"
"6.824/labrpc"
)
//
// as each Raft peer becomes aware that successive log entries are
// committed, the peer should send an ApplyMsg to the service (or
// tester) on the same server, via the applyCh passed to Make(). set
// CommandValid to true to indicate that the ApplyMsg contains a newly
// committed log entry.
//
// in part 2D you'll want to send other kinds of messages (e.g.,
// snapshots) on the applyCh, but set CommandValid to false for these
// other uses.
//
type ApplyMsg struct {
CommandValid bool
Command interface{}
CommandIndex int
// For 2D:
SnapshotValid bool
Snapshot []byte
SnapshotTerm int
SnapshotIndex int
}
//
// A Go object implementing a single Raft peer.
//
type Raft struct {
mu sync.Mutex // Lock to protect shared access to this peer's state
peers []*labrpc.ClientEnd // RPC end points of all peers
persister *Persister // Object to hold this peer's persisted state
me int // this peer's index into peers[]
dead int32 // set by Kill()
// Your data here (2A, 2B, 2C).
// Look at the paper's Figure 2 for a description of what
// state a Raft server must maintain.
}
// return currentTerm and whether this server
// believes it is the leader.
func (rf *Raft) GetState() (int, bool) {
var term int
var isleader bool
// Your code here (2A).
return term, isleader
}
//
// save Raft's persistent state to stable storage,
// where it can later be retrieved after a crash and restart.
// see paper's Figure 2 for a description of what should be persistent.
//
func (rf *Raft) persist() {
// Your code here (2C).
// Example:
// w := new(bytes.Buffer)
// e := labgob.NewEncoder(w)
// e.Encode(rf.xxx)
// e.Encode(rf.yyy)
// data := w.Bytes()
// rf.persister.SaveRaftState(data)
}
//
// restore previously persisted state.
//
func (rf *Raft) readPersist(data []byte) {
if data == nil || len(data) < 1 { // bootstrap without any state?
return
}
// Your code here (2C).
// Example:
// r := bytes.NewBuffer(data)
// d := labgob.NewDecoder(r)
// var xxx
// var yyy
// if d.Decode(&xxx) != nil ||
// d.Decode(&yyy) != nil {
// error...
// } else {
// rf.xxx = xxx
// rf.yyy = yyy
// }
}
//
// A service wants to switch to a snapshot. Only do so if Raft doesn't
// have more recent info than the snapshot it communicated on applyCh.
//
func (rf *Raft) CondInstallSnapshot(lastIncludedTerm int, lastIncludedIndex int, snapshot []byte) bool {
// Your code here (2D).
return true
}
// the service says it has created a snapshot that has
// all info up to and including index. this means the
// service no longer needs the log through (and including)
// that index. Raft should now trim its log as much as possible.
func (rf *Raft) Snapshot(index int, snapshot []byte) {
// Your code here (2D).
}
//
// example RequestVote RPC arguments structure.
// field names must start with capital letters!
//
type RequestVoteArgs struct {
// Your data here (2A, 2B).
}
//
// example RequestVote RPC reply structure.
// field names must start with capital letters!
//
type RequestVoteReply struct {
// Your data here (2A).
}
//
// example RequestVote RPC handler.
//
func (rf *Raft) RequestVote(args *RequestVoteArgs, reply *RequestVoteReply) {
// Your code here (2A, 2B).
}
//
// example code to send a RequestVote RPC to a server.
// server is the index of the target server in rf.peers[].
// expects RPC arguments in args.
// fills in *reply with RPC reply, so caller should
// pass &reply.
// the types of the args and reply passed to Call() must be
// the same as the types of the arguments declared in the
// handler function (including whether they are pointers).
//
// The labrpc package simulates a lossy network, in which servers
// may be unreachable, and in which requests and replies may be lost.
// Call() sends a request and waits for a reply. If a reply arrives
// within a timeout interval, Call() returns true; otherwise
// Call() returns false. Thus Call() may not return for a while.
// A false return can be caused by a dead server, a live server that
// can't be reached, a lost request, or a lost reply.
//
// Call() is guaranteed to return (perhaps after a delay) *except* if the
// handler function on the server side does not return. Thus there
// is no need to implement your own timeouts around Call().
//
// look at the comments in ../labrpc/labrpc.go for more details.
//
// if you're having trouble getting RPC to work, check that you've
// capitalized all field names in structs passed over RPC, and
// that the caller passes the address of the reply struct with &, not
// the struct itself.
//
func (rf *Raft) sendRequestVote(server int, args *RequestVoteArgs, reply *RequestVoteReply) bool {
ok := rf.peers[server].Call("Raft.RequestVote", args, reply)
return ok
}
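// Editor's note: an illustrative sketch, not part of the original commit, of how a
// candidate might send RequestVote RPCs concurrently; the args/reply contents are
// left out because the structs above are for you to fill in.
//
//	for peer := range rf.peers {
//		if peer == rf.me {
//			continue
//		}
//		go func(server int) {
//			args := &RequestVoteArgs{ /* candidate's term, id, last log info */ }
//			reply := &RequestVoteReply{}
//			if rf.sendRequestVote(server, args, reply) {
//				// count the vote, or step down if the reply carries a higher term
//			}
//		}(peer)
//	}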
//
// the service using Raft (e.g. a k/v server) wants to start
// agreement on the next command to be appended to Raft's log. if this
// server isn't the leader, returns false. otherwise start the
// agreement and return immediately. there is no guarantee that this
// command will ever be committed to the Raft log, since the leader
// may fail or lose an election. even if the Raft instance has been killed,
// this function should return gracefully.
//
// the first return value is the index that the command will appear at
// if it's ever committed. the second return value is the current
// term. the third return value is true if this server believes it is
// the leader.
//
func (rf *Raft) Start(command interface{}) (int, int, bool) {
index := -1
term := -1
isLeader := true
// Your code here (2B).
return index, term, isLeader
}
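// Editor's note: an illustrative sketch, not part of the original commit, of how a
// service typically uses Start() together with applyCh; the command value is arbitrary.
//
//	index, term, isLeader := rf.Start(cmd)
//	if isLeader {
//		// remember (index, term); when an ApplyMsg with CommandIndex == index
//		// arrives on applyCh, compare its Command with cmd -- a later leader may
//		// have committed a different entry at that index.
//	}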
//
// the tester doesn't halt goroutines created by Raft after each test,
// but it does call the Kill() method. your code can use killed() to
// check whether Kill() has been called. the use of atomic avoids the
// need for a lock.
//
// the issue is that long-running goroutines use memory and may chew
// up CPU time, perhaps causing later tests to fail and generating
// confusing debug output. any goroutine with a long-running loop
// should call killed() to check whether it should stop.
//
func (rf *Raft) Kill() {
atomic.StoreInt32(&rf.dead, 1)
// Your code here, if desired.
}
func (rf *Raft) killed() bool {
z := atomic.LoadInt32(&rf.dead)
return z == 1
}
// The ticker goroutine starts a new election if this peer hasn't received
// heartbeats recently.
func (rf *Raft) ticker() {
for rf.killed() == false {
// Your code here to check if a leader election should
// be started and to randomize sleeping time using
// time.Sleep().
}
}
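// Editor's note: an illustrative sketch, not part of the original commit, of one way
// to structure the ticker loop; the timeout range and the lastHeard field are
// assumptions about a student implementation, and the loop would need the time and
// math/rand imports.
//
//	for rf.killed() == false {
//		timeout := time.Duration(300+rand.Intn(300)) * time.Millisecond
//		time.Sleep(timeout)
//		rf.mu.Lock()
//		quiet := time.Since(rf.lastHeard) >= timeout
//		rf.mu.Unlock()
//		if quiet {
//			// become a candidate: bump currentTerm, vote for self, send RequestVotes
//		}
//	}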
//
// the service or tester wants to create a Raft server. the ports
// of all the Raft servers (including this one) are in peers[]. this
// server's port is peers[me]. all the servers' peers[] arrays
// have the same order. persister is a place for this server to
// save its persistent state, and also initially holds the most
// recent saved state, if any. applyCh is a channel on which the
// tester or service expects Raft to send ApplyMsg messages.
// Make() must return quickly, so it should start goroutines
// for any long-running work.
//
func Make(peers []*labrpc.ClientEnd, me int,
persister *Persister, applyCh chan ApplyMsg) *Raft {
rf := &Raft{}
rf.peers = peers
rf.persister = persister
rf.me = me
// Your initialization code here (2A, 2B, 2C).
// initialize from state persisted before a crash
rf.readPersist(persister.ReadRaftState())
// start ticker goroutine to start elections
go rf.ticker()
return rf
}

1086
src/raft/test_test.go Normal file

File diff suppressed because it is too large

13
src/raft/util.go Normal file

@ -0,0 +1,13 @@
package raft
import "log"
// Debugging
const Debug = false
func DPrintf(format string, a ...interface{}) (n int, err error) {
if Debug {
log.Printf(format, a...)
}
return
}

101
src/shardctrler/client.go Normal file

@ -0,0 +1,101 @@
package shardctrler
//
// Shardctrler clerk.
//
import "6.824/labrpc"
import "time"
import "crypto/rand"
import "math/big"
type Clerk struct {
servers []*labrpc.ClientEnd
// Your data here.
}
func nrand() int64 {
max := big.NewInt(int64(1) << 62)
bigx, _ := rand.Int(rand.Reader, max)
x := bigx.Int64()
return x
}
func MakeClerk(servers []*labrpc.ClientEnd) *Clerk {
ck := new(Clerk)
ck.servers = servers
// Your code here.
return ck
}
func (ck *Clerk) Query(num int) Config {
args := &QueryArgs{}
// Your code here.
args.Num = num
for {
// try each known server.
for _, srv := range ck.servers {
var reply QueryReply
ok := srv.Call("ShardCtrler.Query", args, &reply)
if ok && reply.WrongLeader == false {
return reply.Config
}
}
time.Sleep(100 * time.Millisecond)
}
}
func (ck *Clerk) Join(servers map[int][]string) {
args := &JoinArgs{}
// Your code here.
args.Servers = servers
for {
// try each known server.
for _, srv := range ck.servers {
var reply JoinReply
ok := srv.Call("ShardCtrler.Join", args, &reply)
if ok && reply.WrongLeader == false {
return
}
}
time.Sleep(100 * time.Millisecond)
}
}
func (ck *Clerk) Leave(gids []int) {
args := &LeaveArgs{}
// Your code here.
args.GIDs = gids
for {
// try each known server.
for _, srv := range ck.servers {
var reply LeaveReply
ok := srv.Call("ShardCtrler.Leave", args, &reply)
if ok && reply.WrongLeader == false {
return
}
}
time.Sleep(100 * time.Millisecond)
}
}
func (ck *Clerk) Move(shard int, gid int) {
args := &MoveArgs{}
// Your code here.
args.Shard = shard
args.GID = gid
for {
// try each known server.
for _, srv := range ck.servers {
var reply MoveReply
ok := srv.Call("ShardCtrler.Move", args, &reply)
if ok && reply.WrongLeader == false {
return
}
}
time.Sleep(100 * time.Millisecond)
}
}

73
src/shardctrler/common.go Normal file

@ -0,0 +1,73 @@
package shardctrler
//
// Shard controller: assigns shards to replication groups.
//
// RPC interface:
// Join(servers) -- add a set of groups (gid -> server-list mapping).
// Leave(gids) -- delete a set of groups.
// Move(shard, gid) -- hand off one shard from current owner to gid.
// Query(num) -> fetch Config # num, or latest config if num==-1.
//
// A Config (configuration) describes a set of replica groups, and the
// replica group responsible for each shard. Configs are numbered. Config
// #0 is the initial configuration, with no groups and all shards
// assigned to group 0 (the invalid group).
//
// You will need to add fields to the RPC argument structs.
//
// The number of shards.
const NShards = 10
// A configuration -- an assignment of shards to groups.
// Please don't change this.
type Config struct {
Num int // config number
Shards [NShards]int // shard -> gid
Groups map[int][]string // gid -> servers[]
}
const (
OK = "OK"
)
type Err string
type JoinArgs struct {
Servers map[int][]string // new GID -> servers mappings
}
type JoinReply struct {
WrongLeader bool
Err Err
}
type LeaveArgs struct {
GIDs []int
}
type LeaveReply struct {
WrongLeader bool
Err Err
}
type MoveArgs struct {
Shard int
GID int
}
type MoveReply struct {
WrongLeader bool
Err Err
}
type QueryArgs struct {
Num int // desired config number
}
type QueryReply struct {
WrongLeader bool
Err Err
Config Config
}
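// Editor's note: an illustrative sketch, not part of the original commit, showing how
// a client drives the RPC interface described above; the gid and server names are
// arbitrary.
func exampleReconfigure(ck *Clerk) Config {
	// add replica group 1 with three servers, then hand shard 0 to it
	ck.Join(map[int][]string{1: {"server-1a", "server-1b", "server-1c"}})
	ck.Move(0, 1)
	// -1 asks for the latest configuration
	return ck.Query(-1)
}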

357
src/shardctrler/config.go Normal file

@ -0,0 +1,357 @@
package shardctrler
import "6.824/labrpc"
import "6.824/raft"
import "testing"
import "os"
// import "log"
import crand "crypto/rand"
import "math/rand"
import "encoding/base64"
import "sync"
import "runtime"
import "time"
func randstring(n int) string {
b := make([]byte, 2*n)
crand.Read(b)
s := base64.URLEncoding.EncodeToString(b)
return s[0:n]
}
// Randomize server handles
func random_handles(kvh []*labrpc.ClientEnd) []*labrpc.ClientEnd {
sa := make([]*labrpc.ClientEnd, len(kvh))
copy(sa, kvh)
for i := range sa {
j := rand.Intn(i + 1)
sa[i], sa[j] = sa[j], sa[i]
}
return sa
}
type config struct {
mu sync.Mutex
t *testing.T
net *labrpc.Network
n int
servers []*ShardCtrler
saved []*raft.Persister
endnames [][]string // names of each server's sending ClientEnds
clerks map[*Clerk][]string
nextClientId int
start time.Time // time at which make_config() was called
}
func (cfg *config) checkTimeout() {
// enforce a two minute real-time limit on each test
if !cfg.t.Failed() && time.Since(cfg.start) > 120*time.Second {
cfg.t.Fatal("test took longer than 120 seconds")
}
}
func (cfg *config) cleanup() {
cfg.mu.Lock()
defer cfg.mu.Unlock()
for i := 0; i < len(cfg.servers); i++ {
if cfg.servers[i] != nil {
cfg.servers[i].Kill()
}
}
cfg.net.Cleanup()
cfg.checkTimeout()
}
// Maximum log size across all servers
func (cfg *config) LogSize() int {
logsize := 0
for i := 0; i < cfg.n; i++ {
n := cfg.saved[i].RaftStateSize()
if n > logsize {
logsize = n
}
}
return logsize
}
// attach server i to servers listed in to
// caller must hold cfg.mu
func (cfg *config) connectUnlocked(i int, to []int) {
// log.Printf("connect peer %d to %v\n", i, to)
// outgoing socket files
for j := 0; j < len(to); j++ {
endname := cfg.endnames[i][to[j]]
cfg.net.Enable(endname, true)
}
// incoming socket files
for j := 0; j < len(to); j++ {
endname := cfg.endnames[to[j]][i]
cfg.net.Enable(endname, true)
}
}
func (cfg *config) connect(i int, to []int) {
cfg.mu.Lock()
defer cfg.mu.Unlock()
cfg.connectUnlocked(i, to)
}
// detach server i from the servers listed in from
// caller must hold cfg.mu
func (cfg *config) disconnectUnlocked(i int, from []int) {
// log.Printf("disconnect peer %d from %v\n", i, from)
// outgoing socket files
for j := 0; j < len(from); j++ {
if cfg.endnames[i] != nil {
endname := cfg.endnames[i][from[j]]
cfg.net.Enable(endname, false)
}
}
// incoming socket files
for j := 0; j < len(from); j++ {
if cfg.endnames[j] != nil {
endname := cfg.endnames[from[j]][i]
cfg.net.Enable(endname, false)
}
}
}
func (cfg *config) disconnect(i int, from []int) {
cfg.mu.Lock()
defer cfg.mu.Unlock()
cfg.disconnectUnlocked(i, from)
}
func (cfg *config) All() []int {
all := make([]int, cfg.n)
for i := 0; i < cfg.n; i++ {
all[i] = i
}
return all
}
func (cfg *config) ConnectAll() {
cfg.mu.Lock()
defer cfg.mu.Unlock()
for i := 0; i < cfg.n; i++ {
cfg.connectUnlocked(i, cfg.All())
}
}
// Sets up 2 partitions with connectivity between servers in each partition.
func (cfg *config) partition(p1 []int, p2 []int) {
cfg.mu.Lock()
defer cfg.mu.Unlock()
// log.Printf("partition servers into: %v %v\n", p1, p2)
for i := 0; i < len(p1); i++ {
cfg.disconnectUnlocked(p1[i], p2)
cfg.connectUnlocked(p1[i], p1)
}
for i := 0; i < len(p2); i++ {
cfg.disconnectUnlocked(p2[i], p1)
cfg.connectUnlocked(p2[i], p2)
}
}
// Create a clerk with clerk-specific server names.
// Give it connections to all of the servers, but for
// now enable only connections to servers in to[].
func (cfg *config) makeClient(to []int) *Clerk {
cfg.mu.Lock()
defer cfg.mu.Unlock()
// a fresh set of ClientEnds.
ends := make([]*labrpc.ClientEnd, cfg.n)
endnames := make([]string, cfg.n)
for j := 0; j < cfg.n; j++ {
endnames[j] = randstring(20)
ends[j] = cfg.net.MakeEnd(endnames[j])
cfg.net.Connect(endnames[j], j)
}
ck := MakeClerk(random_handles(ends))
cfg.clerks[ck] = endnames
cfg.nextClientId++
cfg.ConnectClientUnlocked(ck, to)
return ck
}
func (cfg *config) deleteClient(ck *Clerk) {
cfg.mu.Lock()
defer cfg.mu.Unlock()
v := cfg.clerks[ck]
for i := 0; i < len(v); i++ {
os.Remove(v[i])
}
delete(cfg.clerks, ck)
}
// caller should hold cfg.mu
func (cfg *config) ConnectClientUnlocked(ck *Clerk, to []int) {
// log.Printf("ConnectClient %v to %v\n", ck, to)
endnames := cfg.clerks[ck]
for j := 0; j < len(to); j++ {
s := endnames[to[j]]
cfg.net.Enable(s, true)
}
}
func (cfg *config) ConnectClient(ck *Clerk, to []int) {
cfg.mu.Lock()
defer cfg.mu.Unlock()
cfg.ConnectClientUnlocked(ck, to)
}
// caller should hold cfg.mu
func (cfg *config) DisconnectClientUnlocked(ck *Clerk, from []int) {
// log.Printf("DisconnectClient %v from %v\n", ck, from)
endnames := cfg.clerks[ck]
for j := 0; j < len(from); j++ {
s := endnames[from[j]]
cfg.net.Enable(s, false)
}
}
func (cfg *config) DisconnectClient(ck *Clerk, from []int) {
cfg.mu.Lock()
defer cfg.mu.Unlock()
cfg.DisconnectClientUnlocked(ck, from)
}
// Shutdown a server by isolating it
func (cfg *config) ShutdownServer(i int) {
cfg.mu.Lock()
defer cfg.mu.Unlock()
cfg.disconnectUnlocked(i, cfg.All())
// disable client connections to the server.
// it's important to do this before creating
// the new Persister in saved[i], to avoid
// the possibility of the server returning a
// positive reply to an Append but persisting
// the result in the superseded Persister.
cfg.net.DeleteServer(i)
// a fresh persister, in case old instance
// continues to update the Persister.
// but copy old persister's content so that we always
// pass Make() the last persisted state.
if cfg.saved[i] != nil {
cfg.saved[i] = cfg.saved[i].Copy()
}
kv := cfg.servers[i]
if kv != nil {
cfg.mu.Unlock()
kv.Kill()
cfg.mu.Lock()
cfg.servers[i] = nil
}
}
// To restart a server, first call ShutdownServer().
func (cfg *config) StartServer(i int) {
cfg.mu.Lock()
// a fresh set of outgoing ClientEnd names.
cfg.endnames[i] = make([]string, cfg.n)
for j := 0; j < cfg.n; j++ {
cfg.endnames[i][j] = randstring(20)
}
// a fresh set of ClientEnds.
ends := make([]*labrpc.ClientEnd, cfg.n)
for j := 0; j < cfg.n; j++ {
ends[j] = cfg.net.MakeEnd(cfg.endnames[i][j])
cfg.net.Connect(cfg.endnames[i][j], j)
}
// a fresh persister, so old instance doesn't overwrite
// new instance's persisted state.
// give the fresh persister a copy of the old persister's
// state, so that the spec is that we pass StartServer()
// the last persisted state.
if cfg.saved[i] != nil {
cfg.saved[i] = cfg.saved[i].Copy()
} else {
cfg.saved[i] = raft.MakePersister()
}
cfg.mu.Unlock()
cfg.servers[i] = StartServer(ends, i, cfg.saved[i])
kvsvc := labrpc.MakeService(cfg.servers[i])
rfsvc := labrpc.MakeService(cfg.servers[i].rf)
srv := labrpc.MakeServer()
srv.AddService(kvsvc)
srv.AddService(rfsvc)
cfg.net.AddServer(i, srv)
}
func (cfg *config) Leader() (bool, int) {
cfg.mu.Lock()
defer cfg.mu.Unlock()
for i := 0; i < cfg.n; i++ {
if cfg.servers[i] != nil {
_, is_leader := cfg.servers[i].rf.GetState()
if is_leader {
return true, i
}
}
}
return false, 0
}
// Partition servers into 2 groups and put current leader in minority
func (cfg *config) make_partition() ([]int, []int) {
_, l := cfg.Leader()
p1 := make([]int, cfg.n/2+1)
p2 := make([]int, cfg.n/2)
j := 0
for i := 0; i < cfg.n; i++ {
if i != l {
if j < len(p1) {
p1[j] = i
} else {
p2[j-len(p1)] = i
}
j++
}
}
p2[len(p2)-1] = l
return p1, p2
}
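// For example, with cfg.n = 5 and current leader 2, the loop above fills
// p1 = [0, 1, 3] (the majority side) and leaves the leader for p2's last
// slot, so p2 = [4, 2] is the minority partition containing the old leader.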
func make_config(t *testing.T, n int, unreliable bool) *config {
runtime.GOMAXPROCS(4)
cfg := &config{}
cfg.t = t
cfg.net = labrpc.MakeNetwork()
cfg.n = n
cfg.servers = make([]*ShardCtrler, cfg.n)
cfg.saved = make([]*raft.Persister, cfg.n)
cfg.endnames = make([][]string, cfg.n)
cfg.clerks = make(map[*Clerk][]string)
cfg.nextClientId = cfg.n + 1000 // client ids start 1000 above the highest serverid
cfg.start = time.Now()
// create a full set of KV servers.
for i := 0; i < cfg.n; i++ {
cfg.StartServer(i)
}
cfg.ConnectAll()
cfg.net.Reliable(!unreliable)
return cfg
}

80
src/shardctrler/server.go Normal file
View File

@ -0,0 +1,80 @@
package shardctrler
import "6.824/raft"
import "6.824/labrpc"
import "sync"
import "6.824/labgob"
type ShardCtrler struct {
mu sync.Mutex
me int
rf *raft.Raft
applyCh chan raft.ApplyMsg
// Your data here.
configs []Config // indexed by config num
}
type Op struct {
// Your data here.
}
func (sc *ShardCtrler) Join(args *JoinArgs, reply *JoinReply) {
// Your code here.
}
func (sc *ShardCtrler) Leave(args *LeaveArgs, reply *LeaveReply) {
// Your code here.
}
func (sc *ShardCtrler) Move(args *MoveArgs, reply *MoveReply) {
// Your code here.
}
func (sc *ShardCtrler) Query(args *QueryArgs, reply *QueryReply) {
// Your code here.
}
//
// the tester calls Kill() when a ShardCtrler instance won't
// be needed again. you are not required to do anything
// in Kill(), but it might be convenient to (for example)
// turn off debug output from this instance.
//
func (sc *ShardCtrler) Kill() {
sc.rf.Kill()
// Your code here, if desired.
}
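// A hypothetical sketch (not part of the skeleton) of the "turn off debug
// output" idea mentioned above: Kill() records a dead flag that long-running
// goroutines and print wrappers can consult. The dead field, killed()
// helper, and the sync/atomic import are assumptions, not lab requirements:
//
//	// add to ShardCtrler:  dead int32
//	// add to Kill():       atomic.StoreInt32(&sc.dead, 1)
//
//	func (sc *ShardCtrler) killed() bool {
//		return atomic.LoadInt32(&sc.dead) == 1
//	}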
// needed by shardkv tester
func (sc *ShardCtrler) Raft() *raft.Raft {
return sc.rf
}
//
// servers[] contains the ports of the set of
// servers that will cooperate via Raft to
// form the fault-tolerant shardctrler service.
// me is the index of the current server in servers[].
//
func StartServer(servers []*labrpc.ClientEnd, me int, persister *raft.Persister) *ShardCtrler {
sc := new(ShardCtrler)
sc.me = me
sc.configs = make([]Config, 1)
sc.configs[0].Groups = map[int][]string{}
labgob.Register(Op{})
sc.applyCh = make(chan raft.ApplyMsg)
sc.rf = raft.Make(servers, me, persister, sc.applyCh)
// Your code here.
return sc
}
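// A hypothetical sketch (not part of the skeleton) of the goroutine that
// StartServer() typically launches to drain applyCh; it assumes the standard
// lab ApplyMsg fields (CommandValid, Command) and is only illustrative:
//
//	go func() {
//		for msg := range sc.applyCh {
//			if msg.CommandValid {
//				op := msg.Command.(Op)
//				// apply op to sc.configs under sc.mu, then wake the
//				// RPC handler waiting on this log index.
//				_ = op
//			}
//		}
//	}()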

403
src/shardctrler/test_test.go Normal file
View File

@ -0,0 +1,403 @@
package shardctrler
import (
"fmt"
"sync"
"testing"
"time"
)
// import "time"
func check(t *testing.T, groups []int, ck *Clerk) {
c := ck.Query(-1)
if len(c.Groups) != len(groups) {
t.Fatalf("wanted %v groups, got %v", len(groups), len(c.Groups))
}
// are the groups as expected?
for _, g := range groups {
_, ok := c.Groups[g]
if ok != true {
t.Fatalf("missing group %v", g)
}
}
// any un-allocated shards?
if len(groups) > 0 {
for s, g := range c.Shards {
_, ok := c.Groups[g]
if ok == false {
t.Fatalf("shard %v -> invalid group %v", s, g)
}
}
}
// more or less balanced sharding?
counts := map[int]int{}
for _, g := range c.Shards {
counts[g] += 1
}
min := 257
max := 0
for g, _ := range c.Groups {
if counts[g] > max {
max = counts[g]
}
if counts[g] < min {
min = counts[g]
}
}
if max > min+1 {
t.Fatalf("max %v too much larger than min %v", max, min)
}
}
func check_same_config(t *testing.T, c1 Config, c2 Config) {
if c1.Num != c2.Num {
t.Fatalf("Num wrong")
}
if c1.Shards != c2.Shards {
t.Fatalf("Shards wrong")
}
if len(c1.Groups) != len(c2.Groups) {
t.Fatalf("number of Groups is wrong")
}
for gid, sa := range c1.Groups {
sa1, ok := c2.Groups[gid]
if ok == false || len(sa1) != len(sa) {
t.Fatalf("len(Groups) wrong")
}
if ok && len(sa1) == len(sa) {
for j := 0; j < len(sa); j++ {
if sa[j] != sa1[j] {
t.Fatalf("Groups wrong")
}
}
}
}
}
func TestBasic(t *testing.T) {
const nservers = 3
cfg := make_config(t, nservers, false)
defer cfg.cleanup()
ck := cfg.makeClient(cfg.All())
fmt.Printf("Test: Basic leave/join ...\n")
cfa := make([]Config, 6)
cfa[0] = ck.Query(-1)
check(t, []int{}, ck)
var gid1 int = 1
ck.Join(map[int][]string{gid1: []string{"x", "y", "z"}})
check(t, []int{gid1}, ck)
cfa[1] = ck.Query(-1)
var gid2 int = 2
ck.Join(map[int][]string{gid2: []string{"a", "b", "c"}})
check(t, []int{gid1, gid2}, ck)
cfa[2] = ck.Query(-1)
cfx := ck.Query(-1)
sa1 := cfx.Groups[gid1]
if len(sa1) != 3 || sa1[0] != "x" || sa1[1] != "y" || sa1[2] != "z" {
t.Fatalf("wrong servers for gid %v: %v\n", gid1, sa1)
}
sa2 := cfx.Groups[gid2]
if len(sa2) != 3 || sa2[0] != "a" || sa2[1] != "b" || sa2[2] != "c" {
t.Fatalf("wrong servers for gid %v: %v\n", gid2, sa2)
}
ck.Leave([]int{gid1})
check(t, []int{gid2}, ck)
cfa[4] = ck.Query(-1)
ck.Leave([]int{gid2})
cfa[5] = ck.Query(-1)
fmt.Printf(" ... Passed\n")
fmt.Printf("Test: Historical queries ...\n")
for s := 0; s < nservers; s++ {
cfg.ShutdownServer(s)
for i := 0; i < len(cfa); i++ {
c := ck.Query(cfa[i].Num)
check_same_config(t, c, cfa[i])
}
cfg.StartServer(s)
cfg.ConnectAll()
}
fmt.Printf(" ... Passed\n")
fmt.Printf("Test: Move ...\n")
{
var gid3 int = 503
ck.Join(map[int][]string{gid3: []string{"3a", "3b", "3c"}})
var gid4 int = 504
ck.Join(map[int][]string{gid4: []string{"4a", "4b", "4c"}})
for i := 0; i < NShards; i++ {
cf := ck.Query(-1)
if i < NShards/2 {
ck.Move(i, gid3)
if cf.Shards[i] != gid3 {
cf1 := ck.Query(-1)
if cf1.Num <= cf.Num {
t.Fatalf("Move should increase Config.Num")
}
}
} else {
ck.Move(i, gid4)
if cf.Shards[i] != gid4 {
cf1 := ck.Query(-1)
if cf1.Num <= cf.Num {
t.Fatalf("Move should increase Config.Num")
}
}
}
}
cf2 := ck.Query(-1)
for i := 0; i < NShards; i++ {
if i < NShards/2 {
if cf2.Shards[i] != gid3 {
t.Fatalf("expected shard %v on gid %v actually %v",
i, gid3, cf2.Shards[i])
}
} else {
if cf2.Shards[i] != gid4 {
t.Fatalf("expected shard %v on gid %v actually %v",
i, gid4, cf2.Shards[i])
}
}
}
ck.Leave([]int{gid3})
ck.Leave([]int{gid4})
}
fmt.Printf(" ... Passed\n")
fmt.Printf("Test: Concurrent leave/join ...\n")
const npara = 10
var cka [npara]*Clerk
for i := 0; i < len(cka); i++ {
cka[i] = cfg.makeClient(cfg.All())
}
gids := make([]int, npara)
ch := make(chan bool)
for xi := 0; xi < npara; xi++ {
gids[xi] = int((xi * 10) + 100)
go func(i int) {
defer func() { ch <- true }()
var gid int = gids[i]
var sid1 = fmt.Sprintf("s%da", gid)
var sid2 = fmt.Sprintf("s%db", gid)
cka[i].Join(map[int][]string{gid + 1000: []string{sid1}})
cka[i].Join(map[int][]string{gid: []string{sid2}})
cka[i].Leave([]int{gid + 1000})
}(xi)
}
for i := 0; i < npara; i++ {
<-ch
}
check(t, gids, ck)
fmt.Printf(" ... Passed\n")
fmt.Printf("Test: Minimal transfers after joins ...\n")
c1 := ck.Query(-1)
for i := 0; i < 5; i++ {
var gid = int(npara + 1 + i)
ck.Join(map[int][]string{gid: []string{
fmt.Sprintf("%da", gid),
fmt.Sprintf("%db", gid),
fmt.Sprintf("%db", gid)}})
}
c2 := ck.Query(-1)
for i := int(1); i <= npara; i++ {
for j := 0; j < len(c1.Shards); j++ {
if c2.Shards[j] == i {
if c1.Shards[j] != i {
t.Fatalf("non-minimal transfer after Join()s")
}
}
}
}
fmt.Printf(" ... Passed\n")
fmt.Printf("Test: Minimal transfers after leaves ...\n")
for i := 0; i < 5; i++ {
ck.Leave([]int{int(npara + 1 + i)})
}
c3 := ck.Query(-1)
for i := int(1); i <= npara; i++ {
for j := 0; j < len(c1.Shards); j++ {
if c2.Shards[j] == i {
if c3.Shards[j] != i {
t.Fatalf("non-minimal transfer after Leave()s")
}
}
}
}
fmt.Printf(" ... Passed\n")
}
func TestMulti(t *testing.T) {
const nservers = 3
cfg := make_config(t, nservers, false)
defer cfg.cleanup()
ck := cfg.makeClient(cfg.All())
fmt.Printf("Test: Multi-group join/leave ...\n")
cfa := make([]Config, 6)
cfa[0] = ck.Query(-1)
check(t, []int{}, ck)
var gid1 int = 1
var gid2 int = 2
ck.Join(map[int][]string{
gid1: []string{"x", "y", "z"},
gid2: []string{"a", "b", "c"},
})
check(t, []int{gid1, gid2}, ck)
cfa[1] = ck.Query(-1)
var gid3 int = 3
ck.Join(map[int][]string{gid3: []string{"j", "k", "l"}})
check(t, []int{gid1, gid2, gid3}, ck)
cfa[2] = ck.Query(-1)
cfx := ck.Query(-1)
sa1 := cfx.Groups[gid1]
if len(sa1) != 3 || sa1[0] != "x" || sa1[1] != "y" || sa1[2] != "z" {
t.Fatalf("wrong servers for gid %v: %v\n", gid1, sa1)
}
sa2 := cfx.Groups[gid2]
if len(sa2) != 3 || sa2[0] != "a" || sa2[1] != "b" || sa2[2] != "c" {
t.Fatalf("wrong servers for gid %v: %v\n", gid2, sa2)
}
sa3 := cfx.Groups[gid3]
if len(sa3) != 3 || sa3[0] != "j" || sa3[1] != "k" || sa3[2] != "l" {
t.Fatalf("wrong servers for gid %v: %v\n", gid3, sa3)
}
ck.Leave([]int{gid1, gid3})
check(t, []int{gid2}, ck)
cfa[3] = ck.Query(-1)
cfx = ck.Query(-1)
sa2 = cfx.Groups[gid2]
if len(sa2) != 3 || sa2[0] != "a" || sa2[1] != "b" || sa2[2] != "c" {
t.Fatalf("wrong servers for gid %v: %v\n", gid2, sa2)
}
ck.Leave([]int{gid2})
fmt.Printf(" ... Passed\n")
fmt.Printf("Test: Concurrent multi leave/join ...\n")
const npara = 10
var cka [npara]*Clerk
for i := 0; i < len(cka); i++ {
cka[i] = cfg.makeClient(cfg.All())
}
gids := make([]int, npara)
var wg sync.WaitGroup
for xi := 0; xi < npara; xi++ {
wg.Add(1)
gids[xi] = int(xi + 1000)
go func(i int) {
defer wg.Done()
var gid int = gids[i]
cka[i].Join(map[int][]string{
gid: []string{
fmt.Sprintf("%da", gid),
fmt.Sprintf("%db", gid),
fmt.Sprintf("%dc", gid)},
gid + 1000: []string{fmt.Sprintf("%da", gid+1000)},
gid + 2000: []string{fmt.Sprintf("%da", gid+2000)},
})
cka[i].Leave([]int{gid + 1000, gid + 2000})
}(xi)
}
wg.Wait()
check(t, gids, ck)
fmt.Printf(" ... Passed\n")
fmt.Printf("Test: Minimal transfers after multijoins ...\n")
c1 := ck.Query(-1)
m := make(map[int][]string)
for i := 0; i < 5; i++ {
var gid = npara + 1 + i
m[gid] = []string{fmt.Sprintf("%da", gid), fmt.Sprintf("%db", gid)}
}
ck.Join(m)
c2 := ck.Query(-1)
for i := int(1); i <= npara; i++ {
for j := 0; j < len(c1.Shards); j++ {
if c2.Shards[j] == i {
if c1.Shards[j] != i {
t.Fatalf("non-minimal transfer after Join()s")
}
}
}
}
fmt.Printf(" ... Passed\n")
fmt.Printf("Test: Minimal transfers after multileaves ...\n")
var l []int
for i := 0; i < 5; i++ {
l = append(l, npara+1+i)
}
ck.Leave(l)
c3 := ck.Query(-1)
for i := int(1); i <= npara; i++ {
for j := 0; j < len(c1.Shards); j++ {
if c2.Shards[j] == i {
if c3.Shards[j] != i {
t.Fatalf("non-minimal transfer after Leave()s")
}
}
}
}
fmt.Printf(" ... Passed\n")
fmt.Printf("Test: Check Same config on servers ...\n")
isLeader, leader := cfg.Leader()
if !isLeader {
t.Fatalf("Leader not found")
}
c := ck.Query(-1) // Config leader claims
cfg.ShutdownServer(leader)
attempts := 0
for isLeader, leader = cfg.Leader(); isLeader; time.Sleep(1 * time.Second) {
if attempts++; attempts >= 3 {
t.Fatalf("Leader not found")
}
}
c1 = ck.Query(-1)
check_same_config(t, c, c1)
fmt.Printf(" ... Passed\n")
}

137
src/shardkv/client.go Normal file
View File

@ -0,0 +1,137 @@
package shardkv
//
// client code to talk to a sharded key/value service.
//
// the client first talks to the shardctrler to find out
// the assignment of shards (keys) to groups, and then
// talks to the group that holds the key's shard.
//
import "6.824/labrpc"
import "crypto/rand"
import "math/big"
import "6.824/shardctrler"
import "time"
//
// which shard is a key in?
// please use this function,
// and please do not change it.
//
func key2shard(key string) int {
shard := 0
if len(key) > 0 {
shard = int(key[0])
}
shard %= shardctrler.NShards
return shard
}
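// For example, key "3" starts with byte '3' (ASCII 51); with NShards = 10
// (the value the tests assume, e.g. the "% 10" in TestChallenge2Unaffected),
// 51 % 10 = 1, so "3" lives in shard 1. An empty key maps to shard 0.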
func nrand() int64 {
max := big.NewInt(int64(1) << 62)
bigx, _ := rand.Int(rand.Reader, max)
x := bigx.Int64()
return x
}
type Clerk struct {
sm *shardctrler.Clerk
config shardctrler.Config
make_end func(string) *labrpc.ClientEnd
// You will have to modify this struct.
}
//
// the tester calls MakeClerk.
//
// ctrlers[] is needed to call shardctrler.MakeClerk().
//
// make_end(servername) turns a server name from a
// Config.Groups[gid][i] into a labrpc.ClientEnd on which you can
// send RPCs.
//
func MakeClerk(ctrlers []*labrpc.ClientEnd, make_end func(string) *labrpc.ClientEnd) *Clerk {
ck := new(Clerk)
ck.sm = shardctrler.MakeClerk(ctrlers)
ck.make_end = make_end
// You'll have to add code here.
return ck
}
//
// fetch the current value for a key.
// returns "" if the key does not exist.
// keeps trying forever in the face of all other errors.
// You will have to modify this function.
//
func (ck *Clerk) Get(key string) string {
args := GetArgs{}
args.Key = key
for {
shard := key2shard(key)
gid := ck.config.Shards[shard]
if servers, ok := ck.config.Groups[gid]; ok {
// try each server for the shard.
for si := 0; si < len(servers); si++ {
srv := ck.make_end(servers[si])
var reply GetReply
ok := srv.Call("ShardKV.Get", &args, &reply)
if ok && (reply.Err == OK || reply.Err == ErrNoKey) {
return reply.Value
}
if ok && (reply.Err == ErrWrongGroup) {
break
}
// ... not ok, or ErrWrongLeader
}
}
time.Sleep(100 * time.Millisecond)
// ask the controller for the latest configuration.
ck.config = ck.sm.Query(-1)
}
return ""
}
//
// shared by Put and Append.
// You will have to modify this function.
//
func (ck *Clerk) PutAppend(key string, value string, op string) {
args := PutAppendArgs{}
args.Key = key
args.Value = value
args.Op = op
for {
shard := key2shard(key)
gid := ck.config.Shards[shard]
if servers, ok := ck.config.Groups[gid]; ok {
for si := 0; si < len(servers); si++ {
srv := ck.make_end(servers[si])
var reply PutAppendReply
ok := srv.Call("ShardKV.PutAppend", &args, &reply)
if ok && reply.Err == OK {
return
}
if ok && reply.Err == ErrWrongGroup {
break
}
// ... not ok, or ErrWrongLeader
}
}
time.Sleep(100 * time.Millisecond)
// ask the controller for the latest configuration.
ck.config = ck.sm.Query(-1)
}
}
func (ck *Clerk) Put(key string, value string) {
ck.PutAppend(key, value, "Put")
}
func (ck *Clerk) Append(key string, value string) {
ck.PutAppend(key, value, "Append")
}
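// Example (hypothetical) use of this Clerk, mirroring what the tests drive
// through config.makeClient(); ctrlerEnds and make_end are assumed to be
// supplied by the caller:
//
//	ck := MakeClerk(ctrlerEnds, make_end)
//	ck.Put("x", "10")
//	ck.Append("x", "0")
//	v := ck.Get("x") // "100" once both operations have been applied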

44
src/shardkv/common.go Normal file
View File

@ -0,0 +1,44 @@
package shardkv
//
// Sharded key/value server.
// Lots of replica groups, each running Raft.
// Shardctrler decides which group serves each shard.
// Shardctrler may change shard assignment from time to time.
//
// You will have to modify these definitions.
//
const (
OK = "OK"
ErrNoKey = "ErrNoKey"
ErrWrongGroup = "ErrWrongGroup"
ErrWrongLeader = "ErrWrongLeader"
)
type Err string
// Put or Append
type PutAppendArgs struct {
// You'll have to add definitions here.
Key string
Value string
Op string // "Put" or "Append"
// You'll have to add definitions here.
// Field names must start with capital letters,
// otherwise RPC will break.
}
type PutAppendReply struct {
Err Err
}
type GetArgs struct {
Key string
// You'll have to add definitions here.
}
type GetReply struct {
Err Err
Value string
}
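// A common (but not prescribed) way to extend these definitions is to tag
// every request so servers can discard duplicates caused by client retries;
// the field names below are hypothetical:
//
//	type PutAppendArgs struct {
//		Key      string
//		Value    string
//		Op       string // "Put" or "Append"
//		ClientId int64  // which clerk sent this request
//		SeqNum   int64  // per-clerk sequence number
//	}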

382
src/shardkv/config.go Normal file
View File

@ -0,0 +1,382 @@
package shardkv
import "6.824/shardctrler"
import "6.824/labrpc"
import "testing"
import "os"
// import "log"
import crand "crypto/rand"
import "math/big"
import "math/rand"
import "encoding/base64"
import "sync"
import "runtime"
import "6.824/raft"
import "strconv"
import "fmt"
import "time"
func randstring(n int) string {
b := make([]byte, 2*n)
crand.Read(b)
s := base64.URLEncoding.EncodeToString(b)
return s[0:n]
}
func makeSeed() int64 {
max := big.NewInt(int64(1) << 62)
bigx, _ := crand.Int(crand.Reader, max)
x := bigx.Int64()
return x
}
// Randomize server handles
func random_handles(kvh []*labrpc.ClientEnd) []*labrpc.ClientEnd {
sa := make([]*labrpc.ClientEnd, len(kvh))
copy(sa, kvh)
for i := range sa {
j := rand.Intn(i + 1)
sa[i], sa[j] = sa[j], sa[i]
}
return sa
}
type group struct {
gid int
servers []*ShardKV
saved []*raft.Persister
endnames [][]string
mendnames [][]string
}
type config struct {
mu sync.Mutex
t *testing.T
net *labrpc.Network
start time.Time // time at which make_config() was called
nctrlers int
ctrlerservers []*shardctrler.ShardCtrler
mck *shardctrler.Clerk
ngroups int
n int // servers per k/v group
groups []*group
clerks map[*Clerk][]string
nextClientId int
maxraftstate int
}
func (cfg *config) checkTimeout() {
// enforce a two minute real-time limit on each test
if !cfg.t.Failed() && time.Since(cfg.start) > 120*time.Second {
cfg.t.Fatal("test took longer than 120 seconds")
}
}
func (cfg *config) cleanup() {
for gi := 0; gi < cfg.ngroups; gi++ {
cfg.ShutdownGroup(gi)
}
for i := 0; i < cfg.nctrlers; i++ {
cfg.ctrlerservers[i].Kill()
}
cfg.net.Cleanup()
cfg.checkTimeout()
}
// check that no server's log is too big.
func (cfg *config) checklogs() {
for gi := 0; gi < cfg.ngroups; gi++ {
for i := 0; i < cfg.n; i++ {
raft := cfg.groups[gi].saved[i].RaftStateSize()
snap := len(cfg.groups[gi].saved[i].ReadSnapshot())
if cfg.maxraftstate >= 0 && raft > 8*cfg.maxraftstate {
cfg.t.Fatalf("persister.RaftStateSize() %v, but maxraftstate %v",
raft, cfg.maxraftstate)
}
if cfg.maxraftstate < 0 && snap > 0 {
cfg.t.Fatalf("maxraftstate is -1, but snapshot is non-empty!")
}
}
}
}
// controller server name for labrpc.
func (cfg *config) ctrlername(i int) string {
return "ctrler" + strconv.Itoa(i)
}
// shard server name for labrpc.
// i'th server of group gid.
func (cfg *config) servername(gid int, i int) string {
return "server-" + strconv.Itoa(gid) + "-" + strconv.Itoa(i)
}
func (cfg *config) makeClient() *Clerk {
cfg.mu.Lock()
defer cfg.mu.Unlock()
// ClientEnds to talk to the controller service.
ends := make([]*labrpc.ClientEnd, cfg.nctrlers)
endnames := make([]string, cfg.nctrlers)
for j := 0; j < cfg.nctrlers; j++ {
endnames[j] = randstring(20)
ends[j] = cfg.net.MakeEnd(endnames[j])
cfg.net.Connect(endnames[j], cfg.ctrlername(j))
cfg.net.Enable(endnames[j], true)
}
ck := MakeClerk(ends, func(servername string) *labrpc.ClientEnd {
name := randstring(20)
end := cfg.net.MakeEnd(name)
cfg.net.Connect(name, servername)
cfg.net.Enable(name, true)
return end
})
cfg.clerks[ck] = endnames
cfg.nextClientId++
return ck
}
func (cfg *config) deleteClient(ck *Clerk) {
cfg.mu.Lock()
defer cfg.mu.Unlock()
v := cfg.clerks[ck]
for i := 0; i < len(v); i++ {
os.Remove(v[i])
}
delete(cfg.clerks, ck)
}
// Shut down the i'th server of the gi'th group by isolating it
func (cfg *config) ShutdownServer(gi int, i int) {
cfg.mu.Lock()
defer cfg.mu.Unlock()
gg := cfg.groups[gi]
// prevent this server from sending
for j := 0; j < len(gg.servers); j++ {
name := gg.endnames[i][j]
cfg.net.Enable(name, false)
}
for j := 0; j < len(gg.mendnames[i]); j++ {
name := gg.mendnames[i][j]
cfg.net.Enable(name, false)
}
// disable client connections to the server.
// it's important to do this before creating
// the new Persister in saved[i], to avoid
// the possibility of the server returning a
// positive reply to an Append but persisting
// the result in the superseded Persister.
cfg.net.DeleteServer(cfg.servername(gg.gid, i))
// a fresh persister, in case old instance
// continues to update the Persister.
// but copy old persister's content so that we always
// pass Make() the last persisted state.
if gg.saved[i] != nil {
gg.saved[i] = gg.saved[i].Copy()
}
kv := gg.servers[i]
if kv != nil {
cfg.mu.Unlock()
kv.Kill()
cfg.mu.Lock()
gg.servers[i] = nil
}
}
func (cfg *config) ShutdownGroup(gi int) {
for i := 0; i < cfg.n; i++ {
cfg.ShutdownServer(gi, i)
}
}
// start i'th server in gi'th group
func (cfg *config) StartServer(gi int, i int) {
cfg.mu.Lock()
gg := cfg.groups[gi]
// a fresh set of outgoing ClientEnd names
// to talk to other servers in this group.
gg.endnames[i] = make([]string, cfg.n)
for j := 0; j < cfg.n; j++ {
gg.endnames[i][j] = randstring(20)
}
// and the connections to other servers in this group.
ends := make([]*labrpc.ClientEnd, cfg.n)
for j := 0; j < cfg.n; j++ {
ends[j] = cfg.net.MakeEnd(gg.endnames[i][j])
cfg.net.Connect(gg.endnames[i][j], cfg.servername(gg.gid, j))
cfg.net.Enable(gg.endnames[i][j], true)
}
// ends to talk to shardctrler service
mends := make([]*labrpc.ClientEnd, cfg.nctrlers)
gg.mendnames[i] = make([]string, cfg.nctrlers)
for j := 0; j < cfg.nctrlers; j++ {
gg.mendnames[i][j] = randstring(20)
mends[j] = cfg.net.MakeEnd(gg.mendnames[i][j])
cfg.net.Connect(gg.mendnames[i][j], cfg.ctrlername(j))
cfg.net.Enable(gg.mendnames[i][j], true)
}
// a fresh persister, so old instance doesn't overwrite
// new instance's persisted state.
// give the fresh persister a copy of the old persister's
// state, so that we always pass StartServer()
// the last persisted state.
if gg.saved[i] != nil {
gg.saved[i] = gg.saved[i].Copy()
} else {
gg.saved[i] = raft.MakePersister()
}
cfg.mu.Unlock()
gg.servers[i] = StartServer(ends, i, gg.saved[i], cfg.maxraftstate,
gg.gid, mends,
func(servername string) *labrpc.ClientEnd {
name := randstring(20)
end := cfg.net.MakeEnd(name)
cfg.net.Connect(name, servername)
cfg.net.Enable(name, true)
return end
})
kvsvc := labrpc.MakeService(gg.servers[i])
rfsvc := labrpc.MakeService(gg.servers[i].rf)
srv := labrpc.MakeServer()
srv.AddService(kvsvc)
srv.AddService(rfsvc)
cfg.net.AddServer(cfg.servername(gg.gid, i), srv)
}
func (cfg *config) StartGroup(gi int) {
for i := 0; i < cfg.n; i++ {
cfg.StartServer(gi, i)
}
}
func (cfg *config) StartCtrlerserver(i int) {
// ClientEnds to talk to other controller replicas.
ends := make([]*labrpc.ClientEnd, cfg.nctrlers)
for j := 0; j < cfg.nctrlers; j++ {
endname := randstring(20)
ends[j] = cfg.net.MakeEnd(endname)
cfg.net.Connect(endname, cfg.ctrlername(j))
cfg.net.Enable(endname, true)
}
p := raft.MakePersister()
cfg.ctrlerservers[i] = shardctrler.StartServer(ends, i, p)
msvc := labrpc.MakeService(cfg.ctrlerservers[i])
rfsvc := labrpc.MakeService(cfg.ctrlerservers[i].Raft())
srv := labrpc.MakeServer()
srv.AddService(msvc)
srv.AddService(rfsvc)
cfg.net.AddServer(cfg.ctrlername(i), srv)
}
func (cfg *config) shardclerk() *shardctrler.Clerk {
// ClientEnds to talk to ctrler service.
ends := make([]*labrpc.ClientEnd, cfg.nctrlers)
for j := 0; j < cfg.nctrlers; j++ {
name := randstring(20)
ends[j] = cfg.net.MakeEnd(name)
cfg.net.Connect(name, cfg.ctrlername(j))
cfg.net.Enable(name, true)
}
return shardctrler.MakeClerk(ends)
}
// tell the shardctrler that a group is joining.
func (cfg *config) join(gi int) {
cfg.joinm([]int{gi})
}
func (cfg *config) joinm(gis []int) {
m := make(map[int][]string, len(gis))
for _, g := range gis {
gid := cfg.groups[g].gid
servernames := make([]string, cfg.n)
for i := 0; i < cfg.n; i++ {
servernames[i] = cfg.servername(gid, i)
}
m[gid] = servernames
}
cfg.mck.Join(m)
}
// tell the shardctrler that a group is leaving.
func (cfg *config) leave(gi int) {
cfg.leavem([]int{gi})
}
func (cfg *config) leavem(gis []int) {
gids := make([]int, 0, len(gis))
for _, g := range gis {
gids = append(gids, cfg.groups[g].gid)
}
cfg.mck.Leave(gids)
}
var ncpu_once sync.Once
func make_config(t *testing.T, n int, unreliable bool, maxraftstate int) *config {
ncpu_once.Do(func() {
if runtime.NumCPU() < 2 {
fmt.Printf("warning: only one CPU, which may conceal locking bugs\n")
}
rand.Seed(makeSeed())
})
runtime.GOMAXPROCS(4)
cfg := &config{}
cfg.t = t
cfg.maxraftstate = maxraftstate
cfg.net = labrpc.MakeNetwork()
cfg.start = time.Now()
// controller
cfg.nctrlers = 3
cfg.ctrlerservers = make([]*shardctrler.ShardCtrler, cfg.nctrlers)
for i := 0; i < cfg.nctrlers; i++ {
cfg.StartCtrlerserver(i)
}
cfg.mck = cfg.shardclerk()
cfg.ngroups = 3
cfg.groups = make([]*group, cfg.ngroups)
cfg.n = n
for gi := 0; gi < cfg.ngroups; gi++ {
gg := &group{}
cfg.groups[gi] = gg
gg.gid = 100 + gi
gg.servers = make([]*ShardKV, cfg.n)
gg.saved = make([]*raft.Persister, cfg.n)
gg.endnames = make([][]string, cfg.n)
gg.mendnames = make([][]string, cfg.n)
for i := 0; i < cfg.n; i++ {
cfg.StartServer(gi, i)
}
}
cfg.clerks = make(map[*Clerk][]string)
cfg.nextClientId = cfg.n + 1000 // client ids start 1000 above the highest serverid
cfg.net.Reliable(!unreliable)
return cfg
}

101
src/shardkv/server.go Normal file
View File

@ -0,0 +1,101 @@
package shardkv
import "6.824/labrpc"
import "6.824/raft"
import "sync"
import "6.824/labgob"
type Op struct {
// Your definitions here.
// Field names must start with capital letters,
// otherwise RPC will break.
}
type ShardKV struct {
mu sync.Mutex
me int
rf *raft.Raft
applyCh chan raft.ApplyMsg
make_end func(string) *labrpc.ClientEnd
gid int
ctrlers []*labrpc.ClientEnd
maxraftstate int // snapshot if log grows this big
// Your definitions here.
}
func (kv *ShardKV) Get(args *GetArgs, reply *GetReply) {
// Your code here.
}
func (kv *ShardKV) PutAppend(args *PutAppendArgs, reply *PutAppendReply) {
// Your code here.
}
//
// the tester calls Kill() when a ShardKV instance won't
// be needed again. you are not required to do anything
// in Kill(), but it might be convenient to (for example)
// turn off debug output from this instance.
//
func (kv *ShardKV) Kill() {
kv.rf.Kill()
// Your code here, if desired.
}
//
// servers[] contains the ports of the servers in this group.
//
// me is the index of the current server in servers[].
//
// the k/v server should store snapshots through the underlying Raft
// implementation, which should call persister.SaveStateAndSnapshot() to
// atomically save the Raft state along with the snapshot.
//
// the k/v server should snapshot when Raft's saved state exceeds
// maxraftstate bytes, in order to allow Raft to garbage-collect its
// log. if maxraftstate is -1, you don't need to snapshot.
//
// gid is this group's GID, for interacting with the shardctrler.
//
// pass ctrlers[] to shardctrler.MakeClerk() so you can send
// RPCs to the shardctrler.
//
// make_end(servername) turns a server name from a
// Config.Groups[gid][i] into a labrpc.ClientEnd on which you can
// send RPCs. You'll need this to send RPCs to other groups.
//
// look at client.go for examples of how to use ctrlers[]
// and make_end() to send RPCs to the group owning a specific shard.
//
// StartServer() must return quickly, so it should start goroutines
// for any long-running work.
//
func StartServer(servers []*labrpc.ClientEnd, me int, persister *raft.Persister, maxraftstate int, gid int, ctrlers []*labrpc.ClientEnd, make_end func(string) *labrpc.ClientEnd) *ShardKV {
// call labgob.Register on structures you want
// Go's RPC library to marshall/unmarshall.
labgob.Register(Op{})
kv := new(ShardKV)
kv.me = me
kv.maxraftstate = maxraftstate
kv.make_end = make_end
kv.gid = gid
kv.ctrlers = ctrlers
// Your initialization code here.
// Use something like this to talk to the shardctrler:
// kv.mck = shardctrler.MakeClerk(kv.ctrlers)
kv.applyCh = make(chan raft.ApplyMsg)
kv.rf = raft.Make(servers, me, persister, kv.applyCh)
return kv
}
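// A hypothetical sketch (not part of the skeleton) of a background goroutine
// that StartServer() might launch to watch for configuration changes, using
// the kv.mck clerk suggested in the comment above; the field name, the poll
// interval, and the time import are assumptions:
//
//	go func() {
//		for {
//			latest := kv.mck.Query(-1)
//			// if latest.Num is newer than the configuration this group
//			// is serving, start migrating shards accordingly.
//			_ = latest
//			time.Sleep(100 * time.Millisecond)
//		}
//	}()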

948
src/shardkv/test_test.go Normal file
View File

@ -0,0 +1,948 @@
package shardkv
import "6.824/porcupine"
import "6.824/models"
import "testing"
import "strconv"
import "time"
import "fmt"
import "sync/atomic"
import "sync"
import "math/rand"
import "io/ioutil"
const linearizabilityCheckTimeout = 1 * time.Second
func check(t *testing.T, ck *Clerk, key string, value string) {
v := ck.Get(key)
if v != value {
t.Fatalf("Get(%v): expected:\n%v\nreceived:\n%v", key, value, v)
}
}
//
// test static 2-way sharding, without shard movement.
//
func TestStaticShards(t *testing.T) {
fmt.Printf("Test: static shards ...\n")
cfg := make_config(t, 3, false, -1)
defer cfg.cleanup()
ck := cfg.makeClient()
cfg.join(0)
cfg.join(1)
n := 10
ka := make([]string, n)
va := make([]string, n)
for i := 0; i < n; i++ {
ka[i] = strconv.Itoa(i) // ensure multiple shards
va[i] = randstring(20)
ck.Put(ka[i], va[i])
}
for i := 0; i < n; i++ {
check(t, ck, ka[i], va[i])
}
// make sure that the data really is sharded by
// shutting down one shard and checking that some
// Get()s don't succeed.
cfg.ShutdownGroup(1)
cfg.checklogs() // forbid snapshots
ch := make(chan string)
for xi := 0; xi < n; xi++ {
ck1 := cfg.makeClient() // only one call allowed per client
go func(i int) {
v := ck1.Get(ka[i])
if v != va[i] {
ch <- fmt.Sprintf("Get(%v): expected:\n%v\nreceived:\n%v", ka[i], va[i], v)
} else {
ch <- ""
}
}(xi)
}
// wait a bit, only about half the Gets should succeed.
ndone := 0
done := false
for done == false {
select {
case err := <-ch:
if err != "" {
t.Fatal(err)
}
ndone += 1
case <-time.After(time.Second * 2):
done = true
break
}
}
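// the 10 keys "0".."9" map via key2shard to 10 distinct shards, and the two
// joined groups own half of the shards each, so exactly 5 of the Gets can
// complete while group 1 is down.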
if ndone != 5 {
t.Fatalf("expected 5 completions with one shard dead; got %v\n", ndone)
}
// bring the crashed shard/group back to life.
cfg.StartGroup(1)
for i := 0; i < n; i++ {
check(t, ck, ka[i], va[i])
}
fmt.Printf(" ... Passed\n")
}
func TestJoinLeave(t *testing.T) {
fmt.Printf("Test: join then leave ...\n")
cfg := make_config(t, 3, false, -1)
defer cfg.cleanup()
ck := cfg.makeClient()
cfg.join(0)
n := 10
ka := make([]string, n)
va := make([]string, n)
for i := 0; i < n; i++ {
ka[i] = strconv.Itoa(i) // ensure multiple shards
va[i] = randstring(5)
ck.Put(ka[i], va[i])
}
for i := 0; i < n; i++ {
check(t, ck, ka[i], va[i])
}
cfg.join(1)
for i := 0; i < n; i++ {
check(t, ck, ka[i], va[i])
x := randstring(5)
ck.Append(ka[i], x)
va[i] += x
}
cfg.leave(0)
for i := 0; i < n; i++ {
check(t, ck, ka[i], va[i])
x := randstring(5)
ck.Append(ka[i], x)
va[i] += x
}
// allow time for shards to transfer.
time.Sleep(1 * time.Second)
cfg.checklogs()
cfg.ShutdownGroup(0)
for i := 0; i < n; i++ {
check(t, ck, ka[i], va[i])
}
fmt.Printf(" ... Passed\n")
}
func TestSnapshot(t *testing.T) {
fmt.Printf("Test: snapshots, join, and leave ...\n")
cfg := make_config(t, 3, false, 1000)
defer cfg.cleanup()
ck := cfg.makeClient()
cfg.join(0)
n := 30
ka := make([]string, n)
va := make([]string, n)
for i := 0; i < n; i++ {
ka[i] = strconv.Itoa(i) // ensure multiple shards
va[i] = randstring(20)
ck.Put(ka[i], va[i])
}
for i := 0; i < n; i++ {
check(t, ck, ka[i], va[i])
}
cfg.join(1)
cfg.join(2)
cfg.leave(0)
for i := 0; i < n; i++ {
check(t, ck, ka[i], va[i])
x := randstring(20)
ck.Append(ka[i], x)
va[i] += x
}
cfg.leave(1)
cfg.join(0)
for i := 0; i < n; i++ {
check(t, ck, ka[i], va[i])
x := randstring(20)
ck.Append(ka[i], x)
va[i] += x
}
time.Sleep(1 * time.Second)
for i := 0; i < n; i++ {
check(t, ck, ka[i], va[i])
}
time.Sleep(1 * time.Second)
cfg.checklogs()
cfg.ShutdownGroup(0)
cfg.ShutdownGroup(1)
cfg.ShutdownGroup(2)
cfg.StartGroup(0)
cfg.StartGroup(1)
cfg.StartGroup(2)
for i := 0; i < n; i++ {
check(t, ck, ka[i], va[i])
}
fmt.Printf(" ... Passed\n")
}
func TestMissChange(t *testing.T) {
fmt.Printf("Test: servers miss configuration changes...\n")
cfg := make_config(t, 3, false, 1000)
defer cfg.cleanup()
ck := cfg.makeClient()
cfg.join(0)
n := 10
ka := make([]string, n)
va := make([]string, n)
for i := 0; i < n; i++ {
ka[i] = strconv.Itoa(i) // ensure multiple shards
va[i] = randstring(20)
ck.Put(ka[i], va[i])
}
for i := 0; i < n; i++ {
check(t, ck, ka[i], va[i])
}
cfg.join(1)
cfg.ShutdownServer(0, 0)
cfg.ShutdownServer(1, 0)
cfg.ShutdownServer(2, 0)
cfg.join(2)
cfg.leave(1)
cfg.leave(0)
for i := 0; i < n; i++ {
check(t, ck, ka[i], va[i])
x := randstring(20)
ck.Append(ka[i], x)
va[i] += x
}
cfg.join(1)
for i := 0; i < n; i++ {
check(t, ck, ka[i], va[i])
x := randstring(20)
ck.Append(ka[i], x)
va[i] += x
}
cfg.StartServer(0, 0)
cfg.StartServer(1, 0)
cfg.StartServer(2, 0)
for i := 0; i < n; i++ {
check(t, ck, ka[i], va[i])
x := randstring(20)
ck.Append(ka[i], x)
va[i] += x
}
time.Sleep(2 * time.Second)
cfg.ShutdownServer(0, 1)
cfg.ShutdownServer(1, 1)
cfg.ShutdownServer(2, 1)
cfg.join(0)
cfg.leave(2)
for i := 0; i < n; i++ {
check(t, ck, ka[i], va[i])
x := randstring(20)
ck.Append(ka[i], x)
va[i] += x
}
cfg.StartServer(0, 1)
cfg.StartServer(1, 1)
cfg.StartServer(2, 1)
for i := 0; i < n; i++ {
check(t, ck, ka[i], va[i])
}
fmt.Printf(" ... Passed\n")
}
func TestConcurrent1(t *testing.T) {
fmt.Printf("Test: concurrent puts and configuration changes...\n")
cfg := make_config(t, 3, false, 100)
defer cfg.cleanup()
ck := cfg.makeClient()
cfg.join(0)
n := 10
ka := make([]string, n)
va := make([]string, n)
for i := 0; i < n; i++ {
ka[i] = strconv.Itoa(i) // ensure multiple shards
va[i] = randstring(5)
ck.Put(ka[i], va[i])
}
var done int32
ch := make(chan bool)
ff := func(i int) {
defer func() { ch <- true }()
ck1 := cfg.makeClient()
for atomic.LoadInt32(&done) == 0 {
x := randstring(5)
ck1.Append(ka[i], x)
va[i] += x
time.Sleep(10 * time.Millisecond)
}
}
for i := 0; i < n; i++ {
go ff(i)
}
time.Sleep(150 * time.Millisecond)
cfg.join(1)
time.Sleep(500 * time.Millisecond)
cfg.join(2)
time.Sleep(500 * time.Millisecond)
cfg.leave(0)
cfg.ShutdownGroup(0)
time.Sleep(100 * time.Millisecond)
cfg.ShutdownGroup(1)
time.Sleep(100 * time.Millisecond)
cfg.ShutdownGroup(2)
cfg.leave(2)
time.Sleep(100 * time.Millisecond)
cfg.StartGroup(0)
cfg.StartGroup(1)
cfg.StartGroup(2)
time.Sleep(100 * time.Millisecond)
cfg.join(0)
cfg.leave(1)
time.Sleep(500 * time.Millisecond)
cfg.join(1)
time.Sleep(1 * time.Second)
atomic.StoreInt32(&done, 1)
for i := 0; i < n; i++ {
<-ch
}
for i := 0; i < n; i++ {
check(t, ck, ka[i], va[i])
}
fmt.Printf(" ... Passed\n")
}
//
// this tests the various sources from which a re-starting
// group might need to fetch shard contents.
//
func TestConcurrent2(t *testing.T) {
fmt.Printf("Test: more concurrent puts and configuration changes...\n")
cfg := make_config(t, 3, false, -1)
defer cfg.cleanup()
ck := cfg.makeClient()
cfg.join(1)
cfg.join(0)
cfg.join(2)
n := 10
ka := make([]string, n)
va := make([]string, n)
for i := 0; i < n; i++ {
ka[i] = strconv.Itoa(i) // ensure multiple shards
va[i] = randstring(1)
ck.Put(ka[i], va[i])
}
var done int32
ch := make(chan bool)
ff := func(i int, ck1 *Clerk) {
defer func() { ch <- true }()
for atomic.LoadInt32(&done) == 0 {
x := randstring(1)
ck1.Append(ka[i], x)
va[i] += x
time.Sleep(50 * time.Millisecond)
}
}
for i := 0; i < n; i++ {
ck1 := cfg.makeClient()
go ff(i, ck1)
}
cfg.leave(0)
cfg.leave(2)
time.Sleep(3000 * time.Millisecond)
cfg.join(0)
cfg.join(2)
cfg.leave(1)
time.Sleep(3000 * time.Millisecond)
cfg.join(1)
cfg.leave(0)
cfg.leave(2)
time.Sleep(3000 * time.Millisecond)
cfg.ShutdownGroup(1)
cfg.ShutdownGroup(2)
time.Sleep(1000 * time.Millisecond)
cfg.StartGroup(1)
cfg.StartGroup(2)
time.Sleep(2 * time.Second)
atomic.StoreInt32(&done, 1)
for i := 0; i < n; i++ {
<-ch
}
for i := 0; i < n; i++ {
check(t, ck, ka[i], va[i])
}
fmt.Printf(" ... Passed\n")
}
func TestConcurrent3(t *testing.T) {
fmt.Printf("Test: concurrent configuration change and restart...\n")
cfg := make_config(t, 3, false, 300)
defer cfg.cleanup()
ck := cfg.makeClient()
cfg.join(0)
n := 10
ka := make([]string, n)
va := make([]string, n)
for i := 0; i < n; i++ {
ka[i] = strconv.Itoa(i)
va[i] = randstring(1)
ck.Put(ka[i], va[i])
}
var done int32
ch := make(chan bool)
ff := func(i int, ck1 *Clerk) {
defer func() { ch <- true }()
for atomic.LoadInt32(&done) == 0 {
x := randstring(1)
ck1.Append(ka[i], x)
va[i] += x
}
}
for i := 0; i < n; i++ {
ck1 := cfg.makeClient()
go ff(i, ck1)
}
t0 := time.Now()
for time.Since(t0) < 12*time.Second {
cfg.join(2)
cfg.join(1)
time.Sleep(time.Duration(rand.Int()%900) * time.Millisecond)
cfg.ShutdownGroup(0)
cfg.ShutdownGroup(1)
cfg.ShutdownGroup(2)
cfg.StartGroup(0)
cfg.StartGroup(1)
cfg.StartGroup(2)
time.Sleep(time.Duration(rand.Int()%900) * time.Millisecond)
cfg.leave(1)
cfg.leave(2)
time.Sleep(time.Duration(rand.Int()%900) * time.Millisecond)
}
time.Sleep(2 * time.Second)
atomic.StoreInt32(&done, 1)
for i := 0; i < n; i++ {
<-ch
}
for i := 0; i < n; i++ {
check(t, ck, ka[i], va[i])
}
fmt.Printf(" ... Passed\n")
}
func TestUnreliable1(t *testing.T) {
fmt.Printf("Test: unreliable 1...\n")
cfg := make_config(t, 3, true, 100)
defer cfg.cleanup()
ck := cfg.makeClient()
cfg.join(0)
n := 10
ka := make([]string, n)
va := make([]string, n)
for i := 0; i < n; i++ {
ka[i] = strconv.Itoa(i) // ensure multiple shards
va[i] = randstring(5)
ck.Put(ka[i], va[i])
}
cfg.join(1)
cfg.join(2)
cfg.leave(0)
for ii := 0; ii < n*2; ii++ {
i := ii % n
check(t, ck, ka[i], va[i])
x := randstring(5)
ck.Append(ka[i], x)
va[i] += x
}
cfg.join(0)
cfg.leave(1)
for ii := 0; ii < n*2; ii++ {
i := ii % n
check(t, ck, ka[i], va[i])
}
fmt.Printf(" ... Passed\n")
}
func TestUnreliable2(t *testing.T) {
fmt.Printf("Test: unreliable 2...\n")
cfg := make_config(t, 3, true, 100)
defer cfg.cleanup()
ck := cfg.makeClient()
cfg.join(0)
n := 10
ka := make([]string, n)
va := make([]string, n)
for i := 0; i < n; i++ {
ka[i] = strconv.Itoa(i) // ensure multiple shards
va[i] = randstring(5)
ck.Put(ka[i], va[i])
}
var done int32
ch := make(chan bool)
ff := func(i int) {
defer func() { ch <- true }()
ck1 := cfg.makeClient()
for atomic.LoadInt32(&done) == 0 {
x := randstring(5)
ck1.Append(ka[i], x)
va[i] += x
}
}
for i := 0; i < n; i++ {
go ff(i)
}
time.Sleep(150 * time.Millisecond)
cfg.join(1)
time.Sleep(500 * time.Millisecond)
cfg.join(2)
time.Sleep(500 * time.Millisecond)
cfg.leave(0)
time.Sleep(500 * time.Millisecond)
cfg.leave(1)
time.Sleep(500 * time.Millisecond)
cfg.join(1)
cfg.join(0)
time.Sleep(2 * time.Second)
atomic.StoreInt32(&done, 1)
cfg.net.Reliable(true)
for i := 0; i < n; i++ {
<-ch
}
for i := 0; i < n; i++ {
check(t, ck, ka[i], va[i])
}
fmt.Printf(" ... Passed\n")
}
func TestUnreliable3(t *testing.T) {
fmt.Printf("Test: unreliable 3...\n")
cfg := make_config(t, 3, true, 100)
defer cfg.cleanup()
begin := time.Now()
var operations []porcupine.Operation
var opMu sync.Mutex
ck := cfg.makeClient()
cfg.join(0)
n := 10
ka := make([]string, n)
va := make([]string, n)
for i := 0; i < n; i++ {
ka[i] = strconv.Itoa(i) // ensure multiple shards
va[i] = randstring(5)
start := int64(time.Since(begin))
ck.Put(ka[i], va[i])
end := int64(time.Since(begin))
inp := models.KvInput{Op: 1, Key: ka[i], Value: va[i]}
var out models.KvOutput
op := porcupine.Operation{Input: inp, Call: start, Output: out, Return: end, ClientId: 0}
operations = append(operations, op)
}
var done int32
ch := make(chan bool)
ff := func(i int) {
defer func() { ch <- true }()
ck1 := cfg.makeClient()
for atomic.LoadInt32(&done) == 0 {
ki := rand.Int() % n
nv := randstring(5)
var inp models.KvInput
var out models.KvOutput
start := int64(time.Since(begin))
if (rand.Int() % 1000) < 500 {
ck1.Append(ka[ki], nv)
inp = models.KvInput{Op: 2, Key: ka[ki], Value: nv}
} else if (rand.Int() % 1000) < 100 {
ck1.Put(ka[ki], nv)
inp = models.KvInput{Op: 1, Key: ka[ki], Value: nv}
} else {
v := ck1.Get(ka[ki])
inp = models.KvInput{Op: 0, Key: ka[ki]}
out = models.KvOutput{Value: v}
}
end := int64(time.Since(begin))
op := porcupine.Operation{Input: inp, Call: start, Output: out, Return: end, ClientId: i}
opMu.Lock()
operations = append(operations, op)
opMu.Unlock()
}
}
for i := 0; i < n; i++ {
go ff(i)
}
time.Sleep(150 * time.Millisecond)
cfg.join(1)
time.Sleep(500 * time.Millisecond)
cfg.join(2)
time.Sleep(500 * time.Millisecond)
cfg.leave(0)
time.Sleep(500 * time.Millisecond)
cfg.leave(1)
time.Sleep(500 * time.Millisecond)
cfg.join(1)
cfg.join(0)
time.Sleep(2 * time.Second)
atomic.StoreInt32(&done, 1)
cfg.net.Reliable(true)
for i := 0; i < n; i++ {
<-ch
}
res, info := porcupine.CheckOperationsVerbose(models.KvModel, operations, linearizabilityCheckTimeout)
if res == porcupine.Illegal {
file, err := ioutil.TempFile("", "*.html")
if err != nil {
fmt.Printf("info: failed to create temp file for visualization")
} else {
err = porcupine.Visualize(models.KvModel, info, file)
if err != nil {
fmt.Printf("info: failed to write history visualization to %s\n", file.Name())
} else {
fmt.Printf("info: wrote history visualization to %s\n", file.Name())
}
}
t.Fatal("history is not linearizable")
} else if res == porcupine.Unknown {
fmt.Println("info: linearizability check timed out, assuming history is ok")
}
fmt.Printf(" ... Passed\n")
}
//
// optional test to see whether servers are deleting
// shards for which they are no longer responsible.
//
func TestChallenge1Delete(t *testing.T) {
fmt.Printf("Test: shard deletion (challenge 1) ...\n")
// "1" means force snapshot after every log entry.
cfg := make_config(t, 3, false, 1)
defer cfg.cleanup()
ck := cfg.makeClient()
cfg.join(0)
// 30,000 bytes of total values.
n := 30
ka := make([]string, n)
va := make([]string, n)
for i := 0; i < n; i++ {
ka[i] = strconv.Itoa(i)
va[i] = randstring(1000)
ck.Put(ka[i], va[i])
}
for i := 0; i < 3; i++ {
check(t, ck, ka[i], va[i])
}
for iters := 0; iters < 2; iters++ {
cfg.join(1)
cfg.leave(0)
cfg.join(2)
time.Sleep(3 * time.Second)
for i := 0; i < 3; i++ {
check(t, ck, ka[i], va[i])
}
cfg.leave(1)
cfg.join(0)
cfg.leave(2)
time.Sleep(3 * time.Second)
for i := 0; i < 3; i++ {
check(t, ck, ka[i], va[i])
}
}
cfg.join(1)
cfg.join(2)
time.Sleep(1 * time.Second)
for i := 0; i < 3; i++ {
check(t, ck, ka[i], va[i])
}
time.Sleep(1 * time.Second)
for i := 0; i < 3; i++ {
check(t, ck, ka[i], va[i])
}
time.Sleep(1 * time.Second)
for i := 0; i < 3; i++ {
check(t, ck, ka[i], va[i])
}
total := 0
for gi := 0; gi < cfg.ngroups; gi++ {
for i := 0; i < cfg.n; i++ {
raft := cfg.groups[gi].saved[i].RaftStateSize()
snap := len(cfg.groups[gi].saved[i].ReadSnapshot())
total += raft + snap
}
}
// 27 keys should be stored once.
// 3 keys should also be stored in client dup tables.
// everything on 3 replicas.
// plus slop: with n = 30 that comes to 3 * (27000 + 6000 + 6000) = 117000 bytes.
expected := 3 * (((n - 3) * 1000) + 2*3*1000 + 6000)
if total > expected {
t.Fatalf("snapshot + persisted Raft state are too big: %v > %v\n", total, expected)
}
for i := 0; i < n; i++ {
check(t, ck, ka[i], va[i])
}
fmt.Printf(" ... Passed\n")
}
//
// optional test to see whether servers can handle
// shards that are not affected by a config change
// while the config change is underway
//
func TestChallenge2Unaffected(t *testing.T) {
fmt.Printf("Test: unaffected shard access (challenge 2) ...\n")
cfg := make_config(t, 3, true, 100)
defer cfg.cleanup()
ck := cfg.makeClient()
// JOIN 100
cfg.join(0)
// Do a bunch of puts to keys in all shards
n := 10
ka := make([]string, n)
va := make([]string, n)
for i := 0; i < n; i++ {
ka[i] = strconv.Itoa(i) // ensure multiple shards
va[i] = "100"
ck.Put(ka[i], va[i])
}
// JOIN 101
cfg.join(1)
// QUERY to find shards now owned by 101
c := cfg.mck.Query(-1)
owned := make(map[int]bool, n)
for s, gid := range c.Shards {
owned[s] = gid == cfg.groups[1].gid
}
// Wait for migration to new config to complete, and for clients to
// start using this updated config. Gets to any key k such that
// owned[shard(k)] == true should now be served by group 101.
<-time.After(1 * time.Second)
for i := 0; i < n; i++ {
if owned[i] {
va[i] = "101"
ck.Put(ka[i], va[i])
}
}
// KILL 100
cfg.ShutdownGroup(0)
// LEAVE 100
// 101 doesn't get a chance to migrate things previously owned by 100
cfg.leave(0)
// Wait to make sure clients see new config
<-time.After(1 * time.Second)
// And finally: check that gets/puts for 101-owned keys still complete
for i := 0; i < n; i++ {
shard := int(ka[i][0]) % 10
if owned[shard] {
check(t, ck, ka[i], va[i])
ck.Put(ka[i], va[i]+"-1")
check(t, ck, ka[i], va[i]+"-1")
}
}
fmt.Printf(" ... Passed\n")
}
//
// optional test to see whether servers can handle operations on shards that
// have been received as a part of a config migration when the entire migration
// has not yet completed.
//
func TestChallenge2Partial(t *testing.T) {
fmt.Printf("Test: partial migration shard access (challenge 2) ...\n")
cfg := make_config(t, 3, true, 100)
defer cfg.cleanup()
ck := cfg.makeClient()
// JOIN 100 + 101 + 102
cfg.joinm([]int{0, 1, 2})
// Give the implementation some time to reconfigure
<-time.After(1 * time.Second)
// Do a bunch of puts to keys in all shards
n := 10
ka := make([]string, n)
va := make([]string, n)
for i := 0; i < n; i++ {
ka[i] = strconv.Itoa(i) // ensure multiple shards
va[i] = "100"
ck.Put(ka[i], va[i])
}
// QUERY to find shards owned by 102
c := cfg.mck.Query(-1)
owned := make(map[int]bool, n)
for s, gid := range c.Shards {
owned[s] = gid == cfg.groups[2].gid
}
// KILL 100
cfg.ShutdownGroup(0)
// LEAVE 100 + 102
// 101 can get old shards from 102, but not from 100. 101 should start
// serving shards that used to belong to 102 as soon as possible
cfg.leavem([]int{0, 2})
// Give the implementation some time to start reconfiguration
// And to migrate 102 -> 101
<-time.After(1 * time.Second)
// And finally: check that gets/puts for 101-owned keys now complete
for i := 0; i < n; i++ {
shard := key2shard(ka[i])
if owned[shard] {
check(t, ck, ka[i], va[i])
ck.Put(ka[i], va[i]+"-2")
check(t, ck, ka[i], va[i]+"-2")
}
}
fmt.Printf(" ... Passed\n")
}