What is the best way to implement global counters for a highly concurrent application? In my case I may have 10K-20K goroutines performing "work", and I want to count th
see for yourself and let me know what you think.
src/test/helpers/helpers.go
package helpers
// CounterIncrementStruct is the message carried by counterIncrementChan. It
// asks the writer goroutine to add value to the counter stored under bucket.
type CounterIncrementStruct struct {
	bucket string // name of the counter to update
	value  int    // amount added to that counter
}
// CounterQueryStruct is the message carried by counterQueryChan. It asks the
// writer goroutine for the current value of bucket; the answer is delivered
// on channel.
type CounterQueryStruct struct {
	bucket  string   // name of the counter to read
	channel chan int // reply channel the writer goroutine sends the value on
}
// Package-level state of the counter service. Everything here is nil until
// CounterInitialize has been called.
var (
	// counter holds the running total for each bucket name.
	counter map[string]int

	// counterIncrementChan carries increment requests to goCounterWriter.
	counterIncrementChan chan CounterIncrementStruct

	// counterQueryChan carries single-bucket read requests to goCounterWriter.
	counterQueryChan chan CounterQueryStruct

	// counterListChan carries reply channels on which goCounterWriter sends
	// a copy of the whole counter map.
	counterListChan chan chan map[string]int
)
// CounterInitialize allocates the counter map and the request channels, then
// starts the goCounterWriter goroutine that services them. The other Counter*
// functions send on these channels, so it has to be called before they are
// used.
func CounterInitialize() {
	counter = make(map[string]int)

	// Increments use an unbuffered channel; queries and list requests may
	// queue up to 100 pending requests each.
	counterIncrementChan = make(chan CounterIncrementStruct)
	counterQueryChan = make(chan CounterQueryStruct, 100)
	counterListChan = make(chan chan map[string]int, 100)

	go goCounterWriter()
}
// goCounterWriter is the loop that services the three request channels. All
// reads and writes of the counter map in this function happen on the
// goroutine running it, one request at a time.
func goCounterWriter() {
	for {
		select {
		case inc := <-counterIncrementChan:
			// An increment with an empty bucket name ends the loop (and
			// with it the goroutine).
			if inc.bucket == "" {
				return
			}
			counter[inc.bucket] += inc.value

		case query := <-counterQueryChan:
			// Unknown buckets are reported as -1.
			result := -1
			if current, ok := counter[query.bucket]; ok {
				result = current
			}
			query.channel <- result

		case replyTo := <-counterListChan:
			// Hand out a copy so the caller never shares the live map.
			snapshot := make(map[string]int)
			for name, total := range counter {
				snapshot[name] = total
			}
			replyTo <- snapshot
		}
	}
}
// CounterIncrement asks the writer goroutine to add counter to the named
// bucket. Calls with an empty bucket name or a zero amount are ignored.
//
// Note that the parameter counter shadows the package-level counter map
// inside this function.
func CounterIncrement(bucket string, counter int) {
	if bucket != "" && counter != 0 {
		counterIncrementChan <- CounterIncrementStruct{bucket: bucket, value: counter}
	}
}
// CounterQuery returns the current value of the named bucket as reported by
// the writer goroutine. It returns -1 when bucket is empty; the writer also
// answers -1 for a bucket it has no entry for.
func CounterQuery(bucket string) int {
	if bucket == "" {
		return -1
	}

	// A fresh unbuffered channel per request carries the answer back.
	answer := make(chan int)
	counterQueryChan <- CounterQueryStruct{bucket: bucket, channel: answer}
	return <-answer
}
// CounterList returns the map of bucket names to values that the writer
// goroutine sends back in response to a list request.
func CounterList() map[string]int {
	snapshotCh := make(chan map[string]int)
	counterListChan <- snapshotCh

	snapshot := <-snapshotCh
	return snapshot
}
src/test/distributed/distributed.go
package distributed
// Counter is a set of named integer counters. The buckets map is read and
// written by the run loop; callers talk to that loop through the three
// request channels.
type Counter struct {
	buckets map[string]int // running total per bucket name
	incrQ   chan incrQ     // increment requests
	readQ   chan readQ     // single-bucket read requests
	sumQ    chan chan int  // reply channels for "sum of all buckets" requests
}
// New builds a Counter, starts its run loop in a new goroutine and returns
// it. The increment queue is buffered (1000 entries); the read and sum
// queues are unbuffered.
func New() Counter {
	var c Counter
	c.buckets = make(map[string]int, 100)
	c.incrQ = make(chan incrQ, 1000)
	c.readQ = make(chan readQ)
	c.sumQ = make(chan chan int)

	go c.run()
	return c
}
// run loops forever, serving one request at a time: it applies increments,
// answers single-bucket reads and answers sum requests.
func (c Counter) run() {
	for {
		select {
		case inc := <-c.incrQ:
			c.buckets[inc.bucket] += inc.count

		case req := <-c.readQ:
			// A bucket with no entry reads as 0 (map zero value).
			req.res <- c.buckets[req.bucket]

		case reply := <-c.sumQ:
			total := 0
			for _, n := range c.buckets {
				total += n
			}
			reply <- total
		}
	}
}
// Get returns the value the run loop reports for bucket. It blocks until the
// request has been accepted and answered.
func (c Counter) Get(bucket string) int {
	reply := make(chan int)
	req := readQ{res: reply, bucket: bucket}
	c.readQ <- req
	return <-reply
}
// Sum returns the total over all buckets as computed by the run loop. It
// blocks until the request has been accepted and answered.
func (c Counter) Sum() int {
	reply := make(chan int)
	c.sumQ <- reply

	total := <-reply
	return total
}
// readQ is a request for the current value of one bucket; the answer is sent
// on res.
type readQ struct {
	bucket string   // name of the bucket to read
	res    chan int // reply channel for the bucket's value
}
// incrQ is a request to add count to the named bucket.
type incrQ struct {
	bucket string // name of the bucket to update
	count  int    // amount to add
}
// Agent returns a new Agent that accumulates increments for bucket locally
// and forwards them to this Counter's increment queue. limit is the local
// count the agent must exceed before Incr tries to forward it.
func (c Counter) Agent(bucket string, limit int) *Agent {
	return &Agent{
		sendIncr: c.incrQ,
		bucket:   bucket,
		limit:    limit,
	}
}
// Agent buffers increments for a single bucket and forwards them in batches
// over sendIncr.
type Agent struct {
	bucket   string     // bucket the forwarded increments are applied to
	limit    int        // Incr tries to flush once count exceeds this
	count    int        // increments accumulated since the last flush
	sendIncr chan incrQ // queue the batched increments are sent on
}
// Incr adds n to the agent's local count. Once the local count exceeds the
// agent's limit, it tries to forward the whole count without blocking; if
// the queue cannot accept it right now, the count is kept and a later call
// tries again.
//
// The method updates a.count without any synchronization.
func (a *Agent) Incr(n int) {
	a.count += n
	if a.count <= a.limit {
		return
	}

	pending := incrQ{bucket: a.bucket, count: a.count}
	select {
	case a.sendIncr <- pending:
		a.count = 0
	default:
		// Queue not ready: keep accumulating locally.
	}
}
// Done forwards whatever the agent has accumulated (even if that is zero)
// and resets the local count. Unlike Incr, the send blocks until the queue
// accepts it.
func (a *Agent) Done() {
	final := incrQ{bucket: a.bucket, count: a.count}
	a.sendIncr <- final
	a.count = 0
}
src/test/helpers_test.go
package counters
import (
"sync"
"testing"
)
// Shared state for the mutex-based benchmark.
var (
	// mux guards every access to m made through bmIncrement.
	mux sync.Mutex

	// m maps bucket name to its running total; it is nil until
	// BenchmarkMutex allocates it.
	m map[string]int
)
// bmIncrement adds value to m[bucket] while holding mux.
func bmIncrement(bucket string, value int) {
	mux.Lock()
	m[bucket] = m[bucket] + value
	mux.Unlock()
}
// BenchmarkMutex measures the mutex-protected map: it starts b.N goroutines,
// each of which increments every bucket twice (six bmIncrement calls per
// goroutine), and waits for all of them to finish.
func BenchmarkMutex(b *testing.B) {
	// Setup is excluded from the measurement.
	b.StopTimer()
	m = make(map[string]int)
	names := []string{"abc123", "def456", "ghi789"}
	b.StartTimer()

	var wg sync.WaitGroup
	wg.Add(b.N)
	for i := 0; i < b.N; i++ {
		go func() {
			defer wg.Done()
			for pass := 0; pass < 2; pass++ {
				for _, name := range names {
					bmIncrement(name, 5)
				}
			}
		}()
	}
	wg.Wait()
}
src/test/distributed_test.go
package counters
import (
"sync"
"test/counters/distributed"
"testing"
)
// BenchmarkDistributed measures the channel-based distributed counter: it
// starts b.N goroutines, each of which calls Incr(5) twice on every agent,
// waits for all of them, and then flushes the agents with Done.
//
// The final flush runs only after wg.Wait(). Calling Done while workers are
// still running would race with their Incr calls, and any increments
// buffered after the flush would never reach the counter.
//
// NOTE(review): the three agents are still shared by all worker goroutines,
// and Agent.Incr updates its local count without synchronization, so the
// workers race with each other. Giving each worker its own agents would fix
// that but changes what this benchmark measures — confirm the intended usage.
func BenchmarkDistributed(b *testing.B) {
	// Setup is excluded from the measurement.
	b.StopTimer()
	counter := distributed.New()
	agents := []*distributed.Agent{
		counter.Agent("abc123", 100),
		counter.Agent("def456", 100),
		counter.Agent("ghi789", 100),
	}
	b.StartTimer()

	var wg sync.WaitGroup
	wg.Add(b.N)
	for i := 0; i < b.N; i++ {
		go func() {
			defer wg.Done()
			for _, a := range agents {
				a.Incr(5)
			}
			for _, a := range agents {
				a.Incr(5)
			}
		}()
	}

	// All workers must be finished before the remaining counts are flushed.
	wg.Wait()
	for _, a := range agents {
		a.Done()
	}
}
Results:
$ go test --bench=. --count 10 -benchmem
goos: linux
goarch: amd64
pkg: test/counters
BenchmarkDistributed-4 3356620 351 ns/op 24 B/op 0 allocs/op
BenchmarkDistributed-4 3414073 368 ns/op 11 B/op 0 allocs/op
BenchmarkDistributed-4 3371878 374 ns/op 7 B/op 0 allocs/op
BenchmarkDistributed-4 3240631 387 ns/op 3 B/op 0 allocs/op
BenchmarkDistributed-4 3169230 389 ns/op 2 B/op 0 allocs/op
BenchmarkDistributed-4 3177606 386 ns/op 0 B/op 0 allocs/op
BenchmarkDistributed-4 3064552 390 ns/op 0 B/op 0 allocs/op
BenchmarkDistributed-4 3065877 409 ns/op 2 B/op 0 allocs/op
BenchmarkDistributed-4 2924686 400 ns/op 1 B/op 0 allocs/op
BenchmarkDistributed-4 3049873 389 ns/op 0 B/op 0 allocs/op
BenchmarkMutex-4 1000000 1106 ns/op 17 B/op 0 allocs/op
BenchmarkMutex-4 948331 1246 ns/op 9 B/op 0 allocs/op
BenchmarkMutex-4 1000000 1244 ns/op 12 B/op 0 allocs/op
BenchmarkMutex-4 1000000 1246 ns/op 11 B/op 0 allocs/op
BenchmarkMutex-4 1000000 1228 ns/op 1 B/op 0 allocs/op
BenchmarkMutex-4 1000000 1235 ns/op 2 B/op 0 allocs/op
BenchmarkMutex-4 1000000 1244 ns/op 1 B/op 0 allocs/op
BenchmarkMutex-4 1000000 1214 ns/op 0 B/op 0 allocs/op
BenchmarkMutex-4 956024 1233 ns/op 0 B/op 0 allocs/op
BenchmarkMutex-4 1000000 1213 ns/op 0 B/op 0 allocs/op
PASS
ok test/counters 37.461s
If you change the limit value to 1000, the code immediately gets much faster, with nothing else to worry about:
$ go test --bench=. --count 10 -benchmem
goos: linux
goarch: amd64
pkg: test/counters
BenchmarkDistributed-4 5463523 221 ns/op 0 B/op 0 allocs/op
BenchmarkDistributed-4 5455981 220 ns/op 0 B/op 0 allocs/op
BenchmarkDistributed-4 5591240 213 ns/op 0 B/op 0 allocs/op
BenchmarkDistributed-4 5277915 212 ns/op 0 B/op 0 allocs/op
BenchmarkDistributed-4 5430421 213 ns/op 0 B/op 0 allocs/op
BenchmarkDistributed-4 5374153 226 ns/op 0 B/op 0 allocs/op
BenchmarkDistributed-4 5656743 219 ns/op 0 B/op 0 allocs/op
BenchmarkDistributed-4 5337343 211 ns/op 0 B/op 0 allocs/op
BenchmarkDistributed-4 5353845 217 ns/op 0 B/op 0 allocs/op
BenchmarkDistributed-4 5416137 217 ns/op 0 B/op 0 allocs/op
BenchmarkMutex-4 1000000 1002 ns/op 135 B/op 0 allocs/op
BenchmarkMutex-4 1253211 1141 ns/op 58 B/op 0 allocs/op
BenchmarkMutex-4 1000000 1261 ns/op 3 B/op 0 allocs/op
BenchmarkMutex-4 987345 1678 ns/op 59 B/op 0 allocs/op
BenchmarkMutex-4 925371 1247 ns/op 0 B/op 0 allocs/op
BenchmarkMutex-4 1000000 1259 ns/op 2 B/op 0 allocs/op
BenchmarkMutex-4 978800 1248 ns/op 0 B/op 0 allocs/op
BenchmarkMutex-4 982144 1213 ns/op 0 B/op 0 allocs/op
BenchmarkMutex-4 975681 1254 ns/op 0 B/op 0 allocs/op
BenchmarkMutex-4 994789 1205 ns/op 0 B/op 0 allocs/op
PASS
ok test/counters 34.314s
Changing the buffer length of Counter.incrQ will also greatly affect performance, though a larger buffer uses more memory.