fleet/server/service/jitter.go

85 lines
3.1 KiB
Go
Raw Normal View History

package service
import (
"sync"
"time"
)
// jitterHashTable implements a data structure that allows a fleet to generate a static jitter value
// that is properly balanced. Balance in this context means that hosts would be distributed uniformly
// across the total jitter time so there are no spikes.
// The way this structure works is as follows:
// Given an amount of buckets, we want to place hosts in buckets evenly. So we don't want bucket 0 to
// have 1000 hosts, and all the other buckets 0. If there were 1000 buckets, and 1000 hosts, we should
// end up with 1 per bucket.
// The total amount of online hosts is unknown, so first it assumes that amount of buckets >= amount
// of total hosts (maxCapacity of 1 per bucket). Once we have more hosts than buckets, then we
// increase the maxCapacity by 1 for all buckets, and start placing hosts.
// Hosts that have been placed in a bucket remain in that bucket for as long as the fleet instance is
// running.
// The preferred bucket for a host is the one at (host id % bucketCount). If that bucket is full, the
// next one will be tried. If all buckets are full, then capacity gets increased and the bucket
// selection process restarts.
// Once a bucket is found, the index for the bucket (going from 0 to bucketCount) will be the amount of
// minutes added to the host check in time.
// For example: at a 1hr interval, and the default 10% max jitter percent. That allows hosts to
// distribute within 6 minutes around the hour mark. We would have 6 buckets in that case.
// In the worst possible case that all hosts start at the same time, max jitter percent can be set to
// 100, and this method will distribute hosts evenly.
// The main caveat of this approach is that it works at the fleet instance. So depending on what
// instance gets chosen by the load balancer, the jitter might be different. However, load tests have
// shown that the distribution in practice is pretty balance even when all hosts try to check in at
// the same time.
type jitterHashTable struct {
mu sync.Mutex
maxCapacity int
bucketCount int
buckets map[int]int
cache map[uint]time.Duration
}
func newJitterHashTable(bucketCount int) *jitterHashTable {
if bucketCount == 0 {
bucketCount = 1
}
return &jitterHashTable{
maxCapacity: 1,
bucketCount: bucketCount,
buckets: make(map[int]int),
cache: make(map[uint]time.Duration),
}
}
func (jh *jitterHashTable) jitterForHost(hostID uint) time.Duration {
// if no jitter is configured just return 0
if jh.bucketCount <= 1 {
return 0
}
jh.mu.Lock()
if jitter, ok := jh.cache[hostID]; ok {
jh.mu.Unlock()
return jitter
}
for i := 0; i < jh.bucketCount; i++ {
possibleBucket := (int(hostID) + i) % jh.bucketCount
// if the next bucket has capacity, great!
if jh.buckets[possibleBucket] < jh.maxCapacity {
jh.buckets[possibleBucket]++
jitter := time.Duration(possibleBucket) * time.Minute
jh.cache[hostID] = jitter
jh.mu.Unlock()
return jitter
}
}
// otherwise, bump the capacity and restart the process
jh.maxCapacity++
jh.mu.Unlock()
return jh.jitterForHost(hostID)
}