-
Notifications
You must be signed in to change notification settings - Fork 377
/
sketch.go
148 lines (131 loc) · 3.28 KB
/
sketch.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
/*
* Copyright 2019 Dgraph Labs, Inc. and Contributors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ristretto
import (
"fmt"
"math/rand"
"time"
)
// cmSketch is a Count-Min sketch implementation with 4-bit counters, heavily
// based on Damian Gryski's CM4 [1].
//
// [1]: https://github.com/dgryski/go-tinylfu/blob/master/cm4.go
type cmSketch struct {
rows [cmDepth]cmRow
seed [cmDepth]uint64
mask uint64
}
const (
// cmDepth is the number of counter copies to store (think of it as rows).
cmDepth = 4
)
func newCmSketch(numCounters int64) *cmSketch {
if numCounters == 0 {
panic("cmSketch: bad numCounters")
}
// Get the next power of 2 for better cache performance.
numCounters = next2Power(numCounters)
sketch := &cmSketch{mask: uint64(numCounters - 1)}
// Initialize rows of counters and seeds.
// Cryptographic precision not needed
source := rand.New(rand.NewSource(time.Now().UnixNano())) //nolint:gosec
for i := 0; i < cmDepth; i++ {
sketch.seed[i] = source.Uint64()
sketch.rows[i] = newCmRow(numCounters)
}
return sketch
}
// Increment increments the count(ers) for the specified key.
func (s *cmSketch) Increment(hashed uint64) {
for i := range s.rows {
s.rows[i].increment((hashed ^ s.seed[i]) & s.mask)
}
}
// Estimate returns the value of the specified key.
func (s *cmSketch) Estimate(hashed uint64) int64 {
min := byte(255)
for i := range s.rows {
val := s.rows[i].get((hashed ^ s.seed[i]) & s.mask)
if val < min {
min = val
}
}
return int64(min)
}
// Reset halves all counter values.
func (s *cmSketch) Reset() {
for _, r := range s.rows {
r.reset()
}
}
// Clear zeroes all counters.
func (s *cmSketch) Clear() {
for _, r := range s.rows {
r.clear()
}
}
// cmRow is a row of bytes, with each byte holding two counters.
type cmRow []byte
func newCmRow(numCounters int64) cmRow {
return make(cmRow, numCounters/2)
}
func (r cmRow) get(n uint64) byte {
return (r[n/2] >> ((n & 1) * 4)) & 0x0f
}
func (r cmRow) increment(n uint64) {
// Index of the counter.
i := n / 2
// Shift distance (even 0, odd 4).
s := (n & 1) * 4
// Counter value.
v := (r[i] >> s) & 0x0f
// Only increment if not max value (overflow wrap is bad for LFU).
if v < 15 {
r[i] += 1 << s
}
}
func (r cmRow) reset() {
// Halve each counter.
for i := range r {
r[i] = (r[i] >> 1) & 0x77
}
}
func (r cmRow) clear() {
// Zero each counter.
for i := range r {
r[i] = 0
}
}
func (r cmRow) string() string {
s := ""
for i := uint64(0); i < uint64(len(r)*2); i++ {
s += fmt.Sprintf("%02d ", (r[(i/2)]>>((i&1)*4))&0x0f)
}
s = s[:len(s)-1]
return s
}
// next2Power rounds x up to the next power of 2, if it's not already one.
func next2Power(x int64) int64 {
x--
x |= x >> 1
x |= x >> 2
x |= x >> 4
x |= x >> 8
x |= x >> 16
x |= x >> 32
x++
return x
}