Skip to content

Commit

Permalink
refactor big key
Browse files Browse the repository at this point in the history
  • Loading branch information
HDT3213 committed Aug 26, 2023
1 parent 9f1f2c8 commit 887c2ca
Show file tree
Hide file tree
Showing 3 changed files with 28 additions and 66 deletions.
1 change: 0 additions & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -4,5 +4,4 @@ go 1.16

require (
github.com/bytedance/sonic v1.8.7 // indirect
github.com/emirpasic/gods v1.18.1
)
64 changes: 17 additions & 47 deletions helper/bigkey.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,61 +4,33 @@ import (
"encoding/csv"
"errors"
"fmt"
"github.com/emirpasic/gods/sets/treeset"
"github.com/hdt3213/rdb/bytefmt"
"github.com/hdt3213/rdb/core"
"github.com/hdt3213/rdb/model"
"os"
"sort"
"strconv"
)

type redisTreeSet struct {
set *treeset.Set
type topList struct {
list []model.RedisObject
capacity int
}

// GetMin returns the element that comes last in the set's iteration order,
// or nil when the set holds no elements. The set built by newRedisHeap is
// ordered by descending size, so the last element is the smallest object.
func (h *redisTreeSet) GetMin() model.RedisObject {
	it := h.set.Iterator()
	// Move past the final element, then step back once to land on it.
	it.End()
	if !it.Prev() {
		return nil
	}
	return it.Value().(model.RedisObject)
}

// Append offers x to the bounded tree set, which keeps at most h.capacity
// objects. While the set has spare room, x is added unconditionally. Once the
// set is full, x replaces the current minimum only if x is strictly larger.
//
// Time complexity over a whole scan: O(n*log(m)), where n is the number of
// redis objects and m is the set capacity (assuming logarithmic tree-set
// operations); m is far less than n.
func (h *redisTreeSet) Append(x model.RedisObject) {
	if h.set.Size() < h.capacity {
		h.set.Add(x)
		return
	}
	// The set is full: evict the smallest entry only when x is bigger.
	smallest := h.GetMin()
	if smallest == nil {
		// Only reachable when capacity <= 0: the set is empty, so there is
		// nothing to evict and x must not be kept. Calling GetSize on the nil
		// interface would panic.
		return
	}
	if smallest.GetSize() < x.GetSize() {
		h.set.Remove(smallest)
		h.set.Add(x)
	}
}

func (h *redisTreeSet) Dump() []model.RedisObject {
result := make([]model.RedisObject, 0, h.set.Size())
iter := h.set.Iterator()
for iter.Next() {
result = append(result, iter.Value().(model.RedisObject))
func (tl *topList) add(x model.RedisObject) {
index := sort.Search(len(tl.list), func(i int) bool {
return tl.list[i].GetSize() <= x.GetSize()
})
tl.list = append(tl.list, x)
copy(tl.list[index+1:], tl.list[index:])
tl.list[index] = x
if len(tl.list) > tl.capacity {
tl.list = tl.list[:tl.capacity]
}
return result
}

func newRedisHeap(cap int) *redisTreeSet {
s := treeset.NewWith(func(a, b interface{}) int {
o1 := a.(model.RedisObject)
o2 := b.(model.RedisObject)
return o2.GetSize() - o1.GetSize() // desc order
})
return &redisTreeSet{
set: s,
func newRedisHeap(cap int) *topList {
return &topList{
capacity: cap,
}
}
Expand All @@ -83,9 +55,9 @@ func FindBiggestKeys(rdbFilename string, topN int, output *os.File, options ...i
if dec, err = wrapDecoder(dec, options...); err != nil {
return err
}
topList := newRedisHeap(topN)
top := newRedisHeap(topN)
err = dec.Parse(func(object model.RedisObject) bool {
topList.Append(object)
top.add(object)
return true
})
if err != nil {
Expand All @@ -97,9 +69,7 @@ func FindBiggestKeys(rdbFilename string, topN int, output *os.File, options ...i
}
csvWriter := csv.NewWriter(output)
defer csvWriter.Flush()
iter := topList.set.Iterator()
for iter.Next() {
object := iter.Value().(model.RedisObject)
for _, object := range top.list {
err = csvWriter.Write([]string{
strconv.Itoa(object.GetDBIndex()),
object.GetKey(),
Expand Down
29 changes: 11 additions & 18 deletions helper/bigkey_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,20 +10,12 @@ import (
"testing"
)

func TestRedisHeap_Append(t *testing.T) {
sizeMap := make(map[int]struct{}) // The behavior when encountering objects of the same size is undefined
func TestTopList(t *testing.T) {
topN := 100
n := topN * 10
objects := make([]model.RedisObject, 0)
for i := 0; i < n; i++ {
var size int
for {
size = rand.Intn(n * 10)
if _, ok := sizeMap[size]; !ok {
sizeMap[size] = struct{}{}
break
}
}
size := rand.Intn(n * 10)
o := &model.StringObject{
BaseObject: &model.BaseObject{
Key: strconv.Itoa(i),
Expand All @@ -34,18 +26,19 @@ func TestRedisHeap_Append(t *testing.T) {
}
topList := newRedisHeap(topN)
for _, o := range objects {
topList.Append(o)
topList.add(o)
}
actual := topList.Dump()
sort.Slice(objects, func(i, j int) bool {
return objects[i].GetSize() > objects[j].GetSize()
})
expect := objects[0:topN]
for i := 0; i < topN; i++ {
o1 := actual[i]
o2 := expect[i]
if o1.GetSize() != o2.GetSize() {
t.Errorf("wrong answer at index: %d", i)
if len(topList.list) != topN {
t.Error("wrong top list size")
}
for i, actual := range topList.list {
expect := objects[i]
if actual.GetSize() != expect.GetSize() {
t.Error("wrong top list")
return
}
}
}
Expand Down

0 comments on commit 887c2ca

Please sign in to comment.