diff --git a/go.mod b/go.mod index 8a20d0b..3a0a20c 100644 --- a/go.mod +++ b/go.mod @@ -4,5 +4,4 @@ go 1.16 require ( github.com/bytedance/sonic v1.8.7 // indirect - github.com/emirpasic/gods v1.18.1 ) diff --git a/helper/bigkey.go b/helper/bigkey.go index 350cdf7..027420e 100644 --- a/helper/bigkey.go +++ b/helper/bigkey.go @@ -4,61 +4,33 @@ import ( "encoding/csv" "errors" "fmt" - "github.com/emirpasic/gods/sets/treeset" "github.com/hdt3213/rdb/bytefmt" "github.com/hdt3213/rdb/core" "github.com/hdt3213/rdb/model" "os" + "sort" "strconv" ) -type redisTreeSet struct { - set *treeset.Set +type topList struct { + list []model.RedisObject capacity int } -func (h *redisTreeSet) GetMin() model.RedisObject { - iter := h.set.Iterator() - iter.End() - if iter.Prev() { - raw := iter.Value() - return raw.(model.RedisObject) - } - return nil -} - -// Append new object into tree set -// time complexity: O(n*log(m)), n is number of redis object, m is heap capacity. m if far less than n -func (h *redisTreeSet) Append(x model.RedisObject) { - if h.set.Size() < h.capacity { - h.set.Add(x) - return - } - // if heap is full && x.Size > minSize, then pop min - min := h.GetMin() - if min.GetSize() < x.GetSize() { - h.set.Remove(min) - h.set.Add(x) - } -} - -func (h *redisTreeSet) Dump() []model.RedisObject { - result := make([]model.RedisObject, 0, h.set.Size()) - iter := h.set.Iterator() - for iter.Next() { - result = append(result, iter.Value().(model.RedisObject)) +func (tl *topList) add(x model.RedisObject) { + index := sort.Search(len(tl.list), func(i int) bool { + return tl.list[i].GetSize() <= x.GetSize() + }) + tl.list = append(tl.list, x) + copy(tl.list[index+1:], tl.list[index:]) + tl.list[index] = x + if len(tl.list) > tl.capacity { + tl.list = tl.list[:tl.capacity] } - return result } -func newRedisHeap(cap int) *redisTreeSet { - s := treeset.NewWith(func(a, b interface{}) int { - o1 := a.(model.RedisObject) - o2 := b.(model.RedisObject) - return o2.GetSize() - o1.GetSize() // desc order - }) - return &redisTreeSet{ - set: s, +func newRedisHeap(cap int) *topList { + return &topList{ capacity: cap, } } @@ -83,9 +55,9 @@ func FindBiggestKeys(rdbFilename string, topN int, output *os.File, options ...i if dec, err = wrapDecoder(dec, options...); err != nil { return err } - topList := newRedisHeap(topN) + top := newRedisHeap(topN) err = dec.Parse(func(object model.RedisObject) bool { - topList.Append(object) + top.add(object) return true }) if err != nil { @@ -97,9 +69,7 @@ func FindBiggestKeys(rdbFilename string, topN int, output *os.File, options ...i } csvWriter := csv.NewWriter(output) defer csvWriter.Flush() - iter := topList.set.Iterator() - for iter.Next() { - object := iter.Value().(model.RedisObject) + for _, object := range top.list { err = csvWriter.Write([]string{ strconv.Itoa(object.GetDBIndex()), object.GetKey(), diff --git a/helper/bigkey_test.go b/helper/bigkey_test.go index 3f8858d..662b6a5 100644 --- a/helper/bigkey_test.go +++ b/helper/bigkey_test.go @@ -10,20 +10,12 @@ import ( "testing" ) -func TestRedisHeap_Append(t *testing.T) { - sizeMap := make(map[int]struct{}) // The behavior when encountering objects of the same size is undefined +func TestTopList(t *testing.T) { topN := 100 n := topN * 10 objects := make([]model.RedisObject, 0) for i := 0; i < n; i++ { - var size int - for { - size = rand.Intn(n * 10) - if _, ok := sizeMap[size]; !ok { - sizeMap[size] = struct{}{} - break - } - } + size := rand.Intn(n * 10) o := &model.StringObject{ BaseObject: &model.BaseObject{ Key: strconv.Itoa(i), @@ -34,18 +26,19 @@ func TestRedisHeap_Append(t *testing.T) { } topList := newRedisHeap(topN) for _, o := range objects { - topList.Append(o) + topList.add(o) } - actual := topList.Dump() sort.Slice(objects, func(i, j int) bool { return objects[i].GetSize() > objects[j].GetSize() }) - expect := objects[0:topN] - for i := 0; i < topN; i++ { - o1 := actual[i] - o2 := expect[i] - if o1.GetSize() != o2.GetSize() { - t.Errorf("wrong answer at index: %d", i) + if len(topList.list) != topN { + t.Error("wrong top list size") + } + for i, actual := range topList.list { + expect := objects[i] + if actual.GetSize() != expect.GetSize() { + t.Error("wrong top list") + return } } }