From 09004a9c261e5a625631d2fc11598527dc164dc0 Mon Sep 17 00:00:00 2001 From: Michelle Shepardson Date: Mon, 8 Jul 2024 19:54:29 +0000 Subject: [PATCH] Fix race on timer in util.queue. We're hitting an issue occasionally where Tabulator will stall out, which seems to be caused by the queue getting stuck during 'sleep()', likely because the timer channel has already been pulled from in the second select case, and the wait on receiving a value from the timer channel after timer.Stop() fails will block indefinitely. Instead, ensure that timer.Stop() is definitely called, but don't bother to drain or close the channel. (Note that timer.Stop() doesn't stop the channel, but it should be cleaned up due to being out of scope once sleep() completes). --- util/queue/queue.go | 4 +-- util/queue/queue_test.go | 57 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 58 insertions(+), 3 deletions(-) diff --git a/util/queue/queue.go b/util/queue/queue.go index 33d7758eb..dcdd84dad 100644 --- a/util/queue/queue.go +++ b/util/queue/queue.go @@ -214,12 +214,10 @@ func (q *Queue) sleep(d time.Duration) { log.Debug("Sleeping...") } sleep := time.NewTimer(d) + defer sleep.Stop() start := time.Now() select { case <-q.signal: - if !sleep.Stop() { - <-sleep.C - } dur := time.Now().Sub(start) log := log.WithField("after", dur.Round(time.Millisecond)) switch { diff --git a/util/queue/queue_test.go b/util/queue/queue_test.go index 7da929828..ecf4a014b 100644 --- a/util/queue/queue_test.go +++ b/util/queue/queue_test.go @@ -622,3 +622,60 @@ func TestPriorityQueue(t *testing.T) { }) } } + +func TestSleep(t *testing.T) { + cases := []struct { + name string + sleep time.Duration + rouseAfter time.Duration // if specified, rouse after this time + wantRouse bool + }{ + { + name: "basic", + sleep: 100 * time.Millisecond, + wantRouse: false, + }, + { + name: "rouse during sleep", + sleep: 1 * time.Minute, + rouseAfter: 100 * time.Millisecond, + wantRouse: true, + }, + { + name: "rouse after sleep", + sleep: 100 * time.Millisecond, + rouseAfter: 1 * time.Second, + wantRouse: false, + }, + } + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + var q Queue + q.Init(logrus.WithField("name", tc.name), []string{"hi", "there"}, time.Now()) + + var slept, roused bool + var lock sync.Mutex + go func(rouseAfter time.Duration) { + if rouseAfter == 0 { + return + } + time.Sleep(rouseAfter) + q.rouse() + lock.Lock() + if !slept { + roused = true + } + lock.Unlock() + }(tc.rouseAfter) + + q.sleep(tc.sleep) + lock.Lock() + slept = true + lock.Unlock() + + if tc.wantRouse != roused { + t.Errorf("sleep() roused incorrectly (with sleep %q and rouse after %q): want rouse = %t, got %t", tc.sleep, tc.rouseAfter, tc.wantRouse, roused) + } + }) + } +}