fix(blockbuilder): min job size (#15617)
Co-authored-by: Ashwanth <[email protected]>
owen-d and ashwanthgoli authored Jan 7, 2025
1 parent 55f2dd8 commit c43aa56
Showing 5 changed files with 77 additions and 20 deletions.
2 changes: 1 addition & 1 deletion docs/sources/shared/configuration.md
@@ -216,7 +216,7 @@ block_scheduler:

# How often the scheduler should plan jobs.
# CLI flag: -block-scheduler.interval
[interval: <duration> | default = 5m]
[interval: <duration> | default = 15m]

# Lookback period used by the scheduler to plan jobs when the consumer group
# has no commits. 0 consumes from the start of the partition.
2 changes: 1 addition & 1 deletion pkg/blockbuilder/builder/tsdb.go
@@ -320,7 +320,7 @@ func (u *uploader) Put(ctx context.Context, db tsdbWithID) error {
return err
}

return client.PutObject(ctx, db.id.Path(), buf)
return client.PutObject(ctx, buildFileName(db.id.Path()), buf)
}

func buildFileName(indexName string) string {
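The `tsdb.go` change is a one-liner: the uploader was writing objects under the raw `db.id.Path()` key, so the stored object name bypassed `buildFileName`; routing the key through that helper presumably makes the uploaded name match what readers look up. A minimal sketch of the shape of the fix, assuming (purely for illustration) that `buildFileName` appends a fixed extension:

```go
// Sketch only: illustrates the shape of the fix, not Loki's actual code.
// The ".tsdb.gz" suffix is an assumption made for this example.
package main

import "fmt"

func buildFileName(indexName string) string {
	return indexName + ".tsdb.gz" // assumed suffix
}

func main() {
	path := "tsdb/partition-0/block-123" // hypothetical db.id.Path() value
	// before: client.PutObject(ctx, path, buf)
	// after:  client.PutObject(ctx, buildFileName(path), buf)
	fmt.Println(buildFileName(path)) // "tsdb/partition-0/block-123.tsdb.gz"
}
```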
20 changes: 17 additions & 3 deletions pkg/blockbuilder/scheduler/scheduler.go
@@ -35,7 +35,7 @@ type Config struct {
}

func (cfg *Config) RegisterFlagsWithPrefix(prefix string, f *flag.FlagSet) {
f.DurationVar(&cfg.Interval, prefix+"interval", 5*time.Minute, "How often the scheduler should plan jobs.")
f.DurationVar(&cfg.Interval, prefix+"interval", 15*time.Minute, "How often the scheduler should plan jobs.")
f.StringVar(&cfg.ConsumerGroup, prefix+"consumer-group", "block-scheduler", "Consumer group used by block scheduler to track the last consumed offset.")
f.DurationVar(&cfg.LookbackPeriod, prefix+"lookback-period", 0, "Lookback period used by the scheduler to plan jobs when the consumer group has no commits. 0 consumes from the start of the partition.")
f.StringVar(
@@ -149,15 +149,27 @@ func (s *BlockScheduler) runOnce(ctx context.Context) error {

s.publishLagMetrics(lag)

jobs, err := s.planner.Plan(ctx, 1) // TODO(owen-d): parallelize work within a partition
// TODO(owen-d): parallelize work within a partition
// TODO(owen-d): skip small jobs unless they're stale,
// e.g. a partition which is no longer being written to shouldn't be orphaned
jobs, err := s.planner.Plan(ctx, 1, 0)
if err != nil {
level.Error(s.logger).Log("msg", "failed to plan jobs", "err", err)
}
level.Info(s.logger).Log("msg", "planned jobs", "count", len(jobs))

for _, job := range jobs {
// TODO: end offset keeps moving each time we plan jobs, maybe we should not use it as part of the job ID

added, status, err := s.idempotentEnqueue(job)
level.Info(s.logger).Log(
"msg", "enqueued job",
"added", added,
"status", status.String(),
"err", err,
"partition", job.Job.Partition(),
"num_offsets", job.Offsets().Max-job.Offsets().Min,
)

// if we've either added or encountered an error, move on; we're done this cycle
if added || err != nil {
@@ -253,7 +265,9 @@ func (s *BlockScheduler) HandleCompleteJob(ctx context.Context, job *types.Job,

// TODO(owen-d): cleaner way to enqueue next job for this partition,
// don't make it part of the response cycle to job completion, etc.
jobs, err := s.planner.Plan(ctx, 1)
// NB(owen-d): only immediately enqueue another job for this partition if
// the job is full. Otherwise, we'd repeatedly enqueue tiny jobs with a few records.
jobs, err := s.planner.Plan(ctx, 1, int(s.cfg.TargetRecordCount))
if err != nil {
level.Error(logger).Log("msg", "failed to plan subsequent jobs", "err", err)
}
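Taken together, the scheduler now only applies a minimum job size on the completion path: the periodic `runOnce` plans with a minimum of 0 so lagging partitions are still drained, while `HandleCompleteJob` chains a follow-up job only when a full `TargetRecordCount` worth of records is available. A rough sketch of that split, using simplified stand-in types rather than the real Loki planner and config:

```go
// Illustrative sketch only: simplified stand-ins for the scheduler's
// planner and config, not the actual Loki types.
package main

import "fmt"

type job struct{ min, max int64 }

// planner mirrors the shape of the new Plan signature: a per-partition
// job cap plus a minimum job size in offsets.
type planner interface {
	Plan(maxJobsPerPartition, minOffsetsPerJob int) []job
}

// fakePlanner pretends a single partition has `lag` unconsumed offsets.
type fakePlanner struct{ lag int64 }

func (p fakePlanner) Plan(maxJobsPerPartition, minOffsetsPerJob int) []job {
	if p.lag < int64(minOffsetsPerJob) {
		return nil // job would be smaller than the minimum, skip it
	}
	return []job{{min: 0, max: p.lag}}
}

type scheduler struct {
	planner           planner
	targetRecordCount int64
}

// runOnce is the periodic tick: accept any job size so lag is drained.
func (s *scheduler) runOnce() []job { return s.planner.Plan(1, 0) }

// handleCompleteJob chains a follow-up job only when a full-sized job
// is available, avoiding a stream of tiny jobs after each completion.
func (s *scheduler) handleCompleteJob() []job {
	return s.planner.Plan(1, int(s.targetRecordCount))
}

func main() {
	s := scheduler{planner: fakePlanner{lag: 30}, targetRecordCount: 1000}
	fmt.Println(len(s.runOnce()))           // 1: small job still planned on the tick
	fmt.Println(len(s.handleCompleteJob())) // 0: too small to chain immediately
}
```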
11 changes: 9 additions & 2 deletions pkg/blockbuilder/scheduler/strategy.go
@@ -19,7 +19,7 @@ type OffsetReader interface {

type Planner interface {
Name() string
Plan(ctx context.Context, maxJobsPerPartition int) ([]*JobWithMetadata, error)
Plan(ctx context.Context, maxJobsPerPartition int, minOffsetsPerJob int) ([]*JobWithMetadata, error)
}

const (
@@ -51,7 +51,8 @@ func (p *RecordCountPlanner) Name() string {
return RecordCountStrategy
}

func (p *RecordCountPlanner) Plan(ctx context.Context, maxJobsPerPartition int) ([]*JobWithMetadata, error) {
func (p *RecordCountPlanner) Plan(ctx context.Context, maxJobsPerPartition int, minOffsetsPerJob int) ([]*JobWithMetadata, error) {
level.Info(p.logger).Log("msg", "planning jobs", "max_jobs_per_partition", maxJobsPerPartition, "target_record_count", p.targetRecordCount, "lookback_period", p.lookbackPeriod.String())
offsets, err := p.offsetReader.GroupLag(ctx, p.lookbackPeriod)
if err != nil {
level.Error(p.logger).Log("msg", "failed to get group lag", "err", err)
@@ -80,6 +81,12 @@ func (p *RecordCountPlanner) Plan(ctx context.Context, maxJobsPerPartition int)
}

currentEnd := min(currentStart+p.targetRecordCount, endOffset)

// Skip creating job if it's smaller than minimum size
if currentEnd-currentStart < int64(minOffsetsPerJob) {
break
}

job := NewJobWithMetadata(
types.NewJob(partitionOffset.Partition, types.Offsets{
Min: currentStart,
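The planner change is the core of the fix: when splitting a partition's lag into jobs of at most `targetRecordCount` offsets, the trailing fragment is dropped if it spans fewer than `minOffsetsPerJob` offsets. A standalone sketch of that splitting loop (a hypothetical helper, not the `RecordCountPlanner` code itself):

```go
package main

import "fmt"

type offsets struct{ min, max int64 }

// splitIntoJobs splits the unconsumed range [start, end) into jobs of at
// most targetRecordCount offsets, dropping a trailing fragment smaller
// than minOffsetsPerJob — the behaviour this commit adds.
func splitIntoJobs(start, end, targetRecordCount int64, minOffsetsPerJob int) []offsets {
	var jobs []offsets
	for cur := start; cur < end; {
		jobEnd := min(cur+targetRecordCount, end)
		if jobEnd-cur < int64(minOffsetsPerJob) {
			break // remaining records fall below the minimum job size
		}
		jobs = append(jobs, offsets{min: cur, max: jobEnd})
		cur = jobEnd
	}
	return jobs
}

func main() {
	// 230 offsets of lag, target 100, minimum 40: two full jobs plus a
	// 30-offset remainder that is skipped rather than planned.
	fmt.Println(splitIntoJobs(0, 230, 100, 40)) // [{0 100} {100 200}]
}
```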
62 changes: 49 additions & 13 deletions pkg/blockbuilder/scheduler/strategy_test.go
@@ -30,14 +30,16 @@ func compareJobs(t *testing.T, expected, actual *JobWithMetadata) {

func TestRecordCountPlanner_Plan(t *testing.T) {
for _, tc := range []struct {
name string
recordCount int64
expectedJobs []*JobWithMetadata
groupLag map[int32]kadm.GroupMemberLag
name string
recordCount int64
minOffsetsPerJob int
expectedJobs []*JobWithMetadata
groupLag map[int32]kadm.GroupMemberLag
}{
{
name: "single partition, single job",
recordCount: 100,
name: "single partition, single job",
recordCount: 100,
minOffsetsPerJob: 0,
groupLag: map[int32]kadm.GroupMemberLag{
0: {
Commit: kadm.Offset{
@@ -57,8 +59,9 @@ func TestRecordCountPlanner_Plan(t *testing.T) {
},
},
{
name: "single partition, multiple jobs",
recordCount: 50,
name: "single partition, multiple jobs",
recordCount: 50,
minOffsetsPerJob: 0,
groupLag: map[int32]kadm.GroupMemberLag{
0: {
Commit: kadm.Offset{
@@ -82,8 +85,9 @@ func TestRecordCountPlanner_Plan(t *testing.T) {
},
},
{
name: "multiple partitions",
recordCount: 100,
name: "multiple partitions",
recordCount: 100,
minOffsetsPerJob: 0,
groupLag: map[int32]kadm.GroupMemberLag{
0: {
Commit: kadm.Offset{
@@ -120,8 +124,9 @@ func TestRecordCountPlanner_Plan(t *testing.T) {
},
},
{
name: "no lag",
recordCount: 100,
name: "no lag",
recordCount: 100,
minOffsetsPerJob: 0,
groupLag: map[int32]kadm.GroupMemberLag{
0: {
Commit: kadm.Offset{
@@ -135,6 +140,37 @@ func TestRecordCountPlanner_Plan(t *testing.T) {
},
expectedJobs: nil,
},
{
name: "skip small jobs",
recordCount: 100,
minOffsetsPerJob: 40,
groupLag: map[int32]kadm.GroupMemberLag{
0: {
Commit: kadm.Offset{
At: 100,
},
End: kadm.ListedOffset{
Offset: 130, // Only 30 records available, less than minimum
},
Partition: 0,
},
1: {
Commit: kadm.Offset{
At: 200,
},
End: kadm.ListedOffset{
Offset: 300, // 100 records available, more than minimum
},
Partition: 1,
},
},
expectedJobs: []*JobWithMetadata{
NewJobWithMetadata(
types.NewJob(1, types.Offsets{Min: 201, Max: 300}),
99, // priority is total remaining: 300-201
),
},
},
} {
t.Run(tc.name, func(t *testing.T) {
mockReader := &mockOffsetReader{
@@ -147,7 +183,7 @@ func TestRecordCountPlanner_Plan(t *testing.T) {
}
require.NoError(t, cfg.Validate())
planner := NewRecordCountPlanner(mockReader, tc.recordCount, time.Hour, log.NewNopLogger())
jobs, err := planner.Plan(context.Background(), 0)
jobs, err := planner.Plan(context.Background(), 0, tc.minOffsetsPerJob)
require.NoError(t, err)

require.Equal(t, len(tc.expectedJobs), len(jobs))
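In the new `skip small jobs` case, partition 0's 30 available records fall below the 40-offset minimum and yield no job, while partition 1 yields exactly one. The expected bounds and priority follow from the fixture offsets, as this quick check shows (the assumption that planning resumes one offset past the commit and that priority equals the remaining count is read off the expected job in the test, not from other code):

```go
package main

import "fmt"

func main() {
	committed, end := int64(200), int64(300) // partition 1 fixture values
	jobMin := committed + 1                  // planning resumes after the committed offset
	jobMax := end                            // up to the partition's end offset
	fmt.Println(jobMin, jobMax, jobMax-jobMin) // 201 300 99 — the expected job and its priority
}
```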
