From 73648360ff2291d529f7dfc032108a1db1066268 Mon Sep 17 00:00:00 2001 From: Ben Ye Date: Wed, 7 Aug 2024 11:02:31 -0700 Subject: [PATCH] Only increment ruler warning eval metric for non PromQL warnings (#7592) --- CHANGELOG.md | 1 + cmd/thanos/rule.go | 31 ++++++++++++++++----- cmd/thanos/rule_test.go | 60 +++++++++++++++++++++++++++++++++++++++++ 3 files changed, 85 insertions(+), 7 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index adfa54b03e..a12bcd6616 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,6 +13,7 @@ We use *breaking :warning:* to mark changes that are not backward compatible (re ### Fixed * [#7511](https://github.com/thanos-io/thanos/pull/7511) Query Frontend: fix doubled gzip compression for response body. +* [#7592](https://github.com/thanos-io/thanos/pull/7592) Ruler: Only increment `thanos_rule_evaluation_with_warnings_total` metric for non PromQL warnings. ### Added diff --git a/cmd/thanos/rule.go b/cmd/thanos/rule.go index 09c928fb6e..6c8fbea6e9 100644 --- a/cmd/thanos/rule.go +++ b/cmd/thanos/rule.go @@ -40,12 +40,12 @@ import ( "github.com/prometheus/prometheus/tsdb" "github.com/prometheus/prometheus/tsdb/agent" "github.com/prometheus/prometheus/tsdb/wlog" + "gopkg.in/yaml.v2" "github.com/thanos-io/objstore" "github.com/thanos-io/objstore/client" objstoretracing "github.com/thanos-io/objstore/tracing/opentracing" "github.com/thanos-io/promql-engine/execution/parse" - "gopkg.in/yaml.v2" "github.com/thanos-io/thanos/pkg/alert" v1 "github.com/thanos-io/thanos/pkg/api/rule" @@ -54,6 +54,7 @@ import ( "github.com/thanos-io/thanos/pkg/component" "github.com/thanos-io/thanos/pkg/discovery/dns" "github.com/thanos-io/thanos/pkg/errutil" + "github.com/thanos-io/thanos/pkg/extannotations" "github.com/thanos-io/thanos/pkg/extgrpc" "github.com/thanos-io/thanos/pkg/extkingpin" "github.com/thanos-io/thanos/pkg/extprom" @@ -292,7 +293,7 @@ func newRuleMetrics(reg *prometheus.Registry) *RuleMetrics { m.ruleEvalWarnings = factory.NewCounterVec( prometheus.CounterOpts{ Name: "thanos_rule_evaluation_with_warnings_total", - Help: "The total number of rule evaluation that were successful but had warnings which can indicate partial error.", + Help: "The total number of rule evaluation that were successful but had non PromQL warnings which can indicate partial error.", }, []string{"strategy"}, ) m.ruleEvalWarnings.WithLabelValues(strings.ToLower(storepb.PartialResponseStrategy_ABORT.String())) @@ -939,6 +940,8 @@ func queryFuncCreator( level.Error(logger).Log("err", err, "query", qs) continue } + + warns = filterOutPromQLWarnings(warns, logger, qs) if len(warns) > 0 { ruleEvalWarnings.WithLabelValues(strings.ToLower(partialResponseStrategy.String())).Inc() // TODO(bwplotka): Propagate those to UI, probably requires changing rule manager code ): @@ -970,12 +973,13 @@ func queryFuncCreator( continue } - if len(result.Warnings) > 0 { + warnings := make([]string, 0, len(result.Warnings)) + for _, warn := range result.Warnings { + warnings = append(warnings, warn.Error()) + } + warnings = filterOutPromQLWarnings(warnings, logger, qs) + if len(warnings) > 0 { ruleEvalWarnings.WithLabelValues(strings.ToLower(partialResponseStrategy.String())).Inc() - warnings := make([]string, 0, len(result.Warnings)) - for _, w := range result.Warnings { - warnings = append(warnings, w.Error()) - } level.Warn(logger).Log("warnings", strings.Join(warnings, ", "), "query", qs) } @@ -1081,3 +1085,16 @@ func validateTemplate(tmplStr string) error { } return nil } + +// Filter out PromQL related warnings from warning response and keep store related warnings only. +func filterOutPromQLWarnings(warns []string, logger log.Logger, query string) []string { + storeWarnings := make([]string, 0, len(warns)) + for _, warn := range warns { + if extannotations.IsPromQLAnnotation(warn) { + level.Warn(logger).Log("warning", warn, "query", query) + continue + } + storeWarnings = append(storeWarnings, warn) + } + return storeWarnings +} diff --git a/cmd/thanos/rule_test.go b/cmd/thanos/rule_test.go index 097b66e65b..28d1421fa8 100644 --- a/cmd/thanos/rule_test.go +++ b/cmd/thanos/rule_test.go @@ -7,6 +7,10 @@ import ( "testing" "github.com/efficientgo/core/testutil" + "github.com/go-kit/log" + "github.com/prometheus/prometheus/util/annotations" + + "github.com/thanos-io/thanos/pkg/extpromql" ) func Test_parseFlagLabels(t *testing.T) { @@ -110,3 +114,59 @@ func Test_tableLinkForExpression(t *testing.T) { testutil.Equals(t, resStr, td.expectStr) } } + +func TestFilterOutPromQLWarnings(t *testing.T) { + logger := log.NewNopLogger() + query := "foo" + expr, err := extpromql.ParseExpr(`rate(prometheus_build_info[5m])`) + testutil.Ok(t, err) + possibleCounterInfo := annotations.NewPossibleNonCounterInfo("foo", expr.PositionRange()) + badBucketLabelWarning := annotations.NewBadBucketLabelWarning("foo", "0.99", expr.PositionRange()) + for _, tc := range []struct { + name string + warnings []string + expected []string + }{ + { + name: "nil warning", + expected: make([]string, 0), + }, + { + name: "empty warning", + warnings: make([]string, 0), + expected: make([]string, 0), + }, + { + name: "no PromQL warning", + warnings: []string{ + "some_warning_message", + }, + expected: []string{ + "some_warning_message", + }, + }, + { + name: "PromQL warning", + warnings: []string{ + possibleCounterInfo.Error(), + }, + expected: make([]string, 0), + }, + { + name: "filter out all PromQL warnings", + warnings: []string{ + possibleCounterInfo.Error(), + badBucketLabelWarning.Error(), + "some_warning_message", + }, + expected: []string{ + "some_warning_message", + }, + }, + } { + t.Run(tc.name, func(t *testing.T) { + output := filterOutPromQLWarnings(tc.warnings, logger, query) + testutil.Equals(t, tc.expected, output) + }) + } +}