Skip to content

Commit

Permalink
k8s-stack: release 0.30.1
Browse files (browse the repository at this point in the history)
  • Loading branch information
AndrewChubatiuk committed Dec 3, 2024
1 parent: 556bffe · commit: c7b1cdc
Show file tree
Hide file tree
Showing 8 changed files with 59 additions and 26 deletions.
1 change: 1 addition & 0 deletions charts/victoria-metrics-k8s-stack/CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
## Next release

- fail template if both vmcluster and vmsingle are enabled
- synced rules and dashboards

## 0.30.0

Expand Down
2 changes: 1 addition & 1 deletion charts/victoria-metrics-k8s-stack/Chart.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ apiVersion: v2
name: victoria-metrics-k8s-stack
description: Kubernetes monitoring on VictoriaMetrics stack. Includes VictoriaMetrics Operator, Grafana dashboards, ServiceScrapes and VMRules
type: application
version: 0.30.0
version: 0.30.1
appVersion: v1.107.0
sources:
- https://github.com/VictoriaMetrics/helm-charts
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3867,7 +3867,7 @@ panels:
uid: $ds
description: 'Shows the approx time needed to reach 100% of disk capacity for at least one vmstorage node based on the following params:
* free disk space;
* free disk space (after -storage.minFreeDiskSpaceBytes);
* row ingestion rate;
Expand Down Expand Up @@ -7264,7 +7264,7 @@ panels:
uid: $ds
description: 'Shows the approx time needed to reach 100% of disk capacity based on the following params:
* free disk space;
* free disk space (after -storage.minFreeDiskSpaceBytes);
* row ingestion rate;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3430,7 +3430,7 @@ panels:
uid: $ds
description: 'Shows the approx time needed to reach 100% of disk capacity based on the following params:
* free disk space;
* free disk space (after -storage.minFreeDiskSpaceBytes);
* row ingestion rate;
Expand Down Expand Up @@ -3521,7 +3521,7 @@ panels:
- datasource:
type: {{ $defaultDatasource }}
uid: $ds
description: Shows amount of on-disk space occupied by data points and the remaining disk space at `-storageDataPath`
description: Shows amount of on-disk space occupied by data points and the remaining disk space at `-storageDataPath`. Calculation accounts for -storage.minFreeDiskSpaceBytes
fieldConfig:
defaults:
color:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,7 @@ rules:
severity: warning
- alert: KubeDaemonSetRolloutStuck
annotations:
description: 'DaemonSet {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.daemonset {{`}}`}} has not finished or progressed for at least 15 minutes.'
description: 'DaemonSet {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.daemonset {{`}}`}} has not finished or progressed for at least 15m.'
runbook_url: '{{ $runbookUrl }}/kubernetes/kubedaemonsetrolloutstuck'
summary: 'DaemonSet rollout is stuck.'
condition: '{{ true }}'
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ name: kubernetes-system-apiserver
rules:
- alert: KubeClientCertificateExpiration
annotations:
description: 'A client certificate used to authenticate to kubernetes apiserver is expiring in less than 7.0 days.'
description: 'A client certificate used to authenticate to kubernetes apiserver is expiring in less than 7.0 days on cluster {{`{{`}} $labels.cluster {{`}}`}}.'
runbook_url: '{{ $runbookUrl }}/kubernetes/kubeclientcertificateexpiration'
summary: 'Client certificate is about to expire.'
condition: '{{ true }}'
Expand All @@ -20,7 +20,7 @@ rules:
severity: warning
- alert: KubeClientCertificateExpiration
annotations:
description: 'A client certificate used to authenticate to kubernetes apiserver is expiring in less than 24.0 hours.'
description: 'A client certificate used to authenticate to kubernetes apiserver is expiring in less than 24.0 hours on cluster {{`{{`}} $labels.cluster {{`}}`}}.'
runbook_url: '{{ $runbookUrl }}/kubernetes/kubeclientcertificateexpiration'
summary: 'Client certificate is about to expire.'
condition: '{{ true }}'
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,17 +14,33 @@ rules:
summary: 'Instance {{`{{`}} $labels.instance {{`}}`}} will run out of disk space in 3 days'
condition: '{{ true }}'
expr: |-
vm_free_disk_space_bytes / ignoring(path)
sum(vm_free_disk_space_bytes) without(path) /
(
rate(vm_rows_added_to_storage_total[1d])
* scalar(
sum(vm_data_size_bytes{type!~"indexdb.*"}) /
sum(vm_rows{type!~"indexdb.*"})
)
rate(vm_rows_added_to_storage_total[1d]) * (
sum(vm_data_size_bytes{type!~"indexdb.*"}) without(type) /
sum(vm_rows{type!~"indexdb.*"}) without(type)
)
) < 3 * 24 * 3600 > 0
for: 30m
labels:
severity: critical
- alert: NodeBecomesReadonlyIn3Days
annotations:
dashboard: '{{ $host }}/d/oS7Bi_0Wz?viewPanel=113&var-instance={{`{{`}} $labels.instance {{`}}`}}'
description: "Taking into account current ingestion rate, free disk space and -storage.minFreeDiskSpaceBytes instance {{`{{`}} $labels.instance {{`}}`}} will remain writable for {{`{{`}} $value | humanizeDuration {{`}}`}}.\n Consider to limit the ingestion rate, decrease retention or scale the disk space up if possible."
summary: 'Instance {{`{{`}} $labels.instance {{`}}`}} will become read-only in 3 days'
condition: '{{ true }}'
expr: |-
sum(vm_free_disk_space_bytes - vm_free_disk_space_limit_bytes) without(path) /
(
rate(vm_rows_added_to_storage_total[1d]) * (
sum(vm_data_size_bytes{type!~"indexdb.*"}) without(type) /
sum(vm_rows{type!~"indexdb.*"}) without(type)
)
) < 3 * 24 * 3600 > 0
for: 30m
labels:
severity: warning
- alert: DiskRunsOutOfSpace
annotations:
dashboard: '{{ $host }}/d/oS7Bi_0Wz?viewPanel=200&var-instance={{`{{`}} $labels.instance {{`}}`}}'
Expand Down Expand Up @@ -87,9 +103,9 @@ rules:
condition: '{{ true }}'
expr: |-
(
sum(rate(vm_new_timeseries_created_total[5m]))
sum(rate(vm_new_timeseries_created_total[5m])) by (job,{{ $clusterLabel }})
/
sum(rate(vm_rows_inserted_total[5m]))
sum(rate(vm_rows_inserted_total[5m])) by (job,{{ $clusterLabel }})
) > 0.1
for: 15m
labels:
Expand All @@ -101,9 +117,9 @@ rules:
summary: 'Too high number of new series created over last 24h'
condition: '{{ true }}'
expr: |-
sum(increase(vm_new_timeseries_created_total[24h]))
sum(increase(vm_new_timeseries_created_total[24h])) by (job,{{ $clusterLabel }})
>
(sum(vm_cache_entries{type="storage/hour_metric_ids"}) * 3)
(sum(vm_cache_entries{type="storage/hour_metric_ids"}) by (job,{{ $clusterLabel }}) * 3)
for: 15m
labels:
severity: warning
Expand All @@ -115,9 +131,9 @@ rules:
condition: '{{ true }}'
expr: |-
(
sum(rate(vm_slow_row_inserts_total[5m]))
sum(rate(vm_slow_row_inserts_total[5m])) by (job,{{ $clusterLabel }})
/
sum(rate(vm_rows_inserted_total[5m]))
sum(rate(vm_rows_inserted_total[5m])) by (job,{{ $clusterLabel }})
) > 0.05
for: 15m
labels:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,17 +14,33 @@ rules:
summary: 'Instance {{`{{`}} $labels.instance {{`}}`}} will run out of disk space soon'
condition: '{{ true }}'
expr: |-
vm_free_disk_space_bytes / ignoring(path)
sum(vm_free_disk_space_bytes) without(path) /
(
rate(vm_rows_added_to_storage_total[1d])
* scalar(
sum(vm_data_size_bytes{type!~"indexdb.*"}) /
sum(vm_rows{type!~"indexdb.*"})
)
rate(vm_rows_added_to_storage_total[1d]) * (
sum(vm_data_size_bytes{type!~"indexdb.*"}) without(type) /
sum(vm_rows{type!~"indexdb.*"}) without(type)
)
) < 3 * 24 * 3600 > 0
for: 30m
labels:
severity: critical
- alert: NodeBecomesReadonlyIn3Days
annotations:
dashboard: '{{ $host }}/d/oS7Bi_0Wz?viewPanel=113&var-instance={{`{{`}} $labels.instance {{`}}`}}'
description: "Taking into account current ingestion rate and free disk space instance {{`{{`}} $labels.instance {{`}}`}} is writable for {{`{{`}} $value | humanizeDuration {{`}}`}}.\n Consider to limit the ingestion rate, decrease retention or scale the disk space up if possible."
summary: 'Instance {{`{{`}} $labels.instance {{`}}`}} will become read-only in 3 days'
condition: '{{ true }}'
expr: |-
sum(vm_free_disk_space_bytes - vm_free_disk_space_limit_bytes) without(path) /
(
rate(vm_rows_added_to_storage_total[1d]) * (
sum(vm_data_size_bytes{type!~"indexdb.*"}) without(type) /
sum(vm_rows{type!~"indexdb.*"}) without(type)
)
) < 3 * 24 * 3600 > 0
for: 30m
labels:
severity: warning
- alert: DiskRunsOutOfSpace
annotations:
dashboard: '{{ $host }}/d/wNf0q_kZk?viewPanel=53&var-instance={{`{{`}} $labels.instance {{`}}`}}'
Expand Down

0 comments on commit c7b1cdc

Please sign in to comment.