diff --git a/monitoring/mongodb/alerts.test.yaml b/monitoring/mongodb/alerts.test.yaml
index 8d41031996..c779720ee1 100644
--- a/monitoring/mongodb/alerts.test.yaml
+++ b/monitoring/mongodb/alerts.test.yaml
@@ -670,4 +670,79 @@ tests:
                 MongoDB replica set `data-db-mongodb-sharded-shard-1` is not in the expected state.
                 It does not have the expected number of SECONDARY members.
                 Please ensure that all instances are running properly.
-              summary: MongoDB replica set out of sync
\ No newline at end of file
+              summary: MongoDB replica set out of sync
+
+  - name: MongoDbCompactionNeeded
+    interval: 5m
+    input_series:
+      # The rule fires only when three conditions hold together: filesystem
+      # usage above 70%, free/total storage ratio above 30%, and more than
+      # 10 GiB of free storage. Only needs-compaction meets all three; each
+      # other fixture fails at least one condition, named in its comment.
+      # Every series holds 14 constant samples (0m to 65m), outlasting for: 1h.
+      #
+      # needs-compaction: 80% disk usage, 40% free ratio, 12 GB free (> 10 GiB).
+      - series: mongodb_dbstats_fsUsedSize{namespace="zenko", pod="data-db-mongodb-sharded-shard0-data-0", database="needs-compaction"}
+        values: 160e9x13
+      - series: mongodb_dbstats_fsTotalSize{namespace="zenko", pod="data-db-mongodb-sharded-shard0-data-0", database="needs-compaction"}
+        values: 200e9x13
+      - series: mongodb_dbstats_totalFreeStorageSize{namespace="zenko", pod="data-db-mongodb-sharded-shard0-data-0", database="needs-compaction"}
+        values: 12e9x13
+      - series: mongodb_dbstats_totalSize{namespace="zenko", pod="data-db-mongodb-sharded-shard0-data-0", database="needs-compaction"}
+        values: 30e9x13
+      # healthy-db: same 80% disk usage, but about 3% free ratio and 1 GB free (below both thresholds).
+      - series: mongodb_dbstats_fsUsedSize{namespace="zenko", pod="data-db-mongodb-sharded-shard0-data-0", database="healthy-db"}
+        values: 160e9x13
+      - series: mongodb_dbstats_fsTotalSize{namespace="zenko", pod="data-db-mongodb-sharded-shard0-data-0", database="healthy-db"}
+        values: 200e9x13
+      - series: mongodb_dbstats_totalFreeStorageSize{namespace="zenko", pod="data-db-mongodb-sharded-shard0-data-0", database="healthy-db"}
+        values: 1e9x13
+      - series: mongodb_dbstats_totalSize{namespace="zenko", pod="data-db-mongodb-sharded-shard0-data-0", database="healthy-db"}
+        values: 30e9x13
+      # ratio-only: 50% free ratio, but only 2 GB free (below the absolute threshold).
+      - series: mongodb_dbstats_fsUsedSize{namespace="zenko", pod="data-db-mongodb-sharded-shard0-data-0", database="ratio-only"}
+        values: 160e9x13
+      - series: mongodb_dbstats_fsTotalSize{namespace="zenko", pod="data-db-mongodb-sharded-shard0-data-0", database="ratio-only"}
+        values: 200e9x13
+      - series: mongodb_dbstats_totalFreeStorageSize{namespace="zenko", pod="data-db-mongodb-sharded-shard0-data-0", database="ratio-only"}
+        values: 2e9x13
+      - series: mongodb_dbstats_totalSize{namespace="zenko", pod="data-db-mongodb-sharded-shard0-data-0", database="ratio-only"}
+        values: 4e9x13
+      # absolute-only: 12 GB free, but only about 13% free ratio (below the ratio threshold).
+      - series: mongodb_dbstats_fsUsedSize{namespace="zenko", pod="data-db-mongodb-sharded-shard0-data-0", database="absolute-only"}
+        values: 160e9x13
+      - series: mongodb_dbstats_fsTotalSize{namespace="zenko", pod="data-db-mongodb-sharded-shard0-data-0", database="absolute-only"}
+        values: 200e9x13
+      - series: mongodb_dbstats_totalFreeStorageSize{namespace="zenko", pod="data-db-mongodb-sharded-shard0-data-0", database="absolute-only"}
+        values: 12e9x13
+      - series: mongodb_dbstats_totalSize{namespace="zenko", pod="data-db-mongodb-sharded-shard0-data-0", database="absolute-only"}
+        values: 90e9x13
+      # low-disk-usage: 40% free ratio and 12 GB free, but its pod's filesystem is only 25% full.
+      - series: mongodb_dbstats_fsUsedSize{namespace="zenko", pod="data-db-mongodb-sharded-shard0-data-1", database="low-disk-usage"}
+        values: 50e9x13
+      - series: mongodb_dbstats_fsTotalSize{namespace="zenko", pod="data-db-mongodb-sharded-shard0-data-1", database="low-disk-usage"}
+        values: 200e9x13
+      - series: mongodb_dbstats_totalFreeStorageSize{namespace="zenko", pod="data-db-mongodb-sharded-shard0-data-1", database="low-disk-usage"}
+        values: 12e9x13
+      - series: mongodb_dbstats_totalSize{namespace="zenko", pod="data-db-mongodb-sharded-shard0-data-1", database="low-disk-usage"}
+        values: 30e9x13
+    alert_rule_test:
+      # The conditions hold from the first sample, but for: 1h has not elapsed yet.
+      - alertname: MongoDbCompactionNeeded
+        eval_time: 10m
+        exp_alerts: []
+      - alertname: MongoDbCompactionNeeded
+        eval_time: 55m
+        exp_alerts: []
+      # for: 1h has elapsed; only the needs-compaction database fires.
+      - alertname: MongoDbCompactionNeeded
+        eval_time: 65m
+        exp_alerts:
+          - exp_labels:
+              severity: warning
+              namespace: zenko
+              pod: data-db-mongodb-sharded-shard0-data-0
+              database: needs-compaction
+            exp_annotations:
+              description: "MongoDB pod `data-db-mongodb-sharded-shard0-data-0` database `needs-compaction` has accumulated significant reclaimable storage while the underlying filesystem is filling up. Consider running compaction to recover disk space."
+              summary: MongoDB compaction needed
diff --git a/monitoring/mongodb/alerts.yaml b/monitoring/mongodb/alerts.yaml
index 6e272d25ec..ac09c4e8fb 100644
--- a/monitoring/mongodb/alerts.yaml
+++ b/monitoring/mongodb/alerts.yaml
@@ -27,6 +27,15 @@ x-inputs:
 - name: replicationLagOplogSizeThreshold
   type: config
   value: 0.5
+- name: compactionDiskUsageThreshold
+  type: config
+  value: 0.7  # fsUsedSize / fsTotalSize must exceed this ratio
+- name: compactionFreeStorageRatioThreshold
+  type: config
+  value: 0.3  # totalFreeStorageSize / totalSize must exceed this ratio
+- name: compactionFreeStorageAbsoluteThreshold
+  type: config
+  value: 10 * 1024 * 1024 * 1024  # 10 GiB, in bytes
 
 groups:
 - name: MongoDb
@@ -270,6 +279,33 @@ groups:
       description: "MongoDB pod `{{ $labels.pod }}` has been in the 'STARTUP2' state for more than 1 hour. Please ensure that the instance is running properly."
       summary: MongoDB node in STARTUP2 state for too long
 
+  # Compaction is worth suggesting only when the filesystem is filling up AND
+  # the database holds free storage that is large both relative to its size
+  # and in absolute terms. The three clauses are joined with `and`, so a
+  # series fires only if all of them hold for the same label set.
+  # The absolute threshold is parenthesized because its default is an
+  # arithmetic expression, presumably substituted textually into the query.
+  - alert: MongoDbCompactionNeeded
+    expr: |
+      (
+        mongodb_dbstats_fsUsedSize{namespace="${namespace}",pod=~"${service}.*"}
+        / mongodb_dbstats_fsTotalSize{namespace="${namespace}",pod=~"${service}.*"}
+      ) > ${compactionDiskUsageThreshold}
+      and
+      (
+        mongodb_dbstats_totalFreeStorageSize{namespace="${namespace}",pod=~"${service}.*"}
+        / mongodb_dbstats_totalSize{namespace="${namespace}",pod=~"${service}.*"}
+      ) > ${compactionFreeStorageRatioThreshold}
+      and
+      mongodb_dbstats_totalFreeStorageSize{namespace="${namespace}",pod=~"${service}.*"}
+        > (${compactionFreeStorageAbsoluteThreshold})
+    for: 1h
+    labels:
+      severity: warning
+    annotations:
+      description: "MongoDB pod `{{ $labels.pod }}` database `{{ $labels.database }}` has accumulated significant reclaimable storage while the underlying filesystem is filling up. Consider running compaction to recover disk space."
+      summary: MongoDB compaction needed
+
   - alert: MongoDbRSNotSynced
     expr: |
       count by (rs_nm, statefulset) (
diff --git a/solution-base/mongodb/charts/mongodb-sharded/values.yaml b/solution-base/mongodb/charts/mongodb-sharded/values.yaml
index 0aa3089c79..cfdfc9a2f6 100644
--- a/solution-base/mongodb/charts/mongodb-sharded/values.yaml
+++ b/solution-base/mongodb/charts/mongodb-sharded/values.yaml
@@ -1815,7 +1815,7 @@ metrics:
   ## @param metrics.extraArgs String with extra arguments to the metrics exporter
   ## ref: https://github.com/percona/mongodb_exporter/blob/main/main.go
   ##
-  extraArgs: "--collector.diagnosticdata --collector.replicasetstatus --collector.dbstats --collector.topmetrics --compatible-mode"
+  extraArgs: "--collector.diagnosticdata --collector.replicasetstatus --collector.dbstats --collector.dbstatsfreestorage --collector.topmetrics --compatible-mode"
   ## @param metrics.resourcesPreset Set container resources according to one common preset (allowed values: none, nano, micro, small, medium, large, xlarge, 2xlarge). This is ignored if metrics.resources is set (metrics.resources is recommended for production).
   ## More information: https://github.com/bitnami/charts/blob/main/bitnami/common/templates/_resources.tpl#L15
   ##