# Patch summary
#   * Moves the Grafana provisioning assets from modules/services/grafana-dashboards/
#     to modules/services/grafana/ (the dashboard JSON goes to grafana/dashboards/).
#   * Replaces alerts.yml with a version that keeps "Systemd Units State" and adds
#     the "Memory Basic" and "Panel Title" (container last-seen) rules.
#   * alerts.yml declares `groups:` of alert rules, so grafana.nix wires it through
#     provision.alerting.rules.path instead of provision.alerting.templates.path.
#
# NOTE(review): indentation of every hunk below was reconstructed from a
#   whitespace-collapsed copy of this patch; confirm the grafana.nix context
#   lines against the tree before applying.
# NOTE(review): rule ddipptubkwe80f is still titled "Panel Title", which looks
#   like an editor default - consider giving it a descriptive name.
diff --git a/modules/services/grafana-dashboards/alerts.yml b/modules/services/grafana-dashboards/alerts.yml
deleted file mode 100644
index b01bada..0000000
--- a/modules/services/grafana-dashboards/alerts.yml
+++ /dev/null
@@ -1,97 +0,0 @@
-apiVersion: 1
-groups:
-  - orgId: 1
-    name: default
-    folder: ALARM
-    interval: 5m
-    rules:
-      - uid: ddin0kv0wnj0gd
-        title: Systemd Units State
-        condition: B
-        data:
-          - refId: D
-            relativeTimeRange:
-              from: 86400
-              to: 0
-            datasourceUid: PBFE396EC0B189D67
-            model:
-              datasource:
-                type: prometheus
-                uid: PBFE396EC0B189D67
-              expr: node_systemd_units{instance="127.0.0.1:9001",job="scrapema",state="failed"}
-              format: time_series
-              interval: ""
-              intervalFactor: 1
-              intervalMs: 15000
-              legendFormat: Failed
-              maxDataPoints: 43200
-              refId: D
-              step: 240
-          - refId: A
-            datasourceUid: __expr__
-            model:
-              conditions:
-                - evaluator:
-                    params:
-                      - 0
-                      - 0
-                    type: gt
-                  operator:
-                    type: and
-                  query:
-                    params: []
-                  reducer:
-                    params: []
-                    type: avg
-                  type: query
-              datasource:
-                name: Expression
-                type: __expr__
-                uid: __expr__
-              expression: D
-              hide: false
-              intervalMs: 1000
-              maxDataPoints: 43200
-              reducer: last
-              refId: A
-              settings:
-                mode: dropNN
-              type: reduce
-          - refId: B
-            datasourceUid: __expr__
-            model:
-              conditions:
-                - evaluator:
-                    params:
-                      - 0
-                      - 0
-                    type: gt
-                  operator:
-                    type: and
-                  query:
-                    params: []
-                  reducer:
-                    params: []
-                    type: avg
-                  type: query
-              datasource:
-                name: Expression
-                type: __expr__
-                uid: __expr__
-              expression: A
-              hide: false
-              intervalMs: 1000
-              maxDataPoints: 43200
-              refId: B
-              type: threshold
-        dashboardUid: rYdddlPWk
-        panelId: 298
-        noDataState: NoData
-        execErrState: Error
-        for: 5m
-        annotations:
-          __dashboardUid__: rYdddlPWk
-          __panelId__: "298"
-        labels: {}
-        isPaused: false
-
diff --git a/modules/services/grafana.nix b/modules/services/grafana.nix
index 356117c..5ef8b47 100644
--- a/modules/services/grafana.nix
+++ b/modules/services/grafana.nix
@@ -19,8 +19,8 @@ in
     };
 
     provision.alerting.contactPoints.path = config.age.secrets.grafana-contact-points.path;
-    provision.alerting.policies.path = ./grafana-dashboards/notification-policies.yml;
-    provision.alerting.templates.path = ./grafana-dashboards/alerts.yml;
+    provision.alerting.policies.path = ./grafana/notification-policies.yml;
+    provision.alerting.rules.path = ./grafana/alerts.yml;
 
     provision.datasources.settings = {
       datasources = [
@@ -34,7 +34,7 @@ in
     };
     provision.dashboards.settings.providers = [{
       name = "provisioned-dashboards";
-      options.path = ./grafana-dashboards;
+      options.path = ./grafana/dashboards;
     }];
   };
 
diff --git a/modules/services/grafana/alerts.yml b/modules/services/grafana/alerts.yml
new file mode 100644
index 0000000..4014906
--- /dev/null
+++ b/modules/services/grafana/alerts.yml
@@ -0,0 +1,342 @@
+apiVersion: 1
+groups:
+  - orgId: 1
+    name: default
+    folder: ALARM
+    interval: 5m
+    rules:
+      - uid: ddin0kv0wnj0gd
+        title: Systemd Units State
+        condition: B
+        data:
+          - refId: D
+            relativeTimeRange:
+              from: 86400
+              to: 0
+            datasourceUid: PBFE396EC0B189D67
+            model:
+              datasource:
+                type: prometheus
+                uid: PBFE396EC0B189D67
+              expr: node_systemd_units{instance="127.0.0.1:9001",job="scrapema",state="failed"}
+              format: time_series
+              interval: ""
+              intervalFactor: 1
+              intervalMs: 15000
+              legendFormat: Failed
+              maxDataPoints: 43200
+              refId: D
+              step: 240
+          - refId: A
+            datasourceUid: __expr__
+            model:
+              conditions:
+                - evaluator:
+                    params:
+                      - 0
+                      - 0
+                    type: gt
+                  operator:
+                    type: and
+                  query:
+                    params: []
+                  reducer:
+                    params: []
+                    type: avg
+                  type: query
+              datasource:
+                name: Expression
+                type: __expr__
+                uid: __expr__
+              expression: D
+              hide: false
+              intervalMs: 1000
+              maxDataPoints: 43200
+              reducer: last
+              refId: A
+              settings:
+                mode: dropNN
+              type: reduce
+          - refId: B
+            datasourceUid: __expr__
+            model:
+              conditions:
+                - evaluator:
+                    params:
+                      - 0
+                      - 0
+                    type: gt
+                  operator:
+                    type: and
+                  query:
+                    params: []
+                  reducer:
+                    params: []
+                    type: avg
+                  type: query
+              datasource:
+                name: Expression
+                type: __expr__
+                uid: __expr__
+              expression: A
+              hide: false
+              intervalMs: 1000
+              maxDataPoints: 43200
+              refId: B
+              type: threshold
+        dashboardUid: rYdddlPWk
+        panelId: 298
+        noDataState: NoData
+        execErrState: Error
+        for: 5m
+        annotations:
+          __dashboardUid__: rYdddlPWk
+          __panelId__: "298"
+        labels: {}
+        isPaused: false
+      - uid: adin55cdu3ocga
+        title: Memory Basic
+        condition: G
+        data:
+          - refId: A
+            relativeTimeRange:
+              from: 86400
+              to: 0
+            datasourceUid: PBFE396EC0B189D67
+            model:
+              datasource:
+                type: prometheus
+                uid: PBFE396EC0B189D67
+              expr: node_memory_MemTotal_bytes{instance="127.0.0.1:9001",job="scrapema"}
+              format: time_series
+              interval: ""
+              intervalFactor: 1
+              intervalMs: 15000
+              legendFormat: RAM Total
+              maxDataPoints: 43200
+              refId: A
+              step: 240
+          - refId: B
+            relativeTimeRange:
+              from: 86400
+              to: 0
+            datasourceUid: PBFE396EC0B189D67
+            model:
+              datasource:
+                type: prometheus
+                uid: PBFE396EC0B189D67
+              expr: node_memory_MemTotal_bytes{instance="127.0.0.1:9001",job="scrapema"} - node_memory_MemFree_bytes{instance="127.0.0.1:9001",job="scrapema"} - (node_memory_Cached_bytes{instance="127.0.0.1:9001",job="scrapema"} + node_memory_Buffers_bytes{instance="127.0.0.1:9001",job="scrapema"} + node_memory_SReclaimable_bytes{instance="127.0.0.1:9001",job="scrapema"})
+              format: time_series
+              interval: ""
+              intervalFactor: 1
+              intervalMs: 15000
+              legendFormat: RAM Used
+              maxDataPoints: 43200
+              refId: B
+              step: 240
+          - refId: C
+            relativeTimeRange:
+              from: 86400
+              to: 0
+            datasourceUid: PBFE396EC0B189D67
+            model:
+              datasource:
+                type: prometheus
+                uid: PBFE396EC0B189D67
+              expr: node_memory_Cached_bytes{instance="127.0.0.1:9001",job="scrapema"} + node_memory_Buffers_bytes{instance="127.0.0.1:9001",job="scrapema"} + node_memory_SReclaimable_bytes{instance="127.0.0.1:9001",job="scrapema"}
+              format: time_series
+              interval: ""
+              intervalFactor: 1
+              intervalMs: 15000
+              legendFormat: RAM Cache + Buffer
+              maxDataPoints: 43200
+              refId: C
+              step: 240
+          - refId: D
+            relativeTimeRange:
+              from: 86400
+              to: 0
+            datasourceUid: PBFE396EC0B189D67
+            model:
+              datasource:
+                type: prometheus
+                uid: PBFE396EC0B189D67
+              expr: node_memory_MemFree_bytes{instance="127.0.0.1:9001",job="scrapema"}
+              format: time_series
+              interval: ""
+              intervalFactor: 1
+              intervalMs: 15000
+              legendFormat: RAM Free
+              maxDataPoints: 43200
+              refId: D
+              step: 240
+          - refId: E
+            relativeTimeRange:
+              from: 86400
+              to: 0
+            datasourceUid: PBFE396EC0B189D67
+            model:
+              datasource:
+                type: prometheus
+                uid: PBFE396EC0B189D67
+              expr: (node_memory_SwapTotal_bytes{instance="127.0.0.1:9001",job="scrapema"} - node_memory_SwapFree_bytes{instance="127.0.0.1:9001",job="scrapema"})
+              format: time_series
+              interval: ""
+              intervalFactor: 1
+              intervalMs: 15000
+              legendFormat: SWAP Used
+              maxDataPoints: 43200
+              refId: E
+              step: 240
+          - refId: F
+            datasourceUid: __expr__
+            model:
+              conditions:
+                - evaluator:
+                    params: []
+                    type: gt
+                  operator:
+                    type: and
+                  query:
+                    params:
+                      - F
+                  reducer:
+                    params: []
+                    type: last
+                  type: query
+              datasource:
+                type: __expr__
+                uid: __expr__
+              expression: D
+              intervalMs: 1000
+              maxDataPoints: 43200
+              reducer: last
+              refId: F
+              settings:
+                mode: dropNN
+              type: reduce
+          - refId: G
+            datasourceUid: __expr__
+            model:
+              conditions:
+                - evaluator:
+                    params:
+                      - 1e+09
+                      - 0
+                    type: lt
+                  operator:
+                    type: and
+                  query:
+                    params: []
+                  reducer:
+                    params: []
+                    type: avg
+                  type: query
+              datasource:
+                name: Expression
+                type: __expr__
+                uid: __expr__
+              expression: F
+              intervalMs: 1000
+              maxDataPoints: 43200
+              refId: G
+              type: threshold
+        dashboardUid: rYdddlPWk
+        panelId: 78
+        noDataState: NoData
+        execErrState: Error
+        for: 5m
+        annotations:
+          __dashboardUid__: rYdddlPWk
+          __panelId__: "78"
+        labels: {}
+        isPaused: false
+      - uid: ddipptubkwe80f
+        title: Panel Title
+        condition: C
+        data:
+          - refId: A
+            relativeTimeRange:
+              from: 300
+              to: 0
+            datasourceUid: PBFE396EC0B189D67
+            model:
+              datasource:
+                type: prometheus
+                uid: PBFE396EC0B189D67
+              editorMode: code
+              exemplar: false
+              expr: time() - container_last_seen{name=~".+"}
+              format: time_series
+              instant: false
+              interval: ""
+              intervalMs: 15000
+              legendFormat: '{{name}}'
+              maxDataPoints: 43200
+              range: true
+              refId: A
+          - refId: B
+            relativeTimeRange:
+              from: 300
+              to: 0
+            datasourceUid: __expr__
+            model:
+              conditions:
+                - evaluator:
+                    params: []
+                    type: gt
+                  operator:
+                    type: and
+                  query:
+                    params:
+                      - B
+                  reducer:
+                    params: []
+                    type: last
+                  type: query
+              datasource:
+                type: __expr__
+                uid: __expr__
+              expression: A
+              intervalMs: 1000
+              maxDataPoints: 43200
+              reducer: last
+              refId: B
+              type: reduce
+          - refId: C
+            datasourceUid: __expr__
+            model:
+              conditions:
+                - evaluator:
+                    params:
+                      - 60
+                      - 0
+                    type: gt
+                  operator:
+                    type: and
+                  query:
+                    params: []
+                  reducer:
+                    params: []
+                    type: avg
+                  type: query
+              datasource:
+                name: Expression
+                type: __expr__
+                uid: __expr__
+              expression: B
+              hide: false
+              intervalMs: 1000
+              maxDataPoints: 43200
+              refId: C
+              type: threshold
+        dashboardUid: cdijlo7pmmby8c
+        panelId: 2
+        noDataState: NoData
+        execErrState: Error
+        for: 5m
+        annotations:
+          __dashboardUid__: cdijlo7pmmby8c
+          __panelId__: "2"
+        labels: {}
+        isPaused: false
diff --git a/modules/services/grafana-dashboards/node-exporter-full.json b/modules/services/grafana/dashboards/node-exporter-full.json
similarity index 100%
rename from modules/services/grafana-dashboards/node-exporter-full.json
rename to modules/services/grafana/dashboards/node-exporter-full.json
diff --git a/modules/services/grafana-dashboards/notification-policies.yml b/modules/services/grafana/notification-policies.yml
similarity index 100%
rename from modules/services/grafana-dashboards/notification-policies.yml
rename to modules/services/grafana/notification-policies.yml