add alerts
This commit is contained in:
342
modules/services/grafana/alerts.yml
Normal file
342
modules/services/grafana/alerts.yml
Normal file
@@ -0,0 +1,342 @@
|
||||
apiVersion: 1
|
||||
groups:
|
||||
- orgId: 1
|
||||
name: default
|
||||
folder: ALARM
|
||||
interval: 5m
|
||||
rules:
|
||||
- uid: ddin0kv0wnj0gd
|
||||
title: Systemd Units State
|
||||
condition: B
|
||||
data:
|
||||
- refId: D
|
||||
relativeTimeRange:
|
||||
from: 86400
|
||||
to: 0
|
||||
datasourceUid: PBFE396EC0B189D67
|
||||
model:
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: PBFE396EC0B189D67
|
||||
expr: node_systemd_units{instance="127.0.0.1:9001",job="scrapema",state="failed"}
|
||||
format: time_series
|
||||
interval: ""
|
||||
intervalFactor: 1
|
||||
intervalMs: 15000
|
||||
legendFormat: Failed
|
||||
maxDataPoints: 43200
|
||||
refId: D
|
||||
step: 240
|
||||
- refId: A
|
||||
datasourceUid: __expr__
|
||||
model:
|
||||
conditions:
|
||||
- evaluator:
|
||||
params:
|
||||
- 0
|
||||
- 0
|
||||
type: gt
|
||||
operator:
|
||||
type: and
|
||||
query:
|
||||
params: []
|
||||
reducer:
|
||||
params: []
|
||||
type: avg
|
||||
type: query
|
||||
datasource:
|
||||
name: Expression
|
||||
type: __expr__
|
||||
uid: __expr__
|
||||
expression: D
|
||||
hide: false
|
||||
intervalMs: 1000
|
||||
maxDataPoints: 43200
|
||||
reducer: last
|
||||
refId: A
|
||||
settings:
|
||||
mode: dropNN
|
||||
type: reduce
|
||||
- refId: B
|
||||
datasourceUid: __expr__
|
||||
model:
|
||||
conditions:
|
||||
- evaluator:
|
||||
params:
|
||||
- 0
|
||||
- 0
|
||||
type: gt
|
||||
operator:
|
||||
type: and
|
||||
query:
|
||||
params: []
|
||||
reducer:
|
||||
params: []
|
||||
type: avg
|
||||
type: query
|
||||
datasource:
|
||||
name: Expression
|
||||
type: __expr__
|
||||
uid: __expr__
|
||||
expression: A
|
||||
hide: false
|
||||
intervalMs: 1000
|
||||
maxDataPoints: 43200
|
||||
refId: B
|
||||
type: threshold
|
||||
dashboardUid: rYdddlPWk
|
||||
panelId: 298
|
||||
noDataState: NoData
|
||||
execErrState: Error
|
||||
for: 5m
|
||||
annotations:
|
||||
__dashboardUid__: rYdddlPWk
|
||||
__panelId__: "298"
|
||||
labels: {}
|
||||
isPaused: false
|
||||
- uid: adin55cdu3ocga
|
||||
title: Memory Basic
|
||||
condition: G
|
||||
data:
|
||||
- refId: A
|
||||
relativeTimeRange:
|
||||
from: 86400
|
||||
to: 0
|
||||
datasourceUid: PBFE396EC0B189D67
|
||||
model:
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: PBFE396EC0B189D67
|
||||
expr: node_memory_MemTotal_bytes{instance="127.0.0.1:9001",job="scrapema"}
|
||||
format: time_series
|
||||
interval: ""
|
||||
intervalFactor: 1
|
||||
intervalMs: 15000
|
||||
legendFormat: RAM Total
|
||||
maxDataPoints: 43200
|
||||
refId: A
|
||||
step: 240
|
||||
- refId: B
|
||||
relativeTimeRange:
|
||||
from: 86400
|
||||
to: 0
|
||||
datasourceUid: PBFE396EC0B189D67
|
||||
model:
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: PBFE396EC0B189D67
|
||||
expr: node_memory_MemTotal_bytes{instance="127.0.0.1:9001",job="scrapema"} - node_memory_MemFree_bytes{instance="127.0.0.1:9001",job="scrapema"} - (node_memory_Cached_bytes{instance="127.0.0.1:9001",job="scrapema"} + node_memory_Buffers_bytes{instance="127.0.0.1:9001",job="scrapema"} + node_memory_SReclaimable_bytes{instance="127.0.0.1:9001",job="scrapema"})
|
||||
format: time_series
|
||||
interval: ""
|
||||
intervalFactor: 1
|
||||
intervalMs: 15000
|
||||
legendFormat: RAM Used
|
||||
maxDataPoints: 43200
|
||||
refId: B
|
||||
step: 240
|
||||
- refId: C
|
||||
relativeTimeRange:
|
||||
from: 86400
|
||||
to: 0
|
||||
datasourceUid: PBFE396EC0B189D67
|
||||
model:
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: PBFE396EC0B189D67
|
||||
expr: node_memory_Cached_bytes{instance="127.0.0.1:9001",job="scrapema"} + node_memory_Buffers_bytes{instance="127.0.0.1:9001",job="scrapema"} + node_memory_SReclaimable_bytes{instance="127.0.0.1:9001",job="scrapema"}
|
||||
format: time_series
|
||||
interval: ""
|
||||
intervalFactor: 1
|
||||
intervalMs: 15000
|
||||
legendFormat: RAM Cache + Buffer
|
||||
maxDataPoints: 43200
|
||||
refId: C
|
||||
step: 240
|
||||
- refId: D
|
||||
relativeTimeRange:
|
||||
from: 86400
|
||||
to: 0
|
||||
datasourceUid: PBFE396EC0B189D67
|
||||
model:
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: PBFE396EC0B189D67
|
||||
expr: node_memory_MemFree_bytes{instance="127.0.0.1:9001",job="scrapema"}
|
||||
format: time_series
|
||||
interval: ""
|
||||
intervalFactor: 1
|
||||
intervalMs: 15000
|
||||
legendFormat: RAM Free
|
||||
maxDataPoints: 43200
|
||||
refId: D
|
||||
step: 240
|
||||
- refId: E
|
||||
relativeTimeRange:
|
||||
from: 86400
|
||||
to: 0
|
||||
datasourceUid: PBFE396EC0B189D67
|
||||
model:
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: PBFE396EC0B189D67
|
||||
expr: (node_memory_SwapTotal_bytes{instance="127.0.0.1:9001",job="scrapema"} - node_memory_SwapFree_bytes{instance="127.0.0.1:9001",job="scrapema"})
|
||||
format: time_series
|
||||
interval: ""
|
||||
intervalFactor: 1
|
||||
intervalMs: 15000
|
||||
legendFormat: SWAP Used
|
||||
maxDataPoints: 43200
|
||||
refId: E
|
||||
step: 240
|
||||
- refId: F
|
||||
datasourceUid: __expr__
|
||||
model:
|
||||
conditions:
|
||||
- evaluator:
|
||||
params: []
|
||||
type: gt
|
||||
operator:
|
||||
type: and
|
||||
query:
|
||||
params:
|
||||
- F
|
||||
reducer:
|
||||
params: []
|
||||
type: last
|
||||
type: query
|
||||
datasource:
|
||||
type: __expr__
|
||||
uid: __expr__
|
||||
expression: D
|
||||
intervalMs: 1000
|
||||
maxDataPoints: 43200
|
||||
reducer: last
|
||||
refId: F
|
||||
settings:
|
||||
mode: dropNN
|
||||
type: reduce
|
||||
- refId: G
|
||||
datasourceUid: __expr__
|
||||
model:
|
||||
conditions:
|
||||
- evaluator:
|
||||
params:
|
||||
- 1e+09
|
||||
- 0
|
||||
type: lt
|
||||
operator:
|
||||
type: and
|
||||
query:
|
||||
params: []
|
||||
reducer:
|
||||
params: []
|
||||
type: avg
|
||||
type: query
|
||||
datasource:
|
||||
name: Expression
|
||||
type: __expr__
|
||||
uid: __expr__
|
||||
expression: F
|
||||
intervalMs: 1000
|
||||
maxDataPoints: 43200
|
||||
refId: G
|
||||
type: threshold
|
||||
dashboardUid: rYdddlPWk
|
||||
panelId: 78
|
||||
noDataState: NoData
|
||||
execErrState: Error
|
||||
for: 5m
|
||||
annotations:
|
||||
__dashboardUid__: rYdddlPWk
|
||||
__panelId__: "78"
|
||||
labels: {}
|
||||
isPaused: false
|
||||
- uid: ddipptubkwe80f
|
||||
title: Panel Title
|
||||
condition: C
|
||||
data:
|
||||
- refId: A
|
||||
relativeTimeRange:
|
||||
from: 300
|
||||
to: 0
|
||||
datasourceUid: PBFE396EC0B189D67
|
||||
model:
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: PBFE396EC0B189D67
|
||||
editorMode: code
|
||||
exemplar: false
|
||||
expr: time() - container_last_seen{name=~".+"}
|
||||
format: time_series
|
||||
instant: false
|
||||
interval: ""
|
||||
intervalMs: 15000
|
||||
legendFormat: '{{name}}'
|
||||
maxDataPoints: 43200
|
||||
range: true
|
||||
refId: A
|
||||
- refId: B
|
||||
relativeTimeRange:
|
||||
from: 300
|
||||
to: 0
|
||||
datasourceUid: __expr__
|
||||
model:
|
||||
conditions:
|
||||
- evaluator:
|
||||
params: []
|
||||
type: gt
|
||||
operator:
|
||||
type: and
|
||||
query:
|
||||
params:
|
||||
- B
|
||||
reducer:
|
||||
params: []
|
||||
type: last
|
||||
type: query
|
||||
datasource:
|
||||
type: __expr__
|
||||
uid: __expr__
|
||||
expression: A
|
||||
intervalMs: 1000
|
||||
maxDataPoints: 43200
|
||||
reducer: last
|
||||
refId: B
|
||||
type: reduce
|
||||
- refId: C
|
||||
datasourceUid: __expr__
|
||||
model:
|
||||
conditions:
|
||||
- evaluator:
|
||||
params:
|
||||
- 60
|
||||
- 0
|
||||
type: gt
|
||||
operator:
|
||||
type: and
|
||||
query:
|
||||
params: []
|
||||
reducer:
|
||||
params: []
|
||||
type: avg
|
||||
type: query
|
||||
datasource:
|
||||
name: Expression
|
||||
type: __expr__
|
||||
uid: __expr__
|
||||
expression: B
|
||||
hide: false
|
||||
intervalMs: 1000
|
||||
maxDataPoints: 43200
|
||||
refId: C
|
||||
type: threshold
|
||||
dashboardUid: cdijlo7pmmby8c
|
||||
panelId: 2
|
||||
noDataState: NoData
|
||||
execErrState: Error
|
||||
for: 5m
|
||||
annotations:
|
||||
__dashboardUid__: cdijlo7pmmby8c
|
||||
__panelId__: "2"
|
||||
labels: {}
|
||||
isPaused: false
|
||||
23870
modules/services/grafana/dashboards/node-exporter-full.json
Executable file
23870
modules/services/grafana/dashboards/node-exporter-full.json
Executable file
File diff suppressed because it is too large
Load Diff
7
modules/services/grafana/notification-policies.yml
Normal file
7
modules/services/grafana/notification-policies.yml
Normal file
@@ -0,0 +1,7 @@
|
||||
apiVersion: 1
|
||||
policies:
|
||||
- orgId: 1
|
||||
receiver: discord
|
||||
group_by:
|
||||
- grafana_folder
|
||||
- alertname
|
||||
Reference in New Issue
Block a user