由于 webhook 接收端需要同步,我想在特定时间点把所有正在触发的警报一次性重新发送到 webhook。Prometheus 和 Alertmanager 部署在 GCP 上的 Kubernetes 集群中。
有什么办法可以做到这一点吗?repeat_interval 只会在 12 小时后重新发送某一个特定警报,而我需要一次性发送所有警报。
下面附上我的 Alertmanager 配置:
# Alertmanager configuration. Nesting reconstructed from the Alertmanager
# config schema, since the pasted snippet had lost its indentation.
config:
  global:
    resolve_timeout: 5m

  # Root route: alerts are grouped by the `job` label and fall back to the
  # 'null' receiver when no child route matches.
  route:
    group_by: ['job']
    group_wait: 30s
    group_interval: 5m
    # A still-firing notification is re-sent after this interval.
    repeat_interval: 12h
    receiver: 'null'
    routes:
      # Watchdog alerts go to 'null', which defines no notification configs.
      - match:
          alertname: Watchdog
        receiver: 'null'
      # `continue: true` lets evaluation proceed to the next sibling route,
      # so an alert matched here is also offered to the webhook-prod2 route.
      - match_re:
          severity: ".*"
        receiver: 'webhook-prod1'
        continue: true
      - match_re:
          severity: ".*"
        receiver: 'webhook-prod2'

  receivers:
    # Receiver with no integrations configured.
    - name: 'null'
    - name: 'webhook-prod1'
      webhook_configs:
        - send_resolved: true
          url: 'http://xx.xx.xx.xx:9090/probe/webhook/prometheus/'
          http_config:
            basic_auth:
              username: "username"
              # Redaction placeholder from the original post.
              password: <secret>
    # NOTE(review): the URL is masked identically to webhook-prod1 in the
    # post; presumably these are different endpoints — confirm.
    - name: 'webhook-prod2'
      webhook_configs:
        - send_resolved: true
          url: 'http://xx.xx.xx.xx:9090/probe/webhook/prometheus/'
          http_config:
            basic_auth:
              username: "username"
              # Redaction placeholder from the original post.
              password: <secret>

  templates:
    - '/etc/alertmanager/config/*.tmpl'