~xenrox/ansible: grafana: Upstream config update, use alertmanager integration

2 files changed, 75 insertions(+), 5 deletions(-)

M roles/grafana/files/prometheus.yml
M roles/grafana/templates/grafana.ini.j2

M roles/grafana/files/prometheus.yml => roles/grafana/files/prometheus.yml +6 -0

@@ 5,3 5,9 @@ datasources:
     type: prometheus
     access: proxy
     url: http://localhost:9090
+  - name: Alertmanager
+    type: alertmanager
+    access: proxy
+    url: http://localhost:9093
+    jsonData:
+      implementation: prometheus

M roles/grafana/templates/grafana.ini.j2 => roles/grafana/templates/grafana.ini.j2 +69 -5

@@ 178,6 178,12 @@ enable_gzip = true
 # If enabled and user is not anonymous, data proxy will add X-Grafana-User header with username into the request, default is false.
 ;send_user_header = false
 
+# Limit the amount of bytes that will be read/accepted from responses of outgoing HTTP requests.
+;response_limit = 0
+
+# Limits the number of rows that Grafana will process from SQL data sources.
+;row_limit = 1000000
+
 #################################### Analytics ####################################
 [analytics]
 # Server reporting, sends usage counters to stats.grafana.org every 24 hours.


@@ 240,7 246,7 @@ cookie_secure = true
 strict_transport_security = true
 
 # Sets how long a browser should cache HSTS. Only applied if strict_transport_security is enabled.
-strict_transport_security_max_age_seconds = 86400
+;strict_transport_security_max_age_seconds = 86400
 
 # Set to true to enable the HSTS preloading option. Only applied if strict_transport_security is enabled.
 ;strict_transport_security_preload = false


@@ 483,11 489,14 @@ email_attribute_path = email
 auth_url = https://keycloak.xenrox.net/auth/realms/xenrox/protocol/openid-connect/auth
 token_url = https://keycloak.xenrox.net/auth/realms/xenrox/protocol/openid-connect/token
 api_url = https://keycloak.xenrox.net/auth/realms/xenrox/protocol/openid-connect/userinfo
+;teams_url =
 ;allowed_domains =
 ;team_ids =
 ;allowed_organizations =
 role_attribute_path = contains(roles[*], 'grafana_admin') && 'Admin'
 role_attribute_strict = true
+;groups_attribute_path =
+;team_ids_attribute_path =
 ;tls_skip_verify_insecure = false
 ;tls_client_cert =
 ;tls_client_key =


@@ 697,11 706,67 @@ role_attribute_strict = true
 # global limit of alerts
 ;global_alert_rule = -1
 
+#################################### Unified Alerting ####################
+[unified_alerting]
+# Enable the Unified Alerting sub-system and interface. When enabled we'll migrate all of your alert rules and notification channels to the new system. New alert rules will be created and your notification channels will be converted into an Alertmanager configuration. Previous data is preserved to enable backwards compatibility but new data is removed.
+enabled = true
+
+# Comma-separated list of organization IDs for which to disable unified alerting. Only supported if unified alerting is enabled.
+;disabled_orgs =
+
+# Specify the frequency of polling for admin config changes.
+# The interval string is a possibly signed sequence of decimal numbers, followed by a unit suffix (ms, s, m, h, d), e.g. 30s or 1m.
+;admin_config_poll_interval = 60s
+
+# Specify the frequency of polling for Alertmanager config changes.
+# The interval string is a possibly signed sequence of decimal numbers, followed by a unit suffix (ms, s, m, h, d), e.g. 30s or 1m.
+;alertmanager_config_poll_interval = 60s
+
+# Listen address/hostname and port to receive unified alerting messages for other Grafana instances. The port is used for both TCP and UDP. It is assumed other Grafana instances are also running on the same port. The default value is `0.0.0.0:9094`.
+;ha_listen_address = "0.0.0.0:9094"
+
+# Explicit address/hostname and port to advertise to other Grafana instances. The port is used for both TCP and UDP. Empty by default.
+;ha_advertise_address = ""
+
+# Comma-separated list of initial instances (in a format of host:port) that will form the HA cluster. Configuring this setting will enable High Availability mode for alerting.
+;ha_peers = ""
+
+# Time to wait for an instance to send a notification via the Alertmanager. In HA, each Grafana instance will
+# be assigned a position (e.g. 0, 1). We then multiply this position with the timeout to indicate how long should
+# each instance wait before sending the notification to take into account replication lag.
+# The interval string is a possibly signed sequence of decimal numbers, followed by a unit suffix (ms, s, m, h, d), e.g. 30s or 1m.
+;ha_peer_timeout = "15s"
+
+# The interval between sending gossip messages. By lowering this value (more frequent) gossip messages are propagated
+# across cluster more quickly at the expense of increased bandwidth usage.
+# The interval string is a possibly signed sequence of decimal numbers, followed by a unit suffix (ms, s, m, h, d), e.g. 30s or 1m.
+;ha_gossip_interval = "200ms"
+
+# The interval between gossip full state syncs. Setting this interval lower (more frequent) will increase convergence speeds
+# across larger clusters at the expense of increased bandwidth usage.
+# The interval string is a possibly signed sequence of decimal numbers, followed by a unit suffix (ms, s, m, h, d), e.g. 30s or 1m.
+;ha_push_pull_interval = "60s"
+
+# Enable or disable alerting rule execution. The alerting UI remains visible. This option has a legacy version in the `[alerting]` section that takes precedence.
+;execute_alerts = true
+
+# Alert evaluation timeout when fetching data from the datasource. This option has a legacy version in the `[alerting]` section that takes precedence.
+# The timeout string is a possibly signed sequence of decimal numbers, followed by a unit suffix (ms, s, m, h, d), e.g. 30s or 1m.
+;evaluation_timeout = 30s
+
+# Number of times we'll attempt to evaluate an alert rule before giving up on that evaluation. This option has a legacy version in the `[alerting]` section that takes precedence.
+;max_attempts = 3
+
+# Minimum interval to enforce between rule evaluations. Rules will be adjusted if they are less than this value or if they are not a multiple of the scheduler interval (10s). Higher values can help with resource management as we'll schedule fewer evaluations over time. This option has a legacy version in the `[alerting]` section that takes precedence.
+# The interval string is a possibly signed sequence of decimal numbers, followed by a unit suffix (ms, s, m, h, d), e.g. 30s or 1m.
+;min_interval = 10s
+
 #################################### Alerting ############################
 [alerting]
-# Disable alerting engine & UI features
-;enabled = true
-# Makes it possible to turn off alert rule execution but alerting UI is visible
+# Disable legacy alerting engine & UI features
+enabled = false
+
+# Makes it possible to turn off alert execution but alerting UI is visible
 ;execute_alerts = true
 
 # Default setting for new alert rules. Defaults to categorize error and timeouts as alerting. (alerting, keep_state)


@@ 714,7 779,6 @@ role_attribute_strict = true
 # This limit will protect the server from render overloading and make sure notifications are sent out quickly
 ;concurrent_render_limit = 5
 
-
 # Default setting for alert calculation timeout. Default value is 30
 ;evaluation_timeout_seconds = 30