apiVersion: v1
kind: ConfigMap
metadata:
  name: openobserve-alerts-bootstrap
  namespace: openobserve
data:
  # Shell script that upserts one alert template and six scheduled SQL alerts
  # through the OpenObserve HTTP API. Stored as a literal block scalar so the
  # script's newlines and backslashes reach the shell unchanged.
  bootstrap.sh: |
    #!/usr/bin/env sh
    # Bootstraps OpenObserve alerting: upserts the Telegram HTTP template and
    # a fixed set of scheduled SQL alerts. Existing alerts are matched by name
    # and updated in place, so the script can be re-run.
    #
    # Required environment:
    #   ZO_ROOT_USER_EMAIL / ZO_ROOT_USER_PASSWORD  basic-auth credentials
    #   TELEGRAM_CHAT_ID                            chat id put into the template
    # Optional environment:
    #   ORG_ID           organisation id used in API paths (default: default)
    #   BASE_URL         OpenObserve base URL (default: in-cluster service)
    #   STREAM_NAME      log stream the alerts query (default: default)
    #   ALERT_TZ_OFFSET  value sent as each alert's tz_offset (default: 330)
    # Requires curl, jq and base64 on PATH.
    set -eu

    ORG_ID="${ORG_ID:-default}"
    BASE_URL="${BASE_URL:-http://o2-openobserve-standalone.openobserve.svc.cluster.local:5080}"
    STREAM_NAME="${STREAM_NAME:-default}"
    TELEGRAM_CHAT_ID="${TELEGRAM_CHAT_ID:-}"
    ALERT_TZ_OFFSET="${ALERT_TZ_OFFSET:-330}"

    if [ -z "${ZO_ROOT_USER_EMAIL:-}" ] || [ -z "${ZO_ROOT_USER_PASSWORD:-}" ]; then
      echo "missing ZO_ROOT_USER_EMAIL / ZO_ROOT_USER_PASSWORD"
      exit 1
    fi

    if [ -z "$TELEGRAM_CHAT_ID" ]; then
      echo "missing TELEGRAM_CHAT_ID"
      exit 1
    fi

    # Some base64 implementations wrap long output; strip the newlines so the
    # header stays on a single line.
    AUTH="$(printf '%s:%s' "$ZO_ROOT_USER_EMAIL" "$ZO_ROOT_USER_PASSWORD" | base64 | tr -d '\n')"
    auth_hdr="Authorization: Basic $AUTH"

    # api ARGS...
    # Runs curl with the auth and JSON content-type headers; ARGS are passed
    # through untouched. -f makes HTTP errors (>= 400) a non-zero exit, -sS
    # hides the progress meter but still prints errors.
    api() {
      curl -sfS -H "$auth_hdr" -H "Content-Type: application/json" "$@"
    }

    # ensure_template NAME TYPE TITLE BODY IS_DEFAULT
    # Creates or updates an alert template.
    #   NAME        template name (also used in the PUT path)
    #   TYPE        http or email
    #   TITLE       template title
    #   BODY        template body, sent as a JSON string
    #   IS_DEFAULT  JSON boolean literal: true or false
    # Exits the script if neither PUT nor POST succeeds.
    ensure_template() {
      template_name="$1"
      template_type="$2" # http or email
      title="$3"
      body="$4"
      is_default="$5" # true/false

      payload="$(jq -n \
        --arg name "$template_name" \
        --arg type "$template_type" \
        --arg title "$title" \
        --arg body "$body" \
        --argjson isDefault "$is_default" \
        '{name: $name, type: $type, title: $title, body: $body, isDefault: $isDefault}')"

      # Upsert: PUT works for existing, and also works as create in newer
      # versions. Its output is discarded because a failure here only means
      # "fall back to POST".
      if api -X PUT "$BASE_URL/api/$ORG_ID/alerts/templates/$template_name" -d "$payload" >/dev/null 2>&1; then
        echo "upserted template=$template_name"
        return 0
      fi

      api -X POST "$BASE_URL/api/$ORG_ID/alerts/templates" -d "$payload" >/dev/null || {
        echo "failed creating template=$template_name"
        exit 1
      }
      echo "created template=$template_name"
    }

    # ensure_alert NAME SQL PERIOD FREQUENCY SILENCE ROW_TEMPLATE
    # Creates the scheduled SQL alert NAME, or updates it when an alert with
    # the same name is already listed.
    #   NAME          alert name, used to find an existing alert
    #   SQL           query sent as query_condition.sql
    #   PERIOD        number sent as trigger_condition.period
    #   FREQUENCY     number sent as trigger_condition.frequency (minutes)
    #   SILENCE       number sent as trigger_condition.silence
    #   ROW_TEMPLATE  per-row format string sent as row_template
    # Exits the script when listing, saving, or response validation fails.
    ensure_alert() {
      alert_name="$1"
      sql="$2"
      period_minutes="$3"
      frequency_minutes="$4"
      silence_minutes="$5"
      row_template="$6"

      # Fetch the list on its own instead of piping curl straight into jq: a
      # pipeline only reports the status of its last command, so a failed
      # listing would look like "no such alert" and lead to a bogus create.
      alerts_json="$(api "$BASE_URL/api/v2/$ORG_ID/alerts")" || {
        echo "failed listing alerts while looking up alert=$alert_name"
        exit 1
      }

      # first(...) keeps only the first match; `[]?` tolerates a missing or
      # null .list; `// empty` turns a null id into no output at all.
      existing_id="$(printf '%s' "$alerts_json" \
        | jq -r --arg n "$alert_name" \
          'first(.list[]? | select(.name == $n) | .alert_id) // empty')" || {
        echo "failed parsing alert list while looking up alert=$alert_name"
        exit 1
      }

      # NOTE(review): the destination "nxtgauge_telegram" is not created by
      # this script; presumably it already exists and points at the
      # "telegram_nxtgauge" template - verify.
      payload="$(jq -n \
        --arg name "$alert_name" \
        --arg stream "$STREAM_NAME" \
        --arg sql "$sql" \
        --argjson period "$period_minutes" \
        --argjson frequency "$frequency_minutes" \
        --argjson silence "$silence_minutes" \
        --argjson tz_offset "$ALERT_TZ_OFFSET" \
        --arg row_template "$row_template" \
        '{
          name: $name,
          stream_type: "logs",
          stream_name: $stream,
          is_real_time: false,
          enabled: true,
          tz_offset: $tz_offset,
          destinations: ["nxtgauge_telegram"],
          row_template: $row_template,
          row_template_type: "String",
          query_condition: {
            type: "sql",
            sql: $sql
          },
          trigger_condition: {
            period: $period,
            operator: ">=",
            threshold: 1,
            frequency: $frequency,
            frequency_type: "minutes",
            silence: $silence
          }
        }')"

      # Pick the request for update vs. create; everything after this is
      # shared between the two paths.
      if [ -n "$existing_id" ]; then
        method="PUT"
        url="$BASE_URL/api/v2/$ORG_ID/alerts/$existing_id"
        doing="updating"
        did="updated"
        target="alert=$alert_name id=$existing_id"
      else
        method="POST"
        url="$BASE_URL/api/v2/$ORG_ID/alerts"
        doing="creating"
        did="created"
        target="alert=$alert_name"
      fi

      resp="$(api -X "$method" "$url" -d "$payload")" || {
        echo "failed $doing $target"
        exit 1
      }

      # printf instead of echo: some sh implementations expand backslash
      # escapes in echo, which would corrupt the JSON before jq reads it.
      # A non-JSON response leaves code empty and is reported below.
      code="$(printf '%s' "$resp" | jq -r '.code // empty')" || code=""
      if [ "$code" != "200" ]; then
        echo "failed $doing $target resp=$resp"
        exit 1
      fi
      echo "$did $target"
    }

    # Telegram template includes useful debugging context + top rows.
    # Uses OpenObserve built-in variables: {alert_url}, {alert_count}, {rows:5}, etc.
    # NOTE(review): printf turns each \\\\n below into the three characters
    # backslash, backslash, n in the template body - confirm Telegram shows
    # these as line breaks rather than a literal "\n". The chat id is inserted
    # without JSON escaping.
    telegram_body="$(printf '{\"chat_id\":\"%s\",\"text\":\"ALERT {alert_name}\\\\norg={org_name} stream={stream_type}/{stream_name}\\\\ncount={alert_count} window={alert_start_time}..{alert_end_time}\\\\n\\\\n{rows:5}\\\\n\\\\nOpen: {alert_url}\"}' "$TELEGRAM_CHAT_ID")"
    ensure_template "telegram_nxtgauge" "http" "" "$telegram_body" true

    # Alert definitions. Arguments after the SQL are: period, frequency,
    # silence, then the row template. Queries read from $STREAM_NAME so they
    # stay in step with the stream_name sent in the alert payload.
    ensure_alert \
      "k8s-image-pull-failures" \
      "SELECT k8s_namespace_name, k8s_pod_name, body_object_reason, body_object_message FROM \"$STREAM_NAME\" WHERE body_object_message ILIKE '%ErrImagePull%' OR body_object_message ILIKE '%ImagePullBackOff%' OR body_object_message ILIKE '%Failed to pull image%' ORDER BY _timestamp DESC LIMIT 50" \
      5 1 30 \
      "{k8s_namespace_name}/{k8s_pod_name} {body_object_reason}: {body_object_message}"

    ensure_alert \
      "k8s-crashloopbackoff" \
      "SELECT k8s_namespace_name, k8s_pod_name, body_object_reason, body_object_message FROM \"$STREAM_NAME\" WHERE body_object_message ILIKE '%CrashLoopBackOff%' OR body_object_message ILIKE '%Back-off restarting failed container%' ORDER BY _timestamp DESC LIMIT 50" \
      5 1 30 \
      "{k8s_namespace_name}/{k8s_pod_name} {body_object_reason}: {body_object_message}"

    ensure_alert \
      "k8s-volume-mount-failures" \
      "SELECT k8s_namespace_name, k8s_pod_name, body_object_reason, body_object_message FROM \"$STREAM_NAME\" WHERE body_object_message ILIKE '%FailedMount%' OR body_object_message ILIKE '%FailedAttachVolume%' OR body_object_message ILIKE '%MountVolume%' ORDER BY _timestamp DESC LIMIT 50" \
      10 2 60 \
      "{k8s_namespace_name}/{k8s_pod_name} {body_object_reason}: {body_object_message}"

    ensure_alert \
      "argocd-errors" \
      "SELECT k8s_pod_name, k8s_container_name, substring(body, 1, 220) AS msg FROM \"$STREAM_NAME\" WHERE k8s_namespace_name = 'argocd' AND (body ILIKE '%level=error%' OR body ILIKE '%ERROR%' OR body ILIKE '%ComparisonError%' OR body ILIKE '%SyncFailed%') ORDER BY _timestamp DESC LIMIT 50" \
      10 2 30 \
      "argocd/{k8s_pod_name} {k8s_container_name}: {msg}"

    ensure_alert \
      "woodpecker-errors" \
      "SELECT k8s_pod_name, k8s_container_name, substring(body, 1, 220) AS msg FROM \"$STREAM_NAME\" WHERE k8s_namespace_name = 'woodpecker' AND (body ILIKE '%error%' OR body ILIKE '%ERROR%' OR body ILIKE '%failed%') ORDER BY _timestamp DESC LIMIT 50" \
      10 2 30 \
      "woodpecker/{k8s_pod_name} {k8s_container_name}: {msg}"

    ensure_alert \
      "registry-errors" \
      "SELECT k8s_pod_name, k8s_container_name, substring(body, 1, 220) AS msg FROM \"$STREAM_NAME\" WHERE k8s_namespace_name = 'registry' AND (body ILIKE '%error%' OR body ILIKE '%ERROR%' OR body ILIKE '%413%' OR body ILIKE '%payload too large%') ORDER BY _timestamp DESC LIMIT 50" \
      10 2 60 \
      "registry/{k8s_pod_name} {k8s_container_name}: {msg}"