fix(observability): use internal platform endpoints and reduce k8s pod false positives

This commit is contained in:
Ashwin Kumar Sivakumar 2026-04-13 23:22:02 +05:30
parent 8844a0481d
commit fa005ba881

View file

@@ -115,7 +115,7 @@ spec:
reason = waiting.get("reason", reason)
message = waiting.get("message", message)
-if phase != "Running" or crash:
+if phase in ("Pending", "Failed", "Unknown") or crash:
pod_issues += 1
records.append(
{
@@ -130,12 +130,12 @@ spec:
)
endpoints = [
-("frontend", "https://test121.nxtgauge.com/"),
-("admin", "https://admin.nxtgauge.com/"),
-("api-health", "https://api.nxtgauge.com/health"),
-("woodpecker", "https://ci.nxtgauge.com/"),
-("argocd-server", "http://argocd-server.argocd.svc.cluster.local/healthz"),
-("openobserve", "https://logs.nxtgauge.com/"),
+("frontend-svc", "http://nxtgauge-frontend-solid.nxtgauge.svc.cluster.local/"),
+("admin-svc", "http://nxtgauge-admin-solid.nxtgauge.svc.cluster.local/"),
+("api-gateway-svc", "http://nxtgauge-rust-gateway.nxtgauge.svc.cluster.local:9100/health"),
+("woodpecker-svc", "http://woodpecker-server.woodpecker.svc.cluster.local/"),
+("argocd-metrics", "http://argocd-server-metrics.argocd.svc.cluster.local:8083/metrics"),
+("openobserve-svc", "http://o2-openobserve-standalone.openobserve.svc.cluster.local:5080/healthz"),
]
for name, url in endpoints:
rec = check_url(name, url)