Treat monitoring API response of `-1` as an error
mrjones-plip opened this issue · comments
Right now the monitoring API returns `-1` for a bunch of metrics when errors are thrown by the CouchDB queries that retrieve the required values. However, when this happened for a production instance, `date.uptime` and `date.current` were still incrementing correctly, so Watchdog didn't think anything was wrong.
Instead, we should fire an alert to indicate that something is wrong.
In the outage itself, the monitoring API returned this:
{
"version": {
"app": "",
"node": "v16.20.0",
"couchdb": ""
},
"couchdb": {
"medic": {
"name": "",
"update_sequence": -1,
"doc_count": -1,
"doc_del_count": -1,
"fragmentation": -1
},
"sentinel": {
"name": "",
"update_sequence": -1,
"doc_count": -1,
"doc_del_count": -1,
"fragmentation": -1
},
"usersmeta": {
"name": "",
"update_sequence": -1,
"doc_count": -1,
"doc_del_count": -1,
"fragmentation": -1
},
"users": {
"name": "",
"update_sequence": -1,
"doc_count": -1,
"doc_del_count": -1,
"fragmentation": -1
}
},
"date": {
"current": 1698210046488,
"uptime": 967259.799207221
},
"sentinel": {
"backlog": -1
},
"messaging": {
"outgoing": {
"total": {
"due": -1,
"scheduled": -1,
"muted": -1,
"failed": -1,
"delivered": -1
},
"seven_days": {
"due": -1,
"scheduled": -1,
"muted": -1,
"failed": -1,
"delivered": -1
},
"last_hundred": {
"pending": {
"pending": -1,
"forwarded-to-gateway": -1,
"received-by-gateway": -1,
"forwarded-by-gateway": -1
},
"final": {
"sent": -1,
"delivered": -1,
"failed": -1
},
"muted": {
"denied": -1,
"cleared": -1,
"muted": -1,
"duplicate": -1
}
}
}
},
"outbound_push": {
"backlog": -1
},
"feedback": {
"count": -1
},
"conflict": {
"count": -1
},
"replication_limit": {
"count": -1
},
"connected_users": {
"count": -1
}
}
which in turn looked like a value of `0` for everything instead of `-1`.