From 5a913dcac180dfd9e79a448aed9426fdd102ddca Mon Sep 17 00:00:00 2001 From: consultoria-as Date: Wed, 29 Apr 2026 07:10:01 +0000 Subject: [PATCH] feat(monitoring): add Grafana dashboards for PostgreSQL, Redis, System, App - nexus-postgresql.json: connections, transactions, cache hit, WAL, slow queries, table bloat - nexus-redis.json: memory, commands/sec, clients, cache hit, keyspace hits/misses, evicted keys - nexus-system.json: CPU, memory, disk, network, load average - nexus-gunicorn.json: request rate, response time, workers, 5xx errors, memory per worker - dashboards.yml: auto-provisioning config --- .../provisioning/dashboards/dashboards.yml | 11 ++ .../dashboards/nexus-gunicorn.json | 149 ++++++++++++++ .../dashboards/nexus-postgresql.json | 185 ++++++++++++++++++ .../provisioning/dashboards/nexus-redis.json | 173 ++++++++++++++++ .../provisioning/dashboards/nexus-system.json | 164 ++++++++++++++++ 5 files changed, 682 insertions(+) create mode 100644 docker/grafana/provisioning/dashboards/dashboards.yml create mode 100644 docker/grafana/provisioning/dashboards/nexus-gunicorn.json create mode 100644 docker/grafana/provisioning/dashboards/nexus-postgresql.json create mode 100644 docker/grafana/provisioning/dashboards/nexus-redis.json create mode 100644 docker/grafana/provisioning/dashboards/nexus-system.json diff --git a/docker/grafana/provisioning/dashboards/dashboards.yml b/docker/grafana/provisioning/dashboards/dashboards.yml new file mode 100644 index 0000000..281cf86 --- /dev/null +++ b/docker/grafana/provisioning/dashboards/dashboards.yml @@ -0,0 +1,11 @@ +apiVersion: 1 +providers: + - name: 'nexus-dashboards' + orgId: 1 + folder: 'Nexus' + type: file + disableDeletion: false + updateIntervalSeconds: 10 + allowUiUpdates: true + options: + path: /etc/grafana/provisioning/dashboards diff --git a/docker/grafana/provisioning/dashboards/nexus-gunicorn.json b/docker/grafana/provisioning/dashboards/nexus-gunicorn.json new file mode 100644 index 0000000..5de73b5 --- /dev/null +++ b/docker/grafana/provisioning/dashboards/nexus-gunicorn.json @@ -0,0 +1,149 @@ +{ + "uid": "nexus-app", + "title": "Nexus — Application", + "tags": ["gunicorn", "flask"], + "timezone": "browser", + "schemaVersion": 36, + "refresh": "30s", + "time": { + "from": "now-1h", + "to": "now" + }, + "templating": { + "list": [ + { + "name": "datasource", + "type": "datasource", + "query": "prometheus", + "current": { + "selected": false, + "text": "Prometheus", + "value": "Prometheus" + } + } + ] + }, + "panels": [ + { + "id": 1, + "title": "Request Rate (nginx)", + "type": "timeseries", + "gridPos": {"h": 8, "w": 12, "x": 0, "y": 0}, + "datasource": {"type": "prometheus", "uid": "${datasource}"}, + "targets": [ + { + "datasource": {"type": "prometheus", "uid": "${datasource}"}, + "expr": "rate(nginx_http_requests_total[5m])", + "legendFormat": "Requests/sec", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "custom": {"drawStyle": "line", "lineWidth": 2, "fillOpacity": 10}, + "unit": "reqps", + "min": 0 + }, + "overrides": [] + } + }, + { + "id": 2, + "title": "Response Time (nginx)", + "type": "timeseries", + "gridPos": {"h": 8, "w": 12, "x": 12, "y": 0}, + "datasource": {"type": "prometheus", "uid": "${datasource}"}, + "targets": [ + { + "datasource": {"type": "prometheus", "uid": "${datasource}"}, + "expr": "histogram_quantile(0.95, sum(rate(nginx_http_request_duration_seconds_bucket[5m])) by (le))", + "legendFormat": "p95", + "refId": "A" + }, + { + "datasource": {"type": "prometheus", "uid": "${datasource}"}, + "expr": "histogram_quantile(0.99, sum(rate(nginx_http_request_duration_seconds_bucket[5m])) by (le))", + "legendFormat": "p99", + "refId": "B" + } + ], + "fieldConfig": { + "defaults": { + "custom": {"drawStyle": "line", "lineWidth": 2, "fillOpacity": 10}, + "unit": "s", + "min": 0 + }, + "overrides": [] + } + }, + { + "id": 3, + "title": "Active Workers", + "type": "timeseries", + "gridPos": {"h": 8, "w": 12, "x": 0, "y": 8}, + "datasource": {"type": "prometheus", "uid": "${datasource}"}, + "targets": [ + { + "datasource": {"type": "prometheus", "uid": "${datasource}"}, + "expr": "count by (instance) (node_processes_state{state=\"S\", cmdline=~\".*gunicorn.*\"})", + "legendFormat": "Workers {{instance}}", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "custom": {"drawStyle": "line", "lineWidth": 2, "fillOpacity": 10}, + "unit": "short", + "min": 0 + }, + "overrides": [] + } + }, + { + "id": 4, + "title": "5xx Errors", + "type": "timeseries", + "gridPos": {"h": 8, "w": 12, "x": 12, "y": 8}, + "datasource": {"type": "prometheus", "uid": "${datasource}"}, + "targets": [ + { + "datasource": {"type": "prometheus", "uid": "${datasource}"}, + "expr": "rate(nginx_http_requests_total{status=~\"5..\"}[5m])", + "legendFormat": "5xx/sec", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "custom": {"drawStyle": "line", "lineWidth": 2, "fillOpacity": 10}, + "unit": "reqps", + "min": 0 + }, + "overrides": [] + } + }, + { + "id": 5, + "title": "Memory per Worker", + "type": "timeseries", + "gridPos": {"h": 8, "w": 24, "x": 0, "y": 16}, + "datasource": {"type": "prometheus", "uid": "${datasource}"}, + "targets": [ + { + "datasource": {"type": "prometheus", "uid": "${datasource}"}, + "expr": "process_resident_memory_bytes{cmdline=~\".*gunicorn.*\"} / 1024 / 1024", + "legendFormat": "{{cmdline}}", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "custom": {"drawStyle": "line", "lineWidth": 2, "fillOpacity": 10}, + "unit": "mbytes", + "min": 0 + }, + "overrides": [] + } + } + ] +} diff --git a/docker/grafana/provisioning/dashboards/nexus-postgresql.json b/docker/grafana/provisioning/dashboards/nexus-postgresql.json new file mode 100644 index 0000000..46cf3e8 --- /dev/null +++ b/docker/grafana/provisioning/dashboards/nexus-postgresql.json @@ -0,0 +1,185 @@ +{ + "uid": "nexus-postgresql", + "title": "Nexus — PostgreSQL", + "tags": ["postgres", "database"], + "timezone": "browser", + "schemaVersion": 36, + "refresh": "30s", + "time": { + "from": "now-1h", + "to": "now" + }, + "templating": { + "list": [ + { + "name": "datasource", + "type": "datasource", + "query": "prometheus", + "current": { + "selected": false, + "text": "Prometheus", + "value": "Prometheus" + } + } + ] + }, + "panels": [ + { + "id": 1, + "title": "Active Connections", + "type": "timeseries", + "gridPos": {"h": 8, "w": 12, "x": 0, "y": 0}, + "datasource": {"type": "prometheus", "uid": "${datasource}"}, + "targets": [ + { + "datasource": {"type": "prometheus", "uid": "${datasource}"}, + "expr": "pg_stat_activity_count", + "legendFormat": "Connections", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "custom": {"drawStyle": "line", "lineWidth": 2, "fillOpacity": 10}, + "unit": "short", + "min": 0 + }, + "overrides": [] + } + }, + { + "id": 2, + "title": "Transactions / sec", + "type": "timeseries", + "gridPos": {"h": 8, "w": 12, "x": 12, "y": 0}, + "datasource": {"type": "prometheus", "uid": "${datasource}"}, + "targets": [ + { + "datasource": {"type": "prometheus", "uid": "${datasource}"}, + "expr": "rate(pg_stat_database_xact_commit[5m])", + "legendFormat": "Commits", + "refId": "A" + }, + { + "datasource": {"type": "prometheus", "uid": "${datasource}"}, + "expr": "rate(pg_stat_database_xact_rollback[5m])", + "legendFormat": "Rollbacks", + "refId": "B" + } + ], + "fieldConfig": { + "defaults": { + "custom": {"drawStyle": "line", "lineWidth": 2, "fillOpacity": 10}, + "unit": "tps", + "min": 0 + }, + "overrides": [] + } + }, + { + "id": 3, + "title": "Cache Hit Ratio", + "type": "timeseries", + "gridPos": {"h": 8, "w": 12, "x": 0, "y": 8}, + "datasource": {"type": "prometheus", "uid": "${datasource}"}, + "targets": [ + { + "datasource": {"type": "prometheus", "uid": "${datasource}"}, + "expr": "pg_stat_database_blks_hit / (pg_stat_database_blks_hit + pg_stat_database_blks_read)", + "legendFormat": "Hit Ratio", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "custom": {"drawStyle": "line", "lineWidth": 2, "fillOpacity": 10}, + "unit": "percentunit", + "min": 0, + "max": 1 + }, + "overrides": [] + } + }, + { + "id": 4, + "title": "WAL Generation", + "type": "timeseries", + "gridPos": {"h": 8, "w": 12, "x": 12, "y": 8}, + "datasource": {"type": "prometheus", "uid": "${datasource}"}, + "targets": [ + { + "datasource": {"type": "prometheus", "uid": "${datasource}"}, + "expr": "rate(pg_stat_bgwriter_buffers_backend_fsync[5m])", + "legendFormat": "Backend fsync", + "refId": "A" + }, + { + "datasource": {"type": "prometheus", "uid": "${datasource}"}, + "expr": "rate(pg_stat_bgwriter_buffers_backend[5m])", + "legendFormat": "Backend buffers", + "refId": "B" + } + ], + "fieldConfig": { + "defaults": { + "custom": {"drawStyle": "line", "lineWidth": 2, "fillOpacity": 10}, + "unit": "ops", + "min": 0 + }, + "overrides": [] + } + }, + { + "id": 5, + "title": "Slow Queries", + "type": "timeseries", + "gridPos": {"h": 8, "w": 12, "x": 0, "y": 16}, + "datasource": {"type": "prometheus", "uid": "${datasource}"}, + "targets": [ + { + "datasource": {"type": "prometheus", "uid": "${datasource}"}, + "expr": "pg_stat_activity_count{state=\"active\"}", + "legendFormat": "Active queries", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "custom": {"drawStyle": "line", "lineWidth": 2, "fillOpacity": 10}, + "unit": "short", + "min": 0 + }, + "overrides": [] + } + }, + { + "id": 6, + "title": "Table Bloat", + "type": "timeseries", + "gridPos": {"h": 8, "w": 12, "x": 12, "y": 16}, + "datasource": {"type": "prometheus", "uid": "${datasource}"}, + "targets": [ + { + "datasource": {"type": "prometheus", "uid": "${datasource}"}, + "expr": "pg_stat_user_tables_n_live_tup", + "legendFormat": "Live tuples", + "refId": "A" + }, + { + "datasource": {"type": "prometheus", "uid": "${datasource}"}, + "expr": "pg_stat_user_tables_n_dead_tup", + "legendFormat": "Dead tuples", + "refId": "B" + } + ], + "fieldConfig": { + "defaults": { + "custom": {"drawStyle": "line", "lineWidth": 2, "fillOpacity": 10}, + "unit": "short", + "min": 0 + }, + "overrides": [] + } + } + ] +} diff --git a/docker/grafana/provisioning/dashboards/nexus-redis.json b/docker/grafana/provisioning/dashboards/nexus-redis.json new file mode 100644 index 0000000..e72fc69 --- /dev/null +++ b/docker/grafana/provisioning/dashboards/nexus-redis.json @@ -0,0 +1,173 @@ +{ + "uid": "nexus-redis", + "title": "Nexus — Redis", + "tags": ["redis", "cache"], + "timezone": "browser", + "schemaVersion": 36, + "refresh": "30s", + "time": { + "from": "now-1h", + "to": "now" + }, + "templating": { + "list": [ + { + "name": "datasource", + "type": "datasource", + "query": "prometheus", + "current": { + "selected": false, + "text": "Prometheus", + "value": "Prometheus" + } + } + ] + }, + "panels": [ + { + "id": 1, + "title": "Memory Usage", + "type": "timeseries", + "gridPos": {"h": 8, "w": 12, "x": 0, "y": 0}, + "datasource": {"type": "prometheus", "uid": "${datasource}"}, + "targets": [ + { + "datasource": {"type": "prometheus", "uid": "${datasource}"}, + "expr": "redis_memory_used_bytes", + "legendFormat": "Used memory", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "custom": {"drawStyle": "line", "lineWidth": 2, "fillOpacity": 10}, + "unit": "bytes", + "min": 0 + }, + "overrides": [] + } + }, + { + "id": 2, + "title": "Commands / sec", + "type": "timeseries", + "gridPos": {"h": 8, "w": 12, "x": 12, "y": 0}, + "datasource": {"type": "prometheus", "uid": "${datasource}"}, + "targets": [ + { + "datasource": {"type": "prometheus", "uid": "${datasource}"}, + "expr": "rate(redis_commands_processed_total[5m])", + "legendFormat": "Commands/sec", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "custom": {"drawStyle": "line", "lineWidth": 2, "fillOpacity": 10}, + "unit": "cps", + "min": 0 + }, + "overrides": [] + } + }, + { + "id": 3, + "title": "Connected Clients", + "type": "timeseries", + "gridPos": {"h": 8, "w": 12, "x": 0, "y": 8}, + "datasource": {"type": "prometheus", "uid": "${datasource}"}, + "targets": [ + { + "datasource": {"type": "prometheus", "uid": "${datasource}"}, + "expr": "redis_connected_clients", + "legendFormat": "Clients", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "custom": {"drawStyle": "line", "lineWidth": 2, "fillOpacity": 10}, + "unit": "short", + "min": 0 + }, + "overrides": [] + } + }, + { + "id": 4, + "title": "Cache Hit Ratio", + "type": "timeseries", + "gridPos": {"h": 8, "w": 12, "x": 12, "y": 8}, + "datasource": {"type": "prometheus", "uid": "${datasource}"}, + "targets": [ + { + "datasource": {"type": "prometheus", "uid": "${datasource}"}, + "expr": "redis_keyspace_hits_total / (redis_keyspace_hits_total + redis_keyspace_misses_total)", + "legendFormat": "Hit Ratio", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "custom": {"drawStyle": "line", "lineWidth": 2, "fillOpacity": 10}, + "unit": "percentunit", + "min": 0, + "max": 1 + }, + "overrides": [] + } + }, + { + "id": 5, + "title": "Keyspace Hits / Misses", + "type": "timeseries", + "gridPos": {"h": 8, "w": 12, "x": 0, "y": 16}, + "datasource": {"type": "prometheus", "uid": "${datasource}"}, + "targets": [ + { + "datasource": {"type": "prometheus", "uid": "${datasource}"}, + "expr": "rate(redis_keyspace_hits_total[5m])", + "legendFormat": "Hits/sec", + "refId": "A" + }, + { + "datasource": {"type": "prometheus", "uid": "${datasource}"}, + "expr": "rate(redis_keyspace_misses_total[5m])", + "legendFormat": "Misses/sec", + "refId": "B" + } + ], + "fieldConfig": { + "defaults": { + "custom": {"drawStyle": "line", "lineWidth": 2, "fillOpacity": 10}, + "unit": "cps", + "min": 0 + }, + "overrides": [] + } + }, + { + "id": 6, + "title": "Evicted Keys", + "type": "timeseries", + "gridPos": {"h": 8, "w": 12, "x": 12, "y": 16}, + "datasource": {"type": "prometheus", "uid": "${datasource}"}, + "targets": [ + { + "datasource": {"type": "prometheus", "uid": "${datasource}"}, + "expr": "rate(redis_evicted_keys_total[5m])", + "legendFormat": "Evicted/sec", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "custom": {"drawStyle": "line", "lineWidth": 2, "fillOpacity": 10}, + "unit": "cps", + "min": 0 + }, + "overrides": [] + } + } + ] +} diff --git a/docker/grafana/provisioning/dashboards/nexus-system.json b/docker/grafana/provisioning/dashboards/nexus-system.json new file mode 100644 index 0000000..b62d6c7 --- /dev/null +++ b/docker/grafana/provisioning/dashboards/nexus-system.json @@ -0,0 +1,164 @@ +{ + "uid": "nexus-system", + "title": "Nexus — System", + "tags": ["node", "system"], + "timezone": "browser", + "schemaVersion": 36, + "refresh": "30s", + "time": { + "from": "now-1h", + "to": "now" + }, + "templating": { + "list": [ + { + "name": "datasource", + "type": "datasource", + "query": "prometheus", + "current": { + "selected": false, + "text": "Prometheus", + "value": "Prometheus" + } + } + ] + }, + "panels": [ + { + "id": 1, + "title": "CPU Usage %", + "type": "timeseries", + "gridPos": {"h": 8, "w": 12, "x": 0, "y": 0}, + "datasource": {"type": "prometheus", "uid": "${datasource}"}, + "targets": [ + { + "datasource": {"type": "prometheus", "uid": "${datasource}"}, + "expr": "100 - (avg by(instance) (rate(node_cpu_seconds_total{mode=\"idle\"}[5m])) * 100)", + "legendFormat": "CPU %", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "custom": {"drawStyle": "line", "lineWidth": 2, "fillOpacity": 10}, + "unit": "percent", + "min": 0, + "max": 100 + }, + "overrides": [] + } + }, + { + "id": 2, + "title": "Memory Usage %", + "type": "timeseries", + "gridPos": {"h": 8, "w": 12, "x": 12, "y": 0}, + "datasource": {"type": "prometheus", "uid": "${datasource}"}, + "targets": [ + { + "datasource": {"type": "prometheus", "uid": "${datasource}"}, + "expr": "100 * (1 - node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes)", + "legendFormat": "Memory %", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "custom": {"drawStyle": "line", "lineWidth": 2, "fillOpacity": 10}, + "unit": "percent", + "min": 0, + "max": 100 + }, + "overrides": [] + } + }, + { + "id": 3, + "title": "Disk Usage %", + "type": "timeseries", + "gridPos": {"h": 8, "w": 12, "x": 0, "y": 8}, + "datasource": {"type": "prometheus", "uid": "${datasource}"}, + "targets": [ + { + "datasource": {"type": "prometheus", "uid": "${datasource}"}, + "expr": "100 * (1 - node_filesystem_avail_bytes{fstype!~\"tmpfs|ramfs\"} / node_filesystem_size_bytes{fstype!~\"tmpfs|ramfs\"})", + "legendFormat": "{{mountpoint}}", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "custom": {"drawStyle": "line", "lineWidth": 2, "fillOpacity": 10}, + "unit": "percent", + "min": 0, + "max": 100 + }, + "overrides": [] + } + }, + { + "id": 4, + "title": "Network I/O", + "type": "timeseries", + "gridPos": {"h": 8, "w": 12, "x": 12, "y": 8}, + "datasource": {"type": "prometheus", "uid": "${datasource}"}, + "targets": [ + { + "datasource": {"type": "prometheus", "uid": "${datasource}"}, + "expr": "rate(node_network_receive_bytes_total[5m])", + "legendFormat": "Receive {{device}}", + "refId": "A" + }, + { + "datasource": {"type": "prometheus", "uid": "${datasource}"}, + "expr": "rate(node_network_transmit_bytes_total[5m])", + "legendFormat": "Transmit {{device}}", + "refId": "B" + } + ], + "fieldConfig": { + "defaults": { + "custom": {"drawStyle": "line", "lineWidth": 2, "fillOpacity": 10}, + "unit": "Bps", + "min": 0 + }, + "overrides": [] + } + }, + { + "id": 5, + "title": "Load Average", + "type": "timeseries", + "gridPos": {"h": 8, "w": 24, "x": 0, "y": 16}, + "datasource": {"type": "prometheus", "uid": "${datasource}"}, + "targets": [ + { + "datasource": {"type": "prometheus", "uid": "${datasource}"}, + "expr": "node_load1", + "legendFormat": "1m load", + "refId": "A" + }, + { + "datasource": {"type": "prometheus", "uid": "${datasource}"}, + "expr": "node_load5", + "legendFormat": "5m load", + "refId": "B" + }, + { + "datasource": {"type": "prometheus", "uid": "${datasource}"}, + "expr": "node_load15", + "legendFormat": "15m load", + "refId": "C" + } + ], + "fieldConfig": { + "defaults": { + "custom": {"drawStyle": "line", "lineWidth": 2, "fillOpacity": 10}, + "unit": "short", + "min": 0 + }, + "overrides": [] + } + } + ] +}