/
Monitoring (Prometheus & Grafana)

Monitoring (Prometheus & Grafana)

UserWise natively supports Prometheus and Grafana for comprehensive system monitoring. Prometheus stats are provided by Discourse's prometheus_exporter, which offers a wide array of data points that can be tracked and visualized within Grafana.

The primary data points available in the context of UserWise are categorized as follows:

  • General: These metrics provide insights into overall system health, including memory usage, CPU utilization, and disk I/O.

  • Puma: These metrics track the performance of the Puma HTTP process, crucial for understanding request handling and response times.

  • Sidekiq: These metrics monitor the Sidekiq worker queue process, helping you track job processing efficiency and identify potential bottlenecks.

  • Redis: These metrics provide insights into the performance and health of your Redis instance, crucial for caching and job queue management.

  • PostgreSQL: These metrics track the performance of your PostgreSQL database, ensuring efficient data storage and retrieval.

Exposing Prometheus Exporter Metrics

The Prometheus exporter runs on both the Frontend and Worker (backend) pods. A NodePort service must be added manually for each of the two deployments: deployment/userwise-app-frontend and deployment/userwise-app-worker.

Frontend NodePort

The Frontend NodePort should look similar to the configuration below. Note port 9394, which is the port the frontend exporter is exposed on.

# NodePort Service that exposes the Prometheus exporter of the frontend pods.
apiVersion: v1
kind: Service
metadata:
  name: userwise-app-frontend-exporter
  labels:
    app: userwise
    tier: frontend
    type: exporter
spec:
  # Route to the frontend pods (matched by their app/tier labels).
  selector:
    app: userwise
    tier: frontend
  ports:
    - name: exporter
      port: 9394        # port the frontend exporter is exposed on
      targetPort: 9394
  type: NodePort

Worker NodePort

The Worker NodePort should look similar to the configuration below. Note port 9395, which is the port the worker exporter is exposed on.

# NodePort Service that exposes the Prometheus exporter of the worker pods.
apiVersion: v1
kind: Service
metadata:
  name: userwise-app-worker-exporter
  labels:
    app: userwise
    # Was "frontend" (apparent copy-paste from the frontend manifest);
    # set to "worker" so the Service's own labels agree with its selector.
    tier: worker
    type: exporter
spec:
  # Route to the worker pods (matched by their app/tier labels).
  selector:
    app: userwise
    tier: worker
  ports:
    - name: exporter
      port: 9395        # port the worker exporter is exposed on
      targetPort: 9395
  type: NodePort

Grafana Dashboard

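To get you started, UserWise provides a default Grafana dashboard that visualizes key system metrics. This dashboard serves as a valuable starting point for monitoring your UserWise instance and can be customized to meet your specific needs. Note that several panel queries filter on the Prometheus `service` label (using the values `userwise-frontend-exporter` and `userwise-worker`); if your scrape configuration assigns different `service` label values, update the panel queries to match.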

{ "annotations": { "list": [ { "builtIn": 1, "datasource": { "type": "grafana", "uid": "-- Grafana --" }, "enable": true, "hide": true, "iconColor": "rgba(0, 211, 255, 1)", "name": "Annotations & Alerts", "type": "dashboard" } ] }, "editable": true, "fiscalYearStartMonth": 0, "graphTooltip": 0, "id": 66, "links": [], "liveNow": false, "panels": [ { "gridPos": { "h": 1, "w": 24, "x": 0, "y": 0 }, "id": 8, "title": "Frontend (HTTP Handler & Dashboard)", "type": "row" }, { "datasource": { "type": "prometheus", "uid": "prometheus" }, "description": "The max requests that are waiting to be processed. Caused through thread saturation.\n\nA rise in this can indicate that there are not enough threads, processes, or pods available in your cluster.", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "auto", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "green", "value": null }, { "color": "red", "value": 80 } ] }, "unitScale": true }, "overrides": [] }, "gridPos": { "h": 8, "w": 12, "x": 0, "y": 1 }, "id": 5, "options": { "legend": { "calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true }, "tooltip": { "mode": "single", "sort": "none" } }, "pluginVersion": "10.3.3", "targets": [ { "datasource": { "type": "prometheus", "uid": "prometheus" }, "disableTextWrap": false, "editorMode": "builder", "expr": "max(ruby_puma_request_backlog{service=\"userwise-frontend-exporter\"})", 
"fullMetaSearch": false, "includeNullMetadata": true, "instant": false, "legendFormat": "Max Requests Backlogged (all pods)", "range": true, "refId": "A", "useBackend": false }, { "datasource": { "type": "prometheus", "uid": "prometheus" }, "disableTextWrap": false, "editorMode": "builder", "expr": "avg(ruby_puma_request_backlog{service=\"userwise-frontend-exporter\"})", "fullMetaSearch": false, "hide": false, "includeNullMetadata": true, "instant": false, "legendFormat": "Average Requests Backlogged (per pod)", "range": true, "refId": "B", "useBackend": false } ], "title": "HTTP Request Backlog Per Pod", "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "prometheus" }, "description": "The average HTTP response time", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "auto", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "green", "value": null }, { "color": "red", "value": 80 } ] }, "unitScale": true }, "overrides": [] }, "gridPos": { "h": 8, "w": 12, "x": 12, "y": 1 }, "id": 1, "options": { "legend": { "calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true }, "tooltip": { "mode": "single", "sort": "none" } }, "targets": [ { "datasource": { "type": "prometheus", "uid": "prometheus" }, "disableTextWrap": false, "editorMode": "builder", "expr": "avg(ruby_http_request_duration_seconds)", "fullMetaSearch": false, "includeNullMetadata": true, "instant": false, 
"legendFormat": "Avg HTTP Response Time", "range": true, "refId": "A", "useBackend": false }, { "datasource": { "type": "prometheus", "uid": "prometheus" }, "disableTextWrap": false, "editorMode": "builder", "expr": "max(ruby_http_request_duration_seconds)", "fullMetaSearch": false, "hide": false, "includeNullMetadata": true, "instant": false, "legendFormat": "Max HTTP Response Time", "range": true, "refId": "B", "useBackend": false } ], "title": "HTTP Response Times", "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "prometheus" }, "description": "Describes the average health of the the connection pools, aggregated across all frontend processes", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "auto", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "green", "value": null }, { "color": "red", "value": 80 } ] }, "unitScale": true }, "overrides": [] }, "gridPos": { "h": 8, "w": 12, "x": 0, "y": 9 }, "id": 10, "options": { "legend": { "calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true }, "tooltip": { "mode": "single", "sort": "none" } }, "targets": [ { "datasource": { "type": "prometheus", "uid": "prometheus" }, "disableTextWrap": false, "editorMode": "builder", "expr": "avg(ruby_active_record_connection_pool_connections{service=\"userwise-frontend-exporter\"})", "fullMetaSearch": false, "includeNullMetadata": true, "instant": false, "legendFormat": "Avg 
Active Connections", "range": true, "refId": "A", "useBackend": false }, { "datasource": { "type": "prometheus", "uid": "prometheus" }, "disableTextWrap": false, "editorMode": "builder", "expr": "avg(ruby_active_record_connection_pool_waiting{service=\"userwise-frontend-exporter\"})", "fullMetaSearch": false, "hide": false, "includeNullMetadata": true, "instant": false, "legendFormat": "Avg Waiting For Connection", "range": true, "refId": "B", "useBackend": false }, { "datasource": { "type": "prometheus", "uid": "prometheus" }, "disableTextWrap": false, "editorMode": "builder", "expr": "avg(ruby_active_record_connection_pool_size{service=\"userwise-frontend-exporter\"})", "fullMetaSearch": false, "hide": false, "includeNullMetadata": true, "instant": false, "legendFormat": "Avg Total Available", "range": true, "refId": "C", "useBackend": false } ], "title": "PSQL Connection Pool (Agg All Processes)", "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "prometheus" }, "description": "The requests per minute, across all pods, processes & threads.", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "auto", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "green", "value": null }, { "color": "red", "value": 80 } ] }, "unitScale": true }, "overrides": [] }, "gridPos": { "h": 8, "w": 12, "x": 12, "y": 9 }, "id": 12, "options": { "legend": { "calcs": [], "displayMode": "list", "placement": 
"bottom", "showLegend": true }, "tooltip": { "mode": "single", "sort": "none" } }, "targets": [ { "datasource": { "type": "prometheus", "uid": "prometheus" }, "disableTextWrap": false, "editorMode": "builder", "expr": "sum(increase(ruby_http_requests_total{service=\"userwise-frontend-exporter\"}[1m]))", "fullMetaSearch": false, "includeNullMetadata": true, "instant": false, "legendFormat": "__auto", "range": true, "refId": "A", "useBackend": false } ], "title": "Requests per Minute", "type": "timeseries" }, { "collapsed": false, "gridPos": { "h": 1, "w": 24, "x": 0, "y": 17 }, "id": 7, "panels": [], "title": "Worker", "type": "row" }, { "datasource": { "type": "prometheus", "uid": "prometheus" }, "description": "Describes the average health of the the connection pools, aggregated across all worker processes", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "auto", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "green", "value": null }, { "color": "red", "value": 80 } ] }, "unitScale": true }, "overrides": [] }, "gridPos": { "h": 8, "w": 12, "x": 0, "y": 18 }, "id": 11, "options": { "legend": { "calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true }, "tooltip": { "mode": "single", "sort": "none" } }, "targets": [ { "datasource": { "type": "prometheus", "uid": "prometheus" }, "disableTextWrap": false, "editorMode": "builder", "expr": 
"avg(ruby_active_record_connection_pool_connections{service=\"userwise-worker\"})", "fullMetaSearch": false, "includeNullMetadata": true, "instant": false, "legendFormat": "Avg Active Connections", "range": true, "refId": "A", "useBackend": false }, { "datasource": { "type": "prometheus", "uid": "prometheus" }, "disableTextWrap": false, "editorMode": "builder", "expr": "avg(ruby_active_record_connection_pool_waiting{service=\"userwise-worker\"})", "fullMetaSearch": false, "hide": false, "includeNullMetadata": true, "instant": false, "legendFormat": "Avg Waiting For Connection", "range": true, "refId": "B", "useBackend": false }, { "datasource": { "type": "prometheus", "uid": "prometheus" }, "disableTextWrap": false, "editorMode": "builder", "expr": "avg(ruby_active_record_connection_pool_size{service=\"userwise-worker\"})", "fullMetaSearch": false, "hide": false, "includeNullMetadata": true, "instant": false, "legendFormat": "Avg Total Available", "range": true, "refId": "C", "useBackend": false } ], "title": "PSQL Connection Pool (Agg All Processes)", "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "prometheus" }, "description": "Queue backlog (enqueued, ready to be processed, but not processed yet).\n\nAn increasing queue can indicate insufficient worker pods or improper runtime configurations for your workload.", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "auto", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "mappings": [], "thresholds": { 
"mode": "absolute", "steps": [ { "color": "green", "value": null }, { "color": "red", "value": 80 } ] }, "unitScale": true }, "overrides": [] }, "gridPos": { "h": 8, "w": 12, "x": 12, "y": 18 }, "id": 13, "options": { "legend": { "calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true }, "tooltip": { "mode": "single", "sort": "none" } }, "pluginVersion": "10.3.3", "targets": [ { "datasource": { "type": "prometheus", "uid": "prometheus" }, "disableTextWrap": false, "editorMode": "builder", "expr": "avg by(queue) (ruby_sidekiq_queue_backlog{service=\"userwise-worker\"})", "fullMetaSearch": false, "includeNullMetadata": true, "instant": false, "legendFormat": "__auto", "range": true, "refId": "A", "useBackend": false } ], "title": "Worker Queue Backlog", "type": "timeseries" }, { "collapsed": false, "gridPos": { "h": 1, "w": 24, "x": 0, "y": 26 }, "id": 6, "panels": [], "title": "Databases", "type": "row" }, { "datasource": { "type": "prometheus", "uid": "prometheus" }, "description": "The average response times for PSQL queries across all processes.", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "auto", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "green", "value": null }, { "color": "red", "value": 80 } ] }, "unitScale": true }, "overrides": [] }, "gridPos": { "h": 8, "w": 12, "x": 0, "y": 27 }, "id": 3, "options": { "legend": { "calcs": [], "displayMode": "list", 
"placement": "bottom", "showLegend": true }, "tooltip": { "mode": "single", "sort": "none" } }, "targets": [ { "datasource": { "type": "prometheus", "uid": "prometheus" }, "disableTextWrap": false, "editorMode": "builder", "expr": "avg(ruby_http_request_sql_duration_seconds)", "fullMetaSearch": false, "includeNullMetadata": true, "instant": false, "legendFormat": "__auto", "range": true, "refId": "A", "useBackend": false }, { "datasource": { "type": "prometheus", "uid": "prometheus" }, "disableTextWrap": false, "editorMode": "builder", "expr": "max(ruby_http_request_sql_duration_seconds)", "fullMetaSearch": false, "hide": false, "includeNullMetadata": true, "instant": false, "legendFormat": "__auto", "range": true, "refId": "B", "useBackend": false } ], "title": "PSQL Response Durations", "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "prometheus" }, "description": "The average response times for Redis commands across all processes.", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "auto", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "green", "value": null }, { "color": "red", "value": 80 } ] }, "unitScale": true }, "overrides": [] }, "gridPos": { "h": 8, "w": 12, "x": 12, "y": 27 }, "id": 2, "options": { "legend": { "calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true }, "tooltip": { "mode": "single", "sort": "none" } }, "targets": [ { "datasource": 
{ "type": "prometheus", "uid": "prometheus" }, "disableTextWrap": false, "editorMode": "builder", "expr": "avg(ruby_http_request_redis_duration_seconds)", "fullMetaSearch": false, "includeNullMetadata": true, "instant": false, "legendFormat": "__auto", "range": true, "refId": "A", "useBackend": false }, { "datasource": { "type": "prometheus", "uid": "prometheus" }, "disableTextWrap": false, "editorMode": "builder", "expr": "max(ruby_http_request_redis_duration_seconds)", "fullMetaSearch": false, "hide": false, "includeNullMetadata": true, "instant": false, "legendFormat": "__auto", "range": true, "refId": "B", "useBackend": false } ], "title": "Redis Response Durations", "type": "timeseries" } ], "refresh": false, "schemaVersion": 39, "tags": [], "templating": { "list": [] }, "time": { "from": "now-1h", "to": "now" }, "timepicker": {}, "timezone": "", "title": "Responses", "uid": "aa9aef85-b303-4153-9a40-825647fa071f", "version": 17, "weekStart": "" }

Related content

Powered by UserWise