Monitoring (Prometheus & Grafana)
UserWise natively supports Prometheus and Grafana for comprehensive system monitoring. Prometheus stats are provided by Discourse's prometheus_exporter, which offers a wide array of data points that can be tracked and visualized within Grafana.
The primary data points available in the context of UserWise are categorized as follows:
General: These metrics provide insights into overall system health, including memory usage, CPU utilization, and disk I/O.
Puma: These metrics track the performance of the Puma HTTP process, crucial for understanding request handling and response times.
Sidekiq: These metrics monitor the Sidekiq worker queue process, helping you track job processing efficiency and identify potential bottlenecks.
Redis: These metrics provide insights into the performance and health of your Redis instance, crucial for caching and job queue management.
PostgreSQL: These metrics track the performance of your PostgreSQL database, ensuring efficient data storage and retrieval.
Exposing Prometheus Exporter Metrics
The prometheus exporter is implemented on both Frontend and Backend pods. A NodePort service will need to be manually added for both the deployment/userwise-app-frontend
and deployment/userwise-app-worker
.
Frontend NodePort
The Frontend NodePort should look similar to the configuration below. Take care to notice the port 9394
which is the port the exporter is exposed on.
apiVersion: v1
kind: Service
metadata:
name: userwise-app-frontend-exporter
labels:
app: userwise
tier: frontend
type: exporter
spec:
selector:
app: userwise
tier: frontend
ports:
- name: exporter
port: 9394
targetPort: 9394
type: NodePort
Worker NodePort
The Worker NodePort should look similar to the configuration below. Take care to notice the port 9395
which is the port the exporter is exposed on.
apiVersion: v1
kind: Service
metadata:
name: userwise-app-worker-exporter
labels:
app: userwise
tier: frontend
type: exporter
spec:
selector:
app: userwise
tier: worker
ports:
- name: exporter
port: 9395
targetPort: 9395
type: NodePort
Grafana Dashboard
To get you started, UserWise provides a default Grafana dashboard that visualizes key system metrics. This dashboard serves as a valuable starting point for monitoring your UserWise instance and can be customized to meet your specific needs.
{
"annotations": {
"list": [
{
"builtIn": 1,
"datasource": {
"type": "grafana",
"uid": "-- Grafana --"
},
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"type": "dashboard"
}
]
},
"editable": true,
"fiscalYearStartMonth": 0,
"graphTooltip": 0,
"id": 66,
"links": [],
"liveNow": false,
"panels": [
{
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 0
},
"id": 8,
"title": "Frontend (HTTP Handler & Dashboard)",
"type": "row"
},
{
"datasource": {
"type": "prometheus",
"uid": "prometheus"
},
"description": "The max requests that are waiting to be processed. Caused through thread saturation.\n\nA rise in this can indicate that there are not enough threads, processes, or pods available in your cluster.",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unitScale": true
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 1
},
"id": 5,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"pluginVersion": "10.3.3",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "prometheus"
},
"disableTextWrap": false,
"editorMode": "builder",
"expr": "max(ruby_puma_request_backlog{service=\"userwise-frontend-exporter\"})",
"fullMetaSearch": false,
"includeNullMetadata": true,
"instant": false,
"legendFormat": "Max Requests Backlogged (all pods)",
"range": true,
"refId": "A",
"useBackend": false
},
{
"datasource": {
"type": "prometheus",
"uid": "prometheus"
},
"disableTextWrap": false,
"editorMode": "builder",
"expr": "avg(ruby_puma_request_backlog{service=\"userwise-frontend-exporter\"})",
"fullMetaSearch": false,
"hide": false,
"includeNullMetadata": true,
"instant": false,
"legendFormat": "Average Requests Backlogged (per pod)",
"range": true,
"refId": "B",
"useBackend": false
}
],
"title": "HTTP Request Backlog Per Pod",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "prometheus"
},
"description": "The average HTTP response time",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unitScale": true
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 1
},
"id": 1,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "prometheus"
},
"disableTextWrap": false,
"editorMode": "builder",
"expr": "avg(ruby_http_request_duration_seconds)",
"fullMetaSearch": false,
"includeNullMetadata": true,
"instant": false,
"legendFormat": "Avg HTTP Response Time",
"range": true,
"refId": "A",
"useBackend": false
},
{
"datasource": {
"type": "prometheus",
"uid": "prometheus"
},
"disableTextWrap": false,
"editorMode": "builder",
"expr": "max(ruby_http_request_duration_seconds)",
"fullMetaSearch": false,
"hide": false,
"includeNullMetadata": true,
"instant": false,
"legendFormat": "Max HTTP Response Time",
"range": true,
"refId": "B",
"useBackend": false
}
],
"title": "HTTP Response Times",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "prometheus"
},
"description": "Describes the average health of the the connection pools, aggregated across all frontend processes",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unitScale": true
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 9
},
"id": 10,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "prometheus"
},
"disableTextWrap": false,
"editorMode": "builder",
"expr": "avg(ruby_active_record_connection_pool_connections{service=\"userwise-frontend-exporter\"})",
"fullMetaSearch": false,
"includeNullMetadata": true,
"instant": false,
"legendFormat": "Avg Active Connections",
"range": true,
"refId": "A",
"useBackend": false
},
{
"datasource": {
"type": "prometheus",
"uid": "prometheus"
},
"disableTextWrap": false,
"editorMode": "builder",
"expr": "avg(ruby_active_record_connection_pool_waiting{service=\"userwise-frontend-exporter\"})",
"fullMetaSearch": false,
"hide": false,
"includeNullMetadata": true,
"instant": false,
"legendFormat": "Avg Waiting For Connection",
"range": true,
"refId": "B",
"useBackend": false
},
{
"datasource": {
"type": "prometheus",
"uid": "prometheus"
},
"disableTextWrap": false,
"editorMode": "builder",
"expr": "avg(ruby_active_record_connection_pool_size{service=\"userwise-frontend-exporter\"})",
"fullMetaSearch": false,
"hide": false,
"includeNullMetadata": true,
"instant": false,
"legendFormat": "Avg Total Available",
"range": true,
"refId": "C",
"useBackend": false
}
],
"title": "PSQL Connection Pool (Agg All Processes)",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "prometheus"
},
"description": "The requests per minute, across all pods, processes & threads.",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unitScale": true
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 9
},
"id": 12,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "prometheus"
},
"disableTextWrap": false,
"editorMode": "builder",
"expr": "sum(increase(ruby_http_requests_total{service=\"userwise-frontend-exporter\"}[1m]))",
"fullMetaSearch": false,
"includeNullMetadata": true,
"instant": false,
"legendFormat": "__auto",
"range": true,
"refId": "A",
"useBackend": false
}
],
"title": "Requests per Minute",
"type": "timeseries"
},
{
"collapsed": false,
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 17
},
"id": 7,
"panels": [],
"title": "Worker",
"type": "row"
},
{
"datasource": {
"type": "prometheus",
"uid": "prometheus"
},
"description": "Describes the average health of the the connection pools, aggregated across all worker processes",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unitScale": true
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 18
},
"id": 11,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "prometheus"
},
"disableTextWrap": false,
"editorMode": "builder",
"expr": "avg(ruby_active_record_connection_pool_connections{service=\"userwise-worker\"})",
"fullMetaSearch": false,
"includeNullMetadata": true,
"instant": false,
"legendFormat": "Avg Active Connections",
"range": true,
"refId": "A",
"useBackend": false
},
{
"datasource": {
"type": "prometheus",
"uid": "prometheus"
},
"disableTextWrap": false,
"editorMode": "builder",
"expr": "avg(ruby_active_record_connection_pool_waiting{service=\"userwise-worker\"})",
"fullMetaSearch": false,
"hide": false,
"includeNullMetadata": true,
"instant": false,
"legendFormat": "Avg Waiting For Connection",
"range": true,
"refId": "B",
"useBackend": false
},
{
"datasource": {
"type": "prometheus",
"uid": "prometheus"
},
"disableTextWrap": false,
"editorMode": "builder",
"expr": "avg(ruby_active_record_connection_pool_size{service=\"userwise-worker\"})",
"fullMetaSearch": false,
"hide": false,
"includeNullMetadata": true,
"instant": false,
"legendFormat": "Avg Total Available",
"range": true,
"refId": "C",
"useBackend": false
}
],
"title": "PSQL Connection Pool (Agg All Processes)",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "prometheus"
},
"description": "Queue backlog (enqueued, ready to be processed, but not processed yet).\n\nAn increasing queue can indicate insufficient worker pods or improper runtime configurations for your workload.",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unitScale": true
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 18
},
"id": 13,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"pluginVersion": "10.3.3",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "prometheus"
},
"disableTextWrap": false,
"editorMode": "builder",
"expr": "avg by(queue) (ruby_sidekiq_queue_backlog{service=\"userwise-worker\"})",
"fullMetaSearch": false,
"includeNullMetadata": true,
"instant": false,
"legendFormat": "__auto",
"range": true,
"refId": "A",
"useBackend": false
}
],
"title": "Worker Queue Backlog",
"type": "timeseries"
},
{
"collapsed": false,
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 26
},
"id": 6,
"panels": [],
"title": "Databases",
"type": "row"
},
{
"datasource": {
"type": "prometheus",
"uid": "prometheus"
},
"description": "The average response times for PSQL queries across all processes.",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unitScale": true
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 27
},
"id": 3,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "prometheus"
},
"disableTextWrap": false,
"editorMode": "builder",
"expr": "avg(ruby_http_request_sql_duration_seconds)",
"fullMetaSearch": false,
"includeNullMetadata": true,
"instant": false,
"legendFormat": "__auto",
"range": true,
"refId": "A",
"useBackend": false
},
{
"datasource": {
"type": "prometheus",
"uid": "prometheus"
},
"disableTextWrap": false,
"editorMode": "builder",
"expr": "max(ruby_http_request_sql_duration_seconds)",
"fullMetaSearch": false,
"hide": false,
"includeNullMetadata": true,
"instant": false,
"legendFormat": "__auto",
"range": true,
"refId": "B",
"useBackend": false
}
],
"title": "PSQL Response Durations",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "prometheus"
},
"description": "The average response times for Redis commands across all processes.",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unitScale": true
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 27
},
"id": 2,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "prometheus"
},
"disableTextWrap": false,
"editorMode": "builder",
"expr": "avg(ruby_http_request_redis_duration_seconds)",
"fullMetaSearch": false,
"includeNullMetadata": true,
"instant": false,
"legendFormat": "__auto",
"range": true,
"refId": "A",
"useBackend": false
},
{
"datasource": {
"type": "prometheus",
"uid": "prometheus"
},
"disableTextWrap": false,
"editorMode": "builder",
"expr": "max(ruby_http_request_redis_duration_seconds)",
"fullMetaSearch": false,
"hide": false,
"includeNullMetadata": true,
"instant": false,
"legendFormat": "__auto",
"range": true,
"refId": "B",
"useBackend": false
}
],
"title": "Redis Response Durations",
"type": "timeseries"
}
],
"refresh": false,
"schemaVersion": 39,
"tags": [],
"templating": {
"list": []
},
"time": {
"from": "now-1h",
"to": "now"
},
"timepicker": {},
"timezone": "",
"title": "Responses",
"uid": "aa9aef85-b303-4153-9a40-825647fa071f",
"version": 17,
"weekStart": ""
}
Related content
Powered by UserWise