Monitoring Guide
Overview
This guide covers the monitoring and observability setup for the EyeNet system.
Metrics Collection
System Metrics
/**
 * Host-level resource readings, grouped into CPU, memory and disk.
 *
 * NOTE(review): this guide does not state units for any field — confirm
 * them against the collector before building dashboards on these values.
 */
interface SystemMetrics {
  /** Processor readings. */
  cpu: {
    /** CPU utilisation (presumably a percentage — confirm). */
    usage: number;
    /** CPU temperature (unit not stated — confirm). */
    temperature: number;
  };

  /** Memory capacity and how it is currently split. */
  memory: {
    total: number;
    used: number;
    free: number;
  };

  /** Disk capacity and how it is currently split. */
  disk: {
    total: number;
    used: number;
    free: number;
  };
}
Network Metrics
/**
 * Network-level readings: throughput per direction, latency, packet loss
 * and the number of connections.
 *
 * NOTE(review): units (bytes vs. bits, ms vs. s, ratio vs. percent) are not
 * stated in this guide — confirm before alerting on these values.
 */
interface NetworkMetrics {
  /** Throughput, split by traffic direction. */
  bandwidth: {
    inbound: number;
    outbound: number;
  };

  /** Measured latency. */
  latency: number;

  /** Measured packet loss. */
  packetLoss: number;

  /** Connection count. */
  connections: number;
}
Monitoring Stack
Prometheus Setup
# prometheus.yml
# Scrape the EyeNet application every 15s and evaluate rules on the same cadence.
global:
  scrape_interval: 15s
  evaluation_interval: 15s

scrape_configs:
  # Single static target: the EyeNet process listening on port 3000.
  - job_name: 'eyenet'
    static_configs:
      - targets: ['localhost:3000']
Grafana Dashboards
{
  "dashboard": {
    "id": null,
    "title": "EyeNet Overview",
    "panels": [
      {
        "title": "CPU Usage",
        "type": "graph",
        "datasource": "Prometheus",
        "targets": [
          {
            "expr": "system_cpu_usage"
          }
        ]
      }
    ]
  }
}
Logging
Log Levels
/**
 * Severity levels attached to log entries.
 *
 * Each member maps to its lowercase string form, so the runtime value of
 * `LogLevel.ERROR` is `'error'`.
 */
enum LogLevel {
  ERROR = 'error',
  WARN = 'warn',
  INFO = 'info',
  DEBUG = 'debug',
}
Log Format
/**
 * Shape of a single structured log record.
 *
 * `context` always carries `service` and `operation`; any further keys are
 * free-form. Those extra values are typed `unknown` rather than `any`, so
 * producers can attach anything while consumers must narrow a value before
 * using it.
 */
interface LogEntry {
  /** When the entry was produced (string; exact format not stated here — confirm). */
  timestamp: string;

  /** Severity of the entry. */
  level: LogLevel;

  /** Human-readable log message. */
  message: string;

  /** Structured metadata describing where the entry came from. */
  context: {
    service: string;
    operation: string;
    /** Additional free-form fields; narrow before use. */
    [key: string]: unknown;
  };
}
ELK Stack Configuration
# logstash.conf

# Receive events from Beats shippers on port 5044.
input {
  beats {
    port => 5044
  }
}

# Parse the JSON document carried in each event's "message" field.
filter {
  json {
    source => "message"
  }
}

# Write to the local Elasticsearch node, one index per day.
output {
  elasticsearch {
    hosts => ["localhost:9200"]
    index => "eyenet-logs-%{+YYYY.MM.dd}"
  }
}
Alerting
Alert Rules
# alert_rules.yml — Prometheus alerting rules (this is NOT Alertmanager configuration).
# Prometheus evaluates these rules when the file is listed under `rule_files`
# in prometheus.yml; Alertmanager only routes the alerts that fire.
groups:
  - name: eyenet
    rules:
      # Fires once system_cpu_usage has stayed above 80 for 5 minutes.
      - alert: HighCPUUsage
        expr: system_cpu_usage > 80
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: High CPU usage detected
Alert Channels
/**
 * Destination an alert can be delivered to.
 *
 * `type` selects the transport; `config` holds its settings, all optional.
 *
 * NOTE(review): which `config` fields each `type` requires is not stated
 * here (presumably `recipients` for email, `webhook_url` for webhook and
 * `channel` for slack) — confirm against the notifier implementation.
 */
interface AlertChannel {
  /** Delivery transport. */
  type: 'email' | 'slack' | 'webhook';

  /** Transport settings; every field is optional. */
  config: {
    recipients?: string[];
    webhook_url?: string;
    channel?: string;
  };
}
Health Checks
Endpoint Configuration
/**
 * Definition of one periodic health probe.
 *
 * NOTE(review): `interval` and `timeout` are presumably seconds, and the two
 * thresholds presumably count consecutive results before the state flips —
 * neither is stated in this guide; confirm against the checker.
 */
interface HealthCheck {
  /** Identifier of the check. */
  name: string;

  /** Endpoint that is probed. */
  endpoint: string;

  /** How often the probe runs. */
  interval: number;

  /** How long a probe may take. */
  timeout: number;

  /** Threshold for reporting the target as healthy. */
  healthyThreshold: number;

  /** Threshold for reporting the target as unhealthy. */
  unhealthyThreshold: number;
}
/**
 * Health probes that are configured for the system.
 *
 * Currently a single entry: the `api` check against `/health`.
 */
const healthChecks: HealthCheck[] = [
  {
    name: 'api',
    endpoint: '/health',
    // Time values are presumably seconds — TODO confirm.
    interval: 30,
    timeout: 5,
    healthyThreshold: 2,
    unhealthyThreshold: 3,
  },
];
Health Status
/**
 * Aggregated health report: one overall verdict plus a per-service result.
 */
interface HealthStatus {
  /** Overall verdict. */
  status: 'healthy' | 'unhealthy';

  /** Latest result for each service, keyed by service name. */
  checks: {
    [service: string]: {
      /** Whether the service was reachable. */
      status: 'up' | 'down';
      /** Latency recorded for the check (unit not stated — confirm). */
      latency: number;
      /** When the check was last performed. */
      lastCheck: Date;
    };
  };
}
Tracing
OpenTelemetry Configuration
import { NodeTracerProvider } from '@opentelemetry/node';
import { SimpleSpanProcessor } from '@opentelemetry/tracing';
import { JaegerExporter } from '@opentelemetry/exporter-jaeger';
// Tracing bootstrap: create a tracer provider, attach a Jaeger exporter to it
// through a span processor, then register the provider.
//
// NOTE(review): the '@opentelemetry/node' and '@opentelemetry/tracing' package
// names imported above appear to have been renamed upstream (to
// '@opentelemetry/sdk-trace-node' and '@opentelemetry/sdk-trace-base') —
// confirm against the OpenTelemetry JS version this project pins.
const provider = new NodeTracerProvider();

const exporter = new JaegerExporter({
  endpoint: 'http://localhost:14268/api/traces',
});

const spanProcessor = new SimpleSpanProcessor(exporter);
provider.addSpanProcessor(spanProcessor);
provider.register();
Performance Monitoring
Metrics to Monitor
- Application Metrics
  - Response times
  - Error rates
  - Request rates
  - Active users
- Infrastructure Metrics
  - CPU usage
  - Memory usage
  - Disk I/O
  - Network traffic
- Business Metrics
  - Active devices
  - Alert frequency
  - System uptime
  - User activity
Performance Thresholds
/**
 * Performance limits for the API, database and cache layers.
 *
 * Durations are in milliseconds; rates and ratios are fractions of 1
 * (for example `0.01` means 1%).
 */
const performanceThresholds = {
  api: {
    /** Response time, in ms. */
    responseTime: 500,
    /** Error rate: 1%. */
    errorRate: 0.01,
    /** Availability: 99%. */
    availability: 0.99,
  },

  database: {
    /** Query time, in ms. */
    queryTime: 100,
    /** Connection count. */
    connections: 100,
    /** Utilization: 80%. */
    utilization: 0.8,
  },

  cache: {
    /** Hit rate: 80%. */
    hitRate: 0.8,
    /** Eviction rate: 10%. */
    evictionRate: 0.1,
  },
};
Visualization
Grafana Dashboard Examples
- System Overview
  - CPU, Memory, Disk usage
  - Network traffic
  - Error rates
- Network Devices
  - Device status
  - Connection quality
  - Bandwidth usage
- Application Performance
  - Response times
  - Error rates
  - Active sessions
Backup Monitoring
Backup Status
/**
 * Outcome of the most recent backup run.
 *
 * NOTE(review): units for `size` and `duration` are not stated in this
 * guide — confirm against the backup job.
 */
interface BackupStatus {
  /** When the last backup ran. */
  lastBackup: Date;

  /** Whether that backup succeeded. */
  status: 'success' | 'failed';

  /** Size of the backup. */
  size: number;

  /** How long the backup took. */
  duration: number;

  /** Where the backup was stored. */
  location: string;
}
Backup Checks
/**
 * Backup checks, one entry per backup type.
 *
 * NOTE(review): `schedule` looks like a five-field cron expression
 * ('0 0 * * *' = daily at 00:00, '0 0 * * 0' = Sundays at 00:00) and
 * `retention` is presumably a number of days — confirm both.
 */
const backupMonitoring = {
  checks: [
    // Database backup.
    {
      type: 'database',
      schedule: '0 0 * * *',
      retention: 7,
    },
    // Configuration backup.
    {
      type: 'configuration',
      schedule: '0 0 * * 0',
      retention: 30,
    },
  ],
};
Security Monitoring
Security Metrics
/**
 * Security-related counters tracked by monitoring.
 */
interface SecurityMetrics {
  /** Number of failed logins. */
  failedLogins: number;

  /** Number of unauthorized access events. */
  unauthorizedAccess: number;

  /** Number of suspicious activities. */
  suspiciousActivities: number;

  /** Vulnerability counts, split by severity. */
  vulnerabilities: {
    high: number;
    medium: number;
    low: number;
  };
}
Security Alerts
/**
 * Alert settings for security events, keyed by event name.
 *
 * NOTE(review): presumably `action` is taken once `threshold` events occur
 * within `window` — the evaluation logic is not shown here; confirm.
 */
const securityAlerts = {
  // Failed logins: threshold 5 over a '5m' window, action 'notify'.
  failedLogins: {
    threshold: 5,
    window: '5m',
    action: 'notify',
  },

  // Unauthorized access: threshold 1 over a '1m' window, action 'block'.
  unauthorizedAccess: {
    threshold: 1,
    window: '1m',
    action: 'block',
  },
};