Time-Series Databases
Time-series databases are optimized for timestamped data. Learn how to use InfluxDB, TimescaleDB, and Prometheus for metrics, IoT, and monitoring.
What Is Time-Series Data?
Time-series data is data with a timestamp that accumulates continuously:

| timestamp | metric | value | tags |
|---|---|---|---|
| 2024-01-15 10:00:00 | cpu_usage | 45.2 | host=a |
| 2024-01-15 10:00:01 | cpu_usage | 47.8 | host=a |
| 2024-01-15 10:00:02 | cpu_usage | 43.1 | host=a |
| 2024-01-15 10:00:00 | cpu_usage | 32.5 | host=b |
| ... | ... | ... | ... |

Characteristics:
- The timestamp is the primary key
- Data is rarely updated (append-only)
- High write rate
- Time-based queries (last hour, last day, etc.)
- Aggregations (AVG, MAX, MIN per time window)

Use cases:
- Server/application monitoring
- IoT sensor data
- Financial time series (stock prices)
- User analytics/events
- Log aggregation
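To make the table above concrete: each row is one data point. A purely illustrative sketch in JavaScript (the object shape is an assumption for explanation, not any database's API):

// Illustrative only: one time-series data point, mirroring the
// timestamp/metric/value/tags columns shown above.
const point = {
  timestamp: new Date('2024-01-15T10:00:00Z'), // primary key: when it was measured
  metric: 'cpu_usage',                         // what was measured
  value: 45.2,                                 // the measurement itself
  tags: { host: 'a' }                          // metadata for filtering/grouping
};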
InfluxDB
# Docker
docker run -d -p 8086:8086 \
  -v influxdb-data:/var/lib/influxdb2 \
  influxdb:2.7

# InfluxDB CLI
influx setup \
  --username admin \
  --password password123 \
  --org myorg \
  --bucket metrics \
  --force

# Write data (line protocol)
# measurement,tag1=value1,tag2=value2 field1=value1,field2=value2 timestamp
influx write \
  --bucket metrics \
  --org myorg \
  'cpu,host=server01,region=eu usage=45.2,system=12.3 1705312800000000000'
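Since the line protocol is plain text, it is easy to generate programmatically. A minimal sketch in Node.js (the helper function is hypothetical and skips the escaping of spaces and commas that real client libraries perform):

// Hypothetical helper: builds one line-protocol entry in the format above.
// Real clients also escape special characters in tag and field values.
function toLineProtocol(measurement, tags, fields, timestampNs) {
  const tagStr = Object.entries(tags).map(([k, v]) => `${k}=${v}`).join(',');
  const fieldStr = Object.entries(fields).map(([k, v]) => `${k}=${v}`).join(',');
  return `${measurement},${tagStr} ${fieldStr} ${timestampNs}`;
}

console.log(toLineProtocol(
  'cpu',
  { host: 'server01', region: 'eu' },
  { usage: 45.2, system: 12.3 },
  '1705312800000000000'
));
// -> cpu,host=server01,region=eu usage=45.2,system=12.3 1705312800000000000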
// Flux query language

// Data from the last hour
from(bucket: "metrics")
  |> range(start: -1h)
  |> filter(fn: (r) => r._measurement == "cpu")
  |> filter(fn: (r) => r.host == "server01")

// Average per 5 minutes
from(bucket: "metrics")
  |> range(start: -24h)
  |> filter(fn: (r) => r._measurement == "cpu")
  |> filter(fn: (r) => r._field == "usage")
  |> aggregateWindow(every: 5m, fn: mean)

// Grouped by host
from(bucket: "metrics")
  |> range(start: -1h)
  |> filter(fn: (r) => r._measurement == "cpu")
  |> group(columns: ["host"])
  |> mean()

// Downsampling for long-term storage
from(bucket: "metrics")
  |> range(start: -30d)
  |> filter(fn: (r) => r._measurement == "cpu")
  |> aggregateWindow(every: 1h, fn: mean)
  |> to(bucket: "metrics_monthly", org: "myorg")
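The downsampling query does not have to be run by hand: InfluxDB 2.x can execute a Flux script like this one on a schedule as a task, which is the usual way to keep a downsampled bucket such as metrics_monthly up to date.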
// Node.js Client
const { InfluxDB, Point } = require('@influxdata/influxdb-client');
const client = new InfluxDB({
url: 'http://localhost:8086',
token: 'your-token'
});
// Write data
const writeApi = client.getWriteApi('myorg', 'metrics');
const point = new Point('cpu')
.tag('host', 'server01')
.tag('region', 'eu')
.floatField('usage', 45.2)
.floatField('system', 12.3);
writeApi.writePoint(point);
await writeApi.close();
// Read data
const queryApi = client.getQueryApi('myorg');
const query = `
from(bucket: "metrics")
|> range(start: -1h)
|> filter(fn: (r) => r._measurement == "cpu")
`;
for await (const { values, tableMeta } of queryApi.iterateRows(query)) {
const row = tableMeta.toObject(values);
console.log(row);
}
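In production, points are usually batched rather than written one at a time. The @influxdata/influxdb-client write API accepts options for this; a sketch (the concrete numbers are illustrative, not recommendations):

// Batched writes: points are buffered and flushed in groups.
// 'ns' is the timestamp precision.
const { InfluxDB } = require('@influxdata/influxdb-client');
const influx = new InfluxDB({ url: 'http://localhost:8086', token: 'your-token' });

const batchedWriteApi = influx.getWriteApi('myorg', 'metrics', 'ns', {
  batchSize: 1000,     // flush after this many points...
  flushInterval: 5000, // ...or after 5 seconds, whichever comes first
  maxRetries: 3        // retry failed writes before giving up
});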
TimescaleDB (PostgreSQL Extension)
-- Docker
-- docker run -d -p 5432:5432 -e POSTGRES_PASSWORD=password timescale/timescaledb:latest-pg15
-- Enable the extension
CREATE EXTENSION IF NOT EXISTS timescaledb;
-- Create a regular table
CREATE TABLE metrics (
time TIMESTAMPTZ NOT NULL,
host TEXT NOT NULL,
cpu_usage DOUBLE PRECISION,
memory_usage DOUBLE PRECISION
);
-- Convert it into a hypertable (automatic time-based partitioning)
SELECT create_hypertable('metrics', 'time');
-- Optional: set the chunk interval (default: 7 days)
SELECT set_chunk_time_interval('metrics', INTERVAL '1 day');
-- Index for frequent queries
CREATE INDEX ON metrics (host, time DESC);
-- Insert data (just like regular SQL)
INSERT INTO metrics (time, host, cpu_usage, memory_usage)
VALUES
(NOW(), 'server01', 45.2, 62.1),
(NOW(), 'server02', 32.8, 55.3);
-- Standard SQL queries work as usual
SELECT * FROM metrics
WHERE host = 'server01'
AND time > NOW() - INTERVAL '1 hour'
ORDER BY time DESC
LIMIT 100;
-- Time-bucket aggregation (TimescaleDB function)
SELECT
time_bucket('5 minutes', time) AS bucket,
host,
AVG(cpu_usage) as avg_cpu,
MAX(cpu_usage) as max_cpu
FROM metrics
WHERE time > NOW() - INTERVAL '24 hours'
GROUP BY bucket, host
ORDER BY bucket DESC;
-- Continuous aggregates (materialized views for time series)
CREATE MATERIALIZED VIEW metrics_hourly
WITH (timescaledb.continuous) AS
SELECT
time_bucket('1 hour', time) AS bucket,
host,
AVG(cpu_usage) as avg_cpu,
AVG(memory_usage) as avg_memory
FROM metrics
GROUP BY bucket, host
WITH NO DATA;
-- Refresh policy: every hour, refresh the window from 3 hours to 1 hour ago
SELECT add_continuous_aggregate_policy('metrics_hourly',
start_offset => INTERVAL '3 hours',
end_offset => INTERVAL '1 hour',
schedule_interval => INTERVAL '1 hour');
-- Automatic data deletion (retention)
SELECT add_retention_policy('metrics', INTERVAL '30 days');
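Because TimescaleDB is ordinary PostgreSQL, any Postgres driver works unchanged. A minimal sketch using the node-postgres (pg) package; the connection values are assumptions matching the Docker command above:

// Querying the hypertable from Node.js with node-postgres.
const { Pool } = require('pg');

const pool = new Pool({
  host: 'localhost',
  port: 5432,
  user: 'postgres',
  password: 'password',
  database: 'postgres'
});

// time_bucket() is just another SQL function from the driver's point of view
async function recentCpu(host) {
  const { rows } = await pool.query(`
    SELECT time_bucket('5 minutes', time) AS bucket,
           AVG(cpu_usage) AS avg_cpu
    FROM metrics
    WHERE host = $1 AND time > NOW() - INTERVAL '1 hour'
    GROUP BY bucket
    ORDER BY bucket DESC
  `, [host]);
  return rows;
}

recentCpu('server01').then(console.log);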
Prometheus
# prometheus.yml
global:
  scrape_interval: 15s

scrape_configs:
  - job_name: 'prometheus'
    static_configs:
      - targets: ['localhost:9090']

  - job_name: 'node'
    static_configs:
      - targets: ['node-exporter:9100']

  - job_name: 'myapp'
    static_configs:
      - targets: ['myapp:8080']
    metrics_path: '/metrics'
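Prometheus is pull-based: at every scrape_interval it fetches the current values from each target's metrics endpoint over HTTP. The application therefore has to expose such an endpoint itself, as the Node.js example further below shows.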
# PromQL queries

# Cumulative idle CPU time per core (a counter, not a percentage)
node_cpu_seconds_total{mode="idle"}

# Rate (change per second)
rate(http_requests_total[5m])

# Average over time
avg_over_time(node_memory_Active_bytes[1h])

# Aggregation across labels
sum by (instance) (rate(http_requests_total[5m]))

# Percentiles (histograms)
histogram_quantile(0.95, rate(http_request_duration_seconds_bucket[5m]))
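histogram_quantile() estimates the quantile from cumulative bucket counters, so the result is only as precise as the configured bucket boundaries allow; choose buckets around the latencies you actually care about.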
# Alerting rules (prometheus/rules.yml)
groups:
  - name: example
    rules:
      - alert: HighCPU
        expr: 100 - (avg by (instance) (rate(node_cpu_seconds_total{mode="idle"}[5m])) * 100) > 80
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "High CPU usage on {{ $labels.instance }}"
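For Prometheus to pick these rules up, the file must be registered in prometheus.yml under the rule_files key.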
// Node.js: exposing metrics
const express = require('express');
const client = require('prom-client');

const app = express();

// Default metrics (CPU, memory, etc.)
client.collectDefaultMetrics();
// Custom Counter
const httpRequestCounter = new client.Counter({
name: 'http_requests_total',
help: 'Total HTTP requests',
labelNames: ['method', 'path', 'status']
});
// Custom Histogram
const httpRequestDuration = new client.Histogram({
name: 'http_request_duration_seconds',
help: 'HTTP request duration',
labelNames: ['method', 'path'],
buckets: [0.01, 0.05, 0.1, 0.5, 1, 5]
});
// Middleware: measure duration and count every request
app.use((req, res, next) => {
const end = httpRequestDuration.startTimer({ method: req.method, path: req.path });
res.on('finish', () => {
end();
httpRequestCounter.inc({ method: req.method, path: req.path, status: res.statusCode });
});
next();
});
// Metrics Endpoint
app.get('/metrics', async (req, res) => {
res.set('Content-Type', client.register.contentType);
res.end(await client.register.metrics());
});
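Besides Counter and Histogram, prom-client also provides a Gauge for values that can go up and down. A short sketch extending the example above (the metric name is illustrative):

// Gauge: tracks a value that rises and falls, e.g. requests currently in flight
const inFlightRequests = new client.Gauge({
  name: 'http_requests_in_flight',
  help: 'Number of HTTP requests currently being handled'
});

app.use((req, res, next) => {
  inFlightRequests.inc();                         // request started
  res.on('finish', () => inFlightRequests.dec()); // request finished
  next();
});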
Comparison
| Database | Strengths | Use case |
|---|---|---|
| InfluxDB | Simple, fast, good compression | IoT, general metrics |
| TimescaleDB | SQL, PostgreSQL ecosystem, JOINs | When SQL is needed, hybrid data |
| Prometheus | Pull-based, alerting, Kubernetes | Infrastructure monitoring |
| ClickHouse | Very fast, analytics | Large data volumes, analytics |
Retention and Downsampling
A typical data lifecycle:

1. Raw data (15s interval): last 7 days at full resolution
2. Aggregated (5 minutes): days 7-30 as 5-minute averages
3. Aggregated (1 hour): days 30-365 as hourly averages
4. Deleted: everything older than 365 days

Benefits:
- Saves storage space
- Queries on old data run faster
- Recent data keeps its full detail
-- TimescaleDB: continuous aggregates + retention
-- Hourly aggregation (columns match the metrics table created above)
CREATE MATERIALIZED VIEW metrics_hourly
WITH (timescaledb.continuous) AS
SELECT
time_bucket('1 hour', time) AS bucket,
host,
  AVG(cpu_usage) AS avg_cpu,
  MAX(cpu_usage) AS max_cpu,
  MIN(cpu_usage) AS min_cpu
FROM metrics
GROUP BY bucket, host;
-- Keep raw data for 7 days
SELECT add_retention_policy('metrics', INTERVAL '7 days');
-- Keep aggregated data for 1 year
SELECT add_retention_policy('metrics_hourly', INTERVAL '365 days');
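Beyond deleting old raw data, TimescaleDB can also compress older chunks in place, which serves the same goal of saving storage. A sketch extending the script above (the 3-day threshold and the segment-by column are illustrative assumptions):

-- Assumption: native compression as provided by current TimescaleDB releases
ALTER TABLE metrics SET (
  timescaledb.compress,
  timescaledb.compress_segmentby = 'host'
);

-- Compress chunks once they are older than 3 days (raw data is deleted at 7)
SELECT add_compression_policy('metrics', INTERVAL '3 days');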
💡 Recommendations:
1. Monitoring: Prometheus + Grafana
2. IoT/sensors: InfluxDB or TimescaleDB
3. SQL required: TimescaleDB
4. Plan retention policies from the start
5. Use downsampling for long-term storage