From b617b8c48f23971e1c17a59e74ea7ed86d154378 Mon Sep 17 00:00:00 2001 From: jawhng Date: Wed, 4 Feb 2026 14:41:47 +0000 Subject: [PATCH] Initial commit: health monitoring scripts for Discord webhooks --- README.md | 39 +++++++++++++ health-collector.sh | 34 +++++++++++ health-report.sh | 139 ++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 212 insertions(+) create mode 100644 README.md create mode 100755 health-collector.sh create mode 100755 health-report.sh diff --git a/README.md b/README.md new file mode 100644 index 0000000..f0f311a --- /dev/null +++ b/README.md @@ -0,0 +1,39 @@ +# homelab-health-monitor + +Lightweight system health monitoring for homelab servers with Discord webhook notifications. + +## Features + +- Hourly stats collection (CPU, RAM, load, disk) +- Daily Discord reports with min/avg/max values +- SSD SMART health checks +- Journal error summaries +- Color-coded status (OK/WARNING/CRITICAL) +- No dependencies beyond bash, cron, curl, and standard system utilities (awk, top, free, df); smartctl and journalctl are used when available + +## Installation + +1. Copy scripts to `~/.local/bin/` +2. Create data directory: `mkdir -p ~/.local/share/health-monitor` +3. Set your webhook URL: define `DISCORD_WEBHOOK_URL` in the environment (e.g. in your crontab), or edit the `WEBHOOK_URL` default in `health-report.sh` +4. Make executable: `chmod +x ~/.local/bin/health-*.sh` +5. Add to crontab: + +```cron +0 * * * * ~/.local/bin/health-collector.sh +0 9 * * * ~/.local/bin/health-report.sh +``` + +## Usage + +```bash +# Send report now +./health-report.sh + +# Only report if warnings/errors +./health-report.sh --errors-only +``` + +## License + +MIT diff --git a/health-collector.sh b/health-collector.sh new file mode 100755 index 0000000..0c2980c --- /dev/null +++ b/health-collector.sh @@ -0,0 +1,34 @@ +#!/bin/bash +# Collects system stats hourly - run via cron every hour +# Data stored in ~/.local/share/health-monitor/ + +DATA_DIR="$HOME/.local/share/health-monitor" +TODAY=$(date +%Y-%m-%d) +DATA_FILE="$DATA_DIR/stats-$TODAY.csv" + +mkdir -p "$DATA_DIR" + +# Initialize CSV header if new file +if [ ! 
-f "$DATA_FILE" ]; then + echo "timestamp,cpu,mem,load1,load5,load15,disk_pct" > "$DATA_FILE" +fi + +# CPU busy % = 100 - idle, from the second of two top samples taken 0.5 s apart; the idle value is found by its "id" label because its field position shifts when top prints "ni,100.0 id" +CPU=$(LC_ALL=C top -bn2 -d0.5 | grep "Cpu(s)" | tail -1 | awk -F',' '{for (i = 1; i <= NF; i++) if ($i ~ /id/) {gsub(/[^0-9.]/, "", $i); printf "%.1f", 100 - $i}}') + +# Memory percentage (used / total from the Mem: row of free) +MEM=$(free | awk '/Mem:/ {printf "%.1f", $3/$2 * 100}') + +# Load averages (first three fields of /proc/loadavg; the rest is discarded) +read -r LOAD1 LOAD5 LOAD15 _ < /proc/loadavg + +# Disk usage of the root filesystem, percent sign stripped +DISK=$(df / | awk 'NR==2 {gsub(/%/,""); print $5}') + +# Timestamp (HH:MM; the date is carried by the file name) +TS=$(date +%H:%M) + +echo "$TS,$CPU,$MEM,$LOAD1,$LOAD5,$LOAD15,$DISK" >> "$DATA_FILE" + +# Cleanup old files (find -mtime +7: last modified more than 7 days ago) +find "$DATA_DIR" -name "stats-*.csv" -mtime +7 -delete 2>/dev/null diff --git a/health-report.sh b/health-report.sh new file mode 100755 index 0000000..12d6b78 --- /dev/null +++ b/health-report.sh @@ -0,0 +1,139 @@ +#!/bin/bash +# System Health Report - Sends daily summary to Discord +# Usage: ./health-report.sh [--errors-only] + +WEBHOOK_URL="${DISCORD_WEBHOOK_URL:-YOUR_WEBHOOK_URL_HERE}" +DATA_DIR="$HOME/.local/share/health-monitor" +ERRORS_ONLY="${1:-}" + +# Colors for Discord embeds +COLOR_OK=3066993 # Green +COLOR_WARN=16776960 # Yellow +COLOR_ERROR=15158332 # Red + +# Thresholds +DISK_WARN=80 +DISK_CRIT=90 +MEM_WARN=80 +MEM_CRIT=90 + +# Gather basic info +HOSTNAME=$(hostname) +UPTIME=$(uptime -p | sed 's/up //') + +# Calculate stats from collected data (today + yesterday for context) +YESTERDAY=$(date -d "yesterday" +%Y-%m-%d 2>/dev/null || date -v-1d +%Y-%m-%d 2>/dev/null) +TODAY=$(date +%Y-%m-%d) + +# Combine recent data files +STATS_DATA="" +for f in "$DATA_DIR/stats-$YESTERDAY.csv" "$DATA_DIR/stats-$TODAY.csv"; do + [ -f "$f" ] && STATS_DATA+=$(tail -n +2 "$f")$'\n' +done + +# Calculate min/avg/max if we have data +if [ -n "$STATS_DATA" ] && [ $(echo "$STATS_DATA" | grep -c .) 
-gt 0 ]; then # each *_STATS below is "min / avg / max" over all combined samples + CPU_STATS=$(echo "$STATS_DATA" | awk -F',' 'NF>=2 && $2~/^[0-9]/ {v=$2+0; sum+=v; if(n==0 || v<min)min=v; if(n==0 || v>max)max=v; n++} END {if(n>0) printf "%.0f / %.0f / %.0f", min, sum/n, max; else print "N/A"}') + MEM_STATS=$(echo "$STATS_DATA" | awk -F',' 'NF>=3 && $3~/^[0-9]/ {v=$3+0; sum+=v; if(n==0 || v<min)min=v; if(n==0 || v>max)max=v; n++} END {if(n>0) printf "%.0f / %.0f / %.0f", min, sum/n, max; else print "N/A"}') + LOAD_STATS=$(echo "$STATS_DATA" | awk -F',' 'NF>=4 && $4~/^[0-9]/ {v=$4+0; sum+=v; if(n==0 || v<min)min=v; if(n==0 || v>max)max=v; n++} END {if(n>0) printf "%.2f / %.2f / %.2f", min, sum/n, max; else print "N/A"}') + SAMPLES=$(echo "$STATS_DATA" | grep -c .) + + # Highest memory sample, truncated to an integer so it can be compared with [ -ge ] + MEM_MAX=$(echo "$STATS_DATA" | awk -F',' 'NF>=3 && $3~/^[0-9]/ {if($3>max)max=$3} END {print int(max)}') + [ -z "$MEM_MAX" ] && MEM_MAX=0 +else + CPU_STATS="No data yet" + MEM_STATS="No data yet" + LOAD_STATS="No data yet" + SAMPLES=0 + MEM_MAX=0 +fi + +# Current disk usage of the root filesystem (percentage and "used / size") +DISK_PCT=$(df / | awk 'NR==2 {gsub(/%/,""); print $5}') +DISK_DETAIL=$(df -h / | awk 'NR==2 {printf "%s / %s", $3, $2}') + +# SSD SMART status (device defaults to /dev/sda; override with SMART_DEVICE) +SMART_STATUS="Unknown" +if command -v smartctl &>/dev/null; then + SMART_OUT=$(smartctl -H "${SMART_DEVICE:-/dev/sda}" 2>&1) + if echo "$SMART_OUT" | grep -q "PASSED"; then + SMART_STATUS="PASSED ✓" + elif echo "$SMART_OUT" | grep -q "FAILED"; then + SMART_STATUS="FAILED ✗" + elif echo "$SMART_OUT" | grep -qi "permission"; then + SMART_STATUS="Needs root" + fi +fi + +# Recent errors from journal (priority err and above, last 24h, newest 10 lines) +ERRORS="" +if command -v journalctl &>/dev/null; then + ERRORS=$(journalctl -p err -S "24 hours ago" --no-pager -q 2>/dev/null | tail -10) +fi +ERROR_COUNT=$(echo "$ERRORS" | grep -c . 
2>/dev/null || true) +[ -z "$ERRORS" ] && ERROR_COUNT=0 + +# Determine overall status: thresholds on current disk % and peak memory %, any journal error is at least a WARNING +STATUS="OK" +COLOR=$COLOR_OK + +if [ "$DISK_PCT" -ge "$DISK_CRIT" ] || [ "$MEM_MAX" -ge "$MEM_CRIT" ]; then + STATUS="CRITICAL" + COLOR=$COLOR_ERROR +elif [ "$DISK_PCT" -ge "$DISK_WARN" ] || [ "$MEM_MAX" -ge "$MEM_WARN" ] || [ "$ERROR_COUNT" -gt 0 ]; then + STATUS="WARNING" + COLOR=$COLOR_WARN +fi + +if echo "$SMART_STATUS" | grep -q "FAILED"; then + STATUS="CRITICAL" + COLOR=$COLOR_ERROR +fi + +# Skip if errors-only mode and everything is OK +if [ "$ERRORS_ONLY" = "--errors-only" ] && [ "$STATUS" = "OK" ]; then + exit 0 +fi + +# Format errors for Discord (first 5 lines, at most 400 bytes of raw text) +ERROR_FIELD="" +if [ "$ERROR_COUNT" -gt 0 ] && [ -n "$ERRORS" ]; then + # Truncate first, then JSON-escape (drop control chars, escape backslashes and double quotes) so the embed stays valid JSON; backticks become ' so they cannot close the code fence + ERRORS_CLEAN=$(echo "$ERRORS" | head -5 | head -c 400 | tr -d '\001-\011\013-\037' | sed -e 's/\\/\\\\/g' -e 's/"/\\"/g' -e 's/`/'"'"'/g' | awk '{printf "%s\\n", $0}') + ERROR_FIELD=',{"name": "📋 Recent Errors", "value": "```'"$ERRORS_CLEAN"'```", "inline": false}' +fi + +# Build Discord embed JSON +PAYLOAD=$(cat <