Initial commit: health monitoring scripts for Discord webhooks

This commit is contained in:
2026-02-04 14:41:47 +00:00
commit b617b8c48f
3 changed files with 212 additions and 0 deletions

39
README.md Normal file
View File

@@ -0,0 +1,39 @@
# homelab-health-monitor
Lightweight system health monitoring for homelab servers with Discord webhook notifications.
## Features
- Hourly stats collection (CPU, RAM, load, disk)
- Daily Discord reports with min/avg/max values
- SSD SMART health checks
- Journal error summaries
- Color-coded status (OK/WARNING/CRITICAL)
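- No dependencies beyond bash, cron, curl, and standard Linux utilities (`top`, `free`, `df`, `awk`); `smartctl` and `journalctl` are used when available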
## Installation
1. Copy scripts to `~/.local/bin/`
2. Create data directory: `mkdir -p ~/.local/share/health-monitor`
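3. Edit `health-report.sh` and set your `WEBHOOK_URL` (or provide it through the `DISCORD_WEBHOOK_URL` environment variable, which the script reads first)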
4. Make executable: `chmod +x ~/.local/bin/health-*.sh`
5. Add to crontab:
```cron
0 * * * * ~/.local/bin/health-collector.sh
0 9 * * * ~/.local/bin/health-report.sh
```
## Usage
```bash
# Send report now
./health-report.sh
# Only report if warnings/errors
./health-report.sh --errors-only
```
## License
MIT

34
health-collector.sh Executable file
View File

@@ -0,0 +1,34 @@
#!/bin/bash
# Collects system stats hourly - run via cron every hour
# Data stored in ~/.local/share/health-monitor/
#
# Each run appends one row to stats-YYYY-MM-DD.csv with the columns
#   timestamp,cpu,mem,load1,load5,load15,disk_pct
# and then deletes stats files that have not been modified for more than
# 7 days.

# Parse tool output in the C locale: decimal points instead of decimal
# commas, and untranslated labels in the output we grep/awk below.
export LC_ALL=C

DATA_DIR="$HOME/.local/share/health-monitor"
TODAY=$(date +%Y-%m-%d)
DATA_FILE="$DATA_DIR/stats-$TODAY.csv"

# Prints the busy CPU percentage (one decimal) derived from a top(1)
# "Cpu(s)" summary line.
# Arguments: $1 - the summary line
# Outputs:   "100 - idle" on stdout, or nothing if no idle value is found
#
# The idle value is located by its "id" label rather than by field number:
# top prints "ni,100.0 id" without a space when the CPU is fully idle, which
# shifts whitespace-separated fields and made a fixed "$8" read "id,"
# (reported as 100% busy).
cpu_usage_from_top_line() {
  printf '%s\n' "$1" | awk '
    match($0, /[0-9.]+[ %]*id/) {
      idle = substr($0, RSTART, RLENGTH)
      sub(/[ %]*id$/, "", idle)
      printf "%.1f", 100 - idle
    }'
}

# Takes one sample of CPU, memory, load and root-disk usage, appends it to
# today's CSV and prunes old files.
# Globals: DATA_DIR, DATA_FILE (read)
# Returns: 1 if the data directory cannot be created, otherwise the status
#          of the final cleanup command.
main() {
  local top_line cpu mem load1 load5 load15 disk ts

  mkdir -p "$DATA_DIR" || return 1

  # Initialize CSV header if new file
  if [ ! -f "$DATA_FILE" ]; then
    echo "timestamp,cpu,mem,load1,load5,load15,disk_pct" > "$DATA_FILE"
  fi

  # CPU usage: the first top iteration reports averages since boot, so take
  # the summary line of the second iteration (0.5 s later).
  top_line=$(top -bn2 -d0.5 | grep "Cpu(s)" | tail -1)
  cpu=$(cpu_usage_from_top_line "$top_line")

  # Memory percentage (used / total); skipped if total is reported as 0
  mem=$(free | awk '/Mem:/ && $2 > 0 {printf "%.1f", $3/$2 * 100}')

  # Load averages
  read -r load1 load5 load15 _ < /proc/loadavg

  # Disk usage of /; -P keeps each filesystem on a single line even when the
  # device name is long, so the value is always on line 2.
  disk=$(df -P / | awk 'NR==2 {gsub(/%/,""); print $5}')

  # Timestamp
  ts=$(date +%H:%M)

  echo "$ts,$cpu,$mem,$load1,$load5,$load15,$disk" >> "$DATA_FILE"

  # Cleanup old files (keep 7 days)
  find "$DATA_DIR" -name "stats-*.csv" -mtime +7 -delete 2>/dev/null
}

main "$@"

139
health-report.sh Executable file
View File

@@ -0,0 +1,139 @@
#!/bin/bash
# System Health Report - Sends daily summary to Discord
# Usage: ./health-report.sh [--errors-only]
#   --errors-only  send nothing (and exit 0) when the overall status is OK
#
# Environment:
#   DISCORD_WEBHOOK_URL  webhook to post to (overrides the placeholder below)
#   SMART_DEVICE         device passed to smartctl (default: /dev/sda)
#
# Exit status: 0 if the report was sent or skipped, 1 if the webhook is not
# configured or the request did not return a 2xx status.

WEBHOOK_URL="${DISCORD_WEBHOOK_URL:-YOUR_WEBHOOK_URL_HERE}"
DATA_DIR="$HOME/.local/share/health-monitor"
ERRORS_ONLY="${1:-}"
SMART_DEVICE="${SMART_DEVICE:-/dev/sda}"

# Colors for Discord embeds
COLOR_OK=3066993      # Green
COLOR_WARN=16776960   # Yellow
COLOR_ERROR=15158332  # Red

# Thresholds
DISK_WARN=80
DISK_CRIT=90
MEM_WARN=80
MEM_CRIT=90

# Escapes a string so it can be placed between the quotes of a JSON string.
# Arguments: $1 - raw text
# Outputs:   escaped text on stdout (backslash, double quote, newline, CR and
#            tab are escaped; any other control characters are dropped)
json_escape() {
  local s=$1
  s=${s//\\/\\\\}   # must run first so later escapes are not doubled
  s=${s//\"/\\\"}
  s=${s//$'\n'/\\n}
  s=${s//$'\r'/\\r}
  s=${s//$'\t'/\\t}
  printf '%s' "$s" | LC_ALL=C tr -d '\000-\037'
}

# Filters CSV rows on stdin, keeping those whose first column (HH:MM) sorts
# after the time given in $1. The comparison is forced to be a string one.
rows_after_time() {
  awk -F',' -v now="$1" '($1 "") > (now "")'
}

# Prints "min / avg / max" for one CSV column of the rows on stdin.
# Arguments: $1 - 1-based column number, $2 - printf format for each value
# Outputs:   the formatted triple, or "N/A" when no row has a value that
#            starts with a digit in that column
column_stats() {
  awk -F',' -v col="$1" -v fmt="$2" '
    NF >= col && $col ~ /^[0-9]/ {
      v = $col + 0
      sum += v
      if (n == 0 || v < min) min = v
      if (n == 0 || v > max) max = v
      n++
    }
    END {
      if (n > 0) printf fmt " / " fmt " / " fmt, min, sum / n, max
      else print "N/A"
    }'
}

# Prints the integer part of the largest value in CSV column $1 of the rows
# on stdin (0 when there is no usable value).
column_max_int() {
  awk -F',' -v col="$1" '
    NF >= col && $col ~ /^[0-9]/ { if ($col + 0 > max) max = $col + 0 }
    END { print int(max) }'
}

# Prints the SMART health of $SMART_DEVICE: "PASSED ✓", "FAILED ✗",
# "Needs root" (smartctl mentioned a permission problem) or "Unknown"
# (smartctl missing or output not recognised).
smart_status() {
  local out
  if ! command -v smartctl &>/dev/null; then
    printf '%s\n' "Unknown"
    return 0
  fi
  out=$(smartctl -H "$SMART_DEVICE" 2>&1)
  if grep -q "PASSED" <<<"$out"; then
    printf '%s\n' "PASSED ✓"
  elif grep -q "FAILED" <<<"$out"; then
    printf '%s\n' "FAILED ✗"
  elif grep -qi "permission" <<<"$out"; then
    printf '%s\n' "Needs root"
  else
    printf '%s\n' "Unknown"
  fi
}

# Prints the overall status: OK, WARNING or CRITICAL.
# Arguments: $1 - disk usage % (integer), $2 - peak memory % (integer),
#            $3 - number of journal errors, $4 - SMART status text
# Globals:   DISK_WARN, DISK_CRIT, MEM_WARN, MEM_CRIT (read)
determine_status() {
  local disk=$1 mem=$2 errors=$3 smart=$4
  local status="OK"
  if [ "$disk" -ge "$DISK_CRIT" ] || [ "$mem" -ge "$MEM_CRIT" ]; then
    status="CRITICAL"
  elif [ "$disk" -ge "$DISK_WARN" ] || [ "$mem" -ge "$MEM_WARN" ] || [ "$errors" -gt 0 ]; then
    status="WARNING"
  fi
  # A failing disk is critical regardless of the other readings.
  case "$smart" in
    *FAILED*) status="CRITICAL" ;;
  esac
  printf '%s\n' "$status"
}

# Gathers the data, builds the Discord embed and posts it.
# Globals: WEBHOOK_URL, DATA_DIR, ERRORS_ONLY, COLOR_*, thresholds (read)
# Returns: 0 when sent or skipped, 1 when unconfigured or sending failed
main() {
  local host up yesterday today now stats samples
  local cpu_stats mem_stats load_stats mem_max
  local disk_pct disk_num disk_detail smart
  local errors_all error_count recent error_field
  local status color payload response
  local backtick='`' apostrophe="'"

  # Gather basic info
  host=$(hostname)
  up=$(uptime -p | sed 's/up //')

  # Collected data: all of today plus the part of yesterday that lies after
  # the current time of day, so the window matches the "(24h)" labels.
  yesterday=$(date -d "yesterday" +%Y-%m-%d 2>/dev/null || date -v-1d +%Y-%m-%d 2>/dev/null)
  today=$(date +%Y-%m-%d)
  now=$(date +%H:%M)
  stats=""
  if [ -f "$DATA_DIR/stats-$yesterday.csv" ]; then
    stats+=$(tail -n +2 "$DATA_DIR/stats-$yesterday.csv" | rows_after_time "$now")$'\n'
  fi
  if [ -f "$DATA_DIR/stats-$today.csv" ]; then
    stats+=$(tail -n +2 "$DATA_DIR/stats-$today.csv")$'\n'
  fi

  # Calculate min/avg/max if we have data
  samples=$(printf '%s\n' "$stats" | grep -c .)
  if [ "$samples" -gt 0 ]; then
    cpu_stats=$(printf '%s\n' "$stats" | column_stats 2 "%.0f")
    mem_stats=$(printf '%s\n' "$stats" | column_stats 3 "%.0f")
    load_stats=$(printf '%s\n' "$stats" | column_stats 4 "%.2f")
    # Peak memory drives the threshold check
    mem_max=$(printf '%s\n' "$stats" | column_max_int 3)
    mem_max=${mem_max:-0}
  else
    cpu_stats="No data yet"
    mem_stats="No data yet"
    load_stats="No data yet"
    mem_max=0
  fi

  # Current disk usage; -P keeps the entry for / on a single line.
  disk_pct=$(df -P / | awk 'NR==2 {gsub(/%/,""); print $5}')
  disk_detail=$(df -hP / | awk 'NR==2 {printf "%s / %s", $3, $2}')
  # The threshold tests need an integer even if df produced nothing usable.
  disk_num=$disk_pct
  [[ "$disk_num" =~ ^[0-9]+$ ]] || disk_num=0

  # SSD SMART status
  smart=$(smart_status)

  # Errors from journal (last 24h): count all of them, show the newest five.
  errors_all=""
  if command -v journalctl &>/dev/null; then
    errors_all=$(journalctl -p err -S "24 hours ago" --no-pager -q 2>/dev/null)
  fi
  # grep -c prints 0 itself when nothing matches, so no fallback is needed.
  error_count=$(printf '%s\n' "$errors_all" | grep -c .)
  recent=$(printf '%s\n' "$errors_all" | grep . | tail -5)

  # Determine overall status
  status=$(determine_status "$disk_num" "$mem_max" "$error_count" "$smart")
  case "$status" in
    CRITICAL) color=$COLOR_ERROR ;;
    WARNING)  color=$COLOR_WARN ;;
    *)        color=$COLOR_OK ;;
  esac

  # Skip if errors-only mode and everything is OK
  if [ "$ERRORS_ONLY" = "--errors-only" ] && [ "$status" = "OK" ]; then
    return 0
  fi

  # Format errors for Discord
  error_field=""
  if [ "$error_count" -gt 0 ] && [ -n "$recent" ]; then
    # Backticks would close the code fence; truncate the raw text BEFORE
    # escaping so an escape sequence is never cut in half.
    recent=${recent//$backtick/$apostrophe}
    recent=${recent:0:400}
    error_field=',{"name": "📋 Recent Errors", "value": "```'"$(json_escape "$recent")"'```", "inline": false}'
  fi

  # Build Discord embed JSON
  payload=$(cat <<EOF
{
"embeds": [{
"title": "🖥️ $(json_escape "$host") - Health Report",
"color": ${color},
"fields": [
{"name": "Status", "value": "**${status}**", "inline": true},
{"name": "Uptime", "value": "$(json_escape "$up")", "inline": true},
{"name": "Samples", "value": "${samples} (24h)", "inline": true},
{"name": "💻 CPU % (min/avg/max)", "value": "${cpu_stats}", "inline": true},
{"name": "🧠 RAM % (min/avg/max)", "value": "${mem_stats}", "inline": true},
{"name": "📊 Load (min/avg/max)", "value": "${load_stats}", "inline": true},
{"name": "💾 Disk /", "value": "${disk_pct}% (${disk_detail})", "inline": true},
{"name": "🔧 SSD Health", "value": "${smart}", "inline": true},
{"name": "⚠️ Errors (24h)", "value": "${error_count}", "inline": true}${error_field}
],
"footer": {"text": "$(date '+%Y-%m-%d %H:%M:%S')"}
}]
}
EOF
)

  # Refuse to call curl with the unedited placeholder.
  if [ -z "$WEBHOOK_URL" ] || [ "$WEBHOOK_URL" = "YOUR_WEBHOOK_URL_HERE" ]; then
    echo "Webhook URL is not configured: set DISCORD_WEBHOOK_URL or edit WEBHOOK_URL" >&2
    return 1
  fi

  # Send to Discord; --max-time keeps a stalled connection from hanging cron.
  response=$(curl -s -o /dev/null -w "%{http_code}" --max-time 30 \
    -H "Content-Type: application/json" -d "$payload" "$WEBHOOK_URL")
  case "$response" in
    2??)
      # 204 is the usual reply; any 2xx means the request was accepted.
      echo "Health report sent successfully"
      ;;
    *)
      echo "Failed to send report (HTTP $response)"
      echo "$payload" | head -50
      return 1
      ;;
  esac
}

main "$@"