Initial commit: health monitoring scripts for Discord webhooks
This commit is contained in:
139
health-report.sh
Executable file
139
health-report.sh
Executable file
@@ -0,0 +1,139 @@
|
||||
#!/bin/bash
# System Health Report - Sends daily summary to Discord
# Usage: ./health-report.sh [--errors-only]
#
# Environment:
#   DISCORD_WEBHOOK_URL  Webhook the report is posted to. A placeholder is
#                        used when it is unset or empty.
#
# With --errors-only nothing is sent when the overall status is OK.

# Webhook endpoint (placeholder when DISCORD_WEBHOOK_URL is unset or empty)
WEBHOOK_URL="${DISCORD_WEBHOOK_URL:-YOUR_WEBHOOK_URL_HERE}"
# Directory holding the per-day stats-YYYY-MM-DD.csv files read by this script
DATA_DIR="$HOME/.local/share/health-monitor"
# First command-line argument; compared against "--errors-only" further down
ERRORS_ONLY="${1:-}"

# Colors for Discord embeds
readonly COLOR_OK=3066993      # Green
readonly COLOR_WARN=16776960   # Yellow
readonly COLOR_ERROR=15158332  # Red

# Thresholds (compared against disk usage % and peak memory %)
readonly DISK_WARN=80
readonly DISK_CRIT=90
readonly MEM_WARN=80
readonly MEM_CRIT=90

# Gather basic info
# Fall back to `uname -n` when the hostname utility is not installed.
HOSTNAME=$(hostname 2>/dev/null || uname -n)
# `uptime -p` is not supported by every uptime implementation. UPTIME is
# interpolated into an embed field value, so it must never be left empty.
UPTIME=$(uptime -p 2>/dev/null | sed 's/up //')
[ -n "$UPTIME" ] || UPTIME="unknown"

# Calculate stats from collected data (today + yesterday for context)
# First form is GNU date syntax, second is the BSD date equivalent.
YESTERDAY=$(date -d "yesterday" +%Y-%m-%d 2>/dev/null || date -v-1d +%Y-%m-%d 2>/dev/null)
TODAY=$(date +%Y-%m-%d)

# Print the data rows (everything after the first line) of each file given
# as an argument. Missing files and files with no data rows are skipped.
# Each file's rows end with a newline so consecutive files never run together.
read_stats_rows() {
  local csv rows
  for csv in "$@"; do
    [ -f "$csv" ] || continue
    rows=$(tail -n +2 "$csv")
    [ -n "$rows" ] && printf '%s\n' "$rows"
  done
  return 0
}

# Combine recent data files
STATS_DATA=$(read_stats_rows "$DATA_DIR/stats-$YESTERDAY.csv" "$DATA_DIR/stats-$TODAY.csv")

# Calculate min/avg/max if we have data

# Print "min / avg / max" for one comma-separated column of the rows on stdin.
#   $1 - 1-based column number
#   $2 - printf format applied to each of the three numbers
# Rows whose column does not start with a digit are ignored.
# Prints "N/A" when no row qualifies.
column_stats() {
  awk -F',' -v col="$1" -v fmt="$2" '
    NF >= col && $col ~ /^[0-9]/ {
      v = $col + 0   # force a numeric value so min/max never compare as strings (e.g. field ending in CR)
      sum += v
      if (n == 0 || v < min) min = v
      if (n == 0 || v > max) max = v
      n++
    }
    END {
      if (n > 0) printf fmt " / " fmt " / " fmt "\n", min, sum / n, max
      else print "N/A"
    }'
}

# Print the integer part of the largest value in column $1 of the rows on
# stdin (0 when no row qualifies).
column_max_int() {
  awk -F',' -v col="$1" '
    NF >= col && $col ~ /^[0-9]/ { v = $col + 0; if (v > max) max = v }
    END { print int(max) }'
}

# Number of non-empty rows collected
SAMPLES=0
if [ -n "${STATS_DATA:-}" ]; then
  # grep -c prints 0 itself when nothing matches; `|| true` only hides its exit status
  SAMPLES=$(printf '%s\n' "$STATS_DATA" | grep -c . || true)
fi

if [ "${SAMPLES:-0}" -gt 0 ]; then
  # Columns 2, 3 and 4 feed the CPU, RAM and load fields respectively
  CPU_STATS=$(printf '%s\n' "$STATS_DATA" | column_stats 2 '%.0f')
  MEM_STATS=$(printf '%s\n' "$STATS_DATA" | column_stats 3 '%.0f')
  LOAD_STATS=$(printf '%s\n' "$STATS_DATA" | column_stats 4 '%.2f')

  # Get max values for threshold checks
  MEM_MAX=$(printf '%s\n' "$STATS_DATA" | column_max_int 3)
  MEM_MAX=${MEM_MAX:-0}
else
  CPU_STATS="No data yet"
  MEM_STATS="No data yet"
  LOAD_STATS="No data yet"
  SAMPLES=0
  MEM_MAX=0
fi

# Current disk usage
# -P requests the POSIX output format, which keeps each filesystem on one
# line, so the second line is always the data row for /.
DISK_PCT=$(df -P / 2>/dev/null | awk 'NR==2 {gsub(/%/,""); print $5}')
DISK_DETAIL=$(df -Ph / 2>/dev/null | awk 'NR==2 {printf "%s / %s", $3, $2}')

# The percentage is used in integer comparisons later, so it must be a plain
# number. Fall back to 0 and say so when df produced nothing usable.
case "$DISK_PCT" in
  '' | *[!0-9]*)
    echo "warning: could not read disk usage for /" >&2
    DISK_PCT=0
    DISK_DETAIL="unavailable"
    ;;
esac

# SSD SMART status

# Map the text printed by `smartctl -H` ($1) to the label shown in the report.
# "PASSED" is checked first, then "FAILED" (both case-sensitive), then a
# case-insensitive "permission"; anything else is reported as "Unknown".
smart_status_label() {
  local report=$1
  case "$report" in
    *PASSED*) printf '%s\n' "PASSED ✓" ;;
    *FAILED*) printf '%s\n' "FAILED ✗" ;;
    *)
      if printf '%s\n' "$report" | grep -qi "permission"; then
        printf '%s\n' "Needs root"
      else
        printf '%s\n' "Unknown"
      fi
      ;;
  esac
}

SMART_STATUS="Unknown"
if command -v smartctl &>/dev/null; then
  # SMART_DEVICE selects the disk to query; defaults to /dev/sda so existing
  # setups behave as before, while hosts without that device can override it.
  SMART_OUT=$(smartctl -H "${SMART_DEVICE:-/dev/sda}" 2>&1)
  SMART_STATUS=$(smart_status_label "$SMART_OUT")
fi

# Recent errors from journal (last 24h)

# Print the number of non-empty lines in $1.
count_nonblank_lines() {
  # grep -c prints the count itself, including 0; `|| true` only hides the
  # non-zero exit status it returns when nothing matched.
  printf '%s\n' "$1" | grep -c . || true
}

ERRORS=""
ERROR_COUNT=0
if command -v journalctl &>/dev/null; then
  ALL_ERRORS=$(journalctl -p err -S "24 hours ago" --no-pager -q 2>/dev/null)
  # Count before trimming so the reported total is not capped at 10
  ERROR_COUNT=$(count_nonblank_lines "$ALL_ERRORS")
  # Keep only the last 10 lines for display
  ERRORS=$(printf '%s\n' "$ALL_ERRORS" | tail -n 10)
  unset ALL_ERRORS
fi
# No error text means there is nothing to report, whatever was counted
[ -n "$ERRORS" ] || ERROR_COUNT=0

# Determine overall status

# Print $1 when it consists only of digits, otherwise 0. Keeps the integer
# tests below from failing on empty or malformed readings.
int_or_zero() {
  case "$1" in
    '' | *[!0-9]*) printf '0\n' ;;
    *) printf '%s\n' "$1" ;;
  esac
}

# Print OK, WARNING or CRITICAL for the given measurements.
#   $1 - disk usage percent
#   $2 - peak memory percent
#   $3 - error count
#   $4 - SMART label (any text containing "FAILED" is critical)
# Thresholds come from DISK_WARN/DISK_CRIT/MEM_WARN/MEM_CRIT; the inline
# defaults only apply if those are unset.
overall_status() {
  local disk mem errors smart=$4
  disk=$(int_or_zero "$1")
  mem=$(int_or_zero "$2")
  errors=$(int_or_zero "$3")

  if [[ "$smart" == *FAILED* ]] \
    || [ "$disk" -ge "${DISK_CRIT:-90}" ] \
    || [ "$mem" -ge "${MEM_CRIT:-90}" ]; then
    printf 'CRITICAL\n'
  elif [ "$disk" -ge "${DISK_WARN:-80}" ] \
    || [ "$mem" -ge "${MEM_WARN:-80}" ] \
    || [ "$errors" -gt 0 ]; then
    printf 'WARNING\n'
  else
    printf 'OK\n'
  fi
}

STATUS=$(overall_status "${DISK_PCT:-}" "${MEM_MAX:-}" "${ERROR_COUNT:-}" "${SMART_STATUS:-}")

# Embed color that goes with the status
case "$STATUS" in
  CRITICAL) COLOR=$COLOR_ERROR ;;
  WARNING) COLOR=$COLOR_WARN ;;
  *) COLOR=$COLOR_OK ;;
esac

# Skip if errors-only mode and everything is OK
if [ "${ERRORS_ONLY:-}" = "--errors-only" ] && [ "$STATUS" = "OK" ]; then
  exit 0
fi

# Format errors for Discord

# Print $1 as the inside of a JSON string, ready to sit in a ``` code fence:
# first 5 lines only, at most 400 characters of source text, newlines
# written as \n, backslashes and double quotes escaped.
json_escape_errors() {
  local text
  # Backticks would close the code fence, so they become single quotes.
  # Tabs become spaces and remaining control characters are dropped because
  # a JSON string may not contain them unescaped.
  text=$(printf '%s\n' "$1" | head -n 5 | tr '`\t' "' " | tr -d '\000-\011\013-\037\177')
  # Truncate before escaping so the cut can never land inside an escape sequence
  text=${text:0:400}
  # Backslashes first, otherwise the escapes added below would be doubled
  text=${text//\\/\\\\}
  text=${text//\"/\\\"}
  text=${text//$'\n'/\\n}
  printf '%s' "$text"
}

ERROR_FIELD=""
if [ "${ERROR_COUNT:-0}" -gt 0 ] && [ -n "${ERRORS:-}" ]; then
  # Escape and truncate errors
  ERRORS_CLEAN=$(json_escape_errors "$ERRORS")
  ERROR_FIELD=',{"name": "📋 Recent Errors", "value": "```'"$ERRORS_CLEAN"'```", "inline": false}'
fi

# Build Discord embed JSON
# Every interpolated value has a fallback: an empty COLOR would leave
# `"color": ,` (invalid JSON) and empty field values would be sent otherwise.
# ERROR_FIELD is either empty or a complete extra field object with its own
# leading comma.
PAYLOAD=$(cat <<EOF
{
  "embeds": [{
    "title": "🖥️ ${HOSTNAME:-unknown} - Health Report",
    "color": ${COLOR:-0},
    "fields": [
      {"name": "Status", "value": "**${STATUS:-UNKNOWN}**", "inline": true},
      {"name": "Uptime", "value": "${UPTIME:-unknown}", "inline": true},
      {"name": "Samples", "value": "${SAMPLES:-0} (24h)", "inline": true},
      {"name": "💻 CPU % (min/avg/max)", "value": "${CPU_STATS:-N/A}", "inline": true},
      {"name": "🧠 RAM % (min/avg/max)", "value": "${MEM_STATS:-N/A}", "inline": true},
      {"name": "📊 Load (min/avg/max)", "value": "${LOAD_STATS:-N/A}", "inline": true},
      {"name": "💾 Disk /", "value": "${DISK_PCT:-?}% (${DISK_DETAIL:-unavailable})", "inline": true},
      {"name": "🔧 SSD Health", "value": "${SMART_STATUS:-Unknown}", "inline": true},
      {"name": "⚠️ Errors (24h)", "value": "${ERROR_COUNT:-0}", "inline": true}${ERROR_FIELD:-}
    ],
    "footer": {"text": "$(date '+%Y-%m-%d %H:%M:%S')"}
  }]
}
EOF
)

# Send to Discord

# Refuse to call curl with the unconfigured placeholder or any other value
# that is not an HTTP(S) URL; the request could not succeed anyway.
case "${WEBHOOK_URL:-}" in
  http://* | https://*) ;;
  *)
    echo "Failed to send report: DISCORD_WEBHOOK_URL is not set to a webhook URL" >&2
    exit 1
    ;;
esac

# -s silences progress output, -o /dev/null discards the response body and
# -w prints only the HTTP status code, which is what RESPONSE captures.
# The timeouts keep an unattended run from hanging on a stalled connection.
RESPONSE=$(curl -s -o /dev/null -w "%{http_code}" \
  --connect-timeout 10 --max-time 30 \
  -H "Content-Type: application/json" -d "$PAYLOAD" "$WEBHOOK_URL")

case "$RESPONSE" in
  # 204 is the normal webhook reply; 200 is returned when the URL asks to wait for the message
  200 | 204)
    echo "Health report sent successfully"
    ;;
  *)
    # Diagnostics go to stderr, with the start of the payload to help debugging
    echo "Failed to send report (HTTP $RESPONSE)" >&2
    echo "$PAYLOAD" | head -50 >&2
    exit 1
    ;;
esac

Reference in New Issue
Block a user