New module: phoebe.services.monitoring

Basic monitoring and alerting with netdata.
This commit is contained in:
Peter Jones 2019-01-24 16:46:23 -07:00
parent 3129ab3fa7
commit 4846f4ccfa
No known key found for this signature in database
GPG key ID: 9DAFAA8D01941E49
4 changed files with 226 additions and 0 deletions

View file

@ -4,6 +4,7 @@
imports = [
./builder
./databases
./monitoring
./web
];
}

View file

@ -0,0 +1,125 @@
#!/bin/bash
# Origin: https://github.com/mo0nsniper/netdata/commit/157b6e04b1931f57f16433fae42e028c525bd5cb
#
# This file is loaded from charts.d.plugin, so the shebang above is not needed
# to run it.
#
# Naming rule: for a chart file called X.chart.sh, every function and every
# global variable must start with "X_" (here: "services_").

# Special variable: number of seconds between two calls of services_update().
services_update_every=2

# Dashboard ordering of the charts (1 = the first chart).
services_priority=60000

# Collected data, refreshed by services_get():
#   services_service - service names
#   services_status  - numeric status for each service
declare -a services_service services_status

# Per-state counters, refreshed by services_get().
services_running=
services_dead=
services_exited=
services_failed=
# services_get - collect the current state of every loaded systemd service.
#
# Runs systemctl once and refreshes the module globals:
#   services_service - unit names with their last ".suffix" removed
#   services_status  - numeric status per unit, index-aligned with
#                      services_service:
#                        1 = running, -2 = dead, -3 = exited, -4 = failed,
#                        0 = any other state
#   services_running, services_dead, services_exited, services_failed
#                    - how many units are in each of those four states
#
# Returns:
#   0 - data was collected and can be sent to netdata
#   1 - nothing could be collected (systemctl produced no unit lines)
#
# Guidelines for code in this function:
#   1. keep it simple and short
#   2. avoid forks (avoid piping commands)
#   3. avoid calling too many external programs
#   4. use local variables (globals may overlap with other modules)
services_get() {
    # "declare" inside a function creates local variables.
    declare -a services_line
    declare services_state

    services_service=()
    services_status=()
    services_line=()
    services_running=0
    services_dead=0
    services_exited=0
    services_failed=0

    # Each line is split on whitespace; field 0 is the unit name and field 3
    # is the state matched below (systemctl list-units prints
    # UNIT LOAD ACTIVE SUB DESCRIPTION).
    # NOTE(review): read is used without -r, so backslashes in unit names are
    # dropped; left as-is so existing dimension ids do not change.
    while read -a services_line; do
        # Ignore blank lines.
        [ -n "${services_line[0]}" ] || continue

        case "${services_line[3]}" in
            running) services_state=1;  services_running=$((services_running + 1)) ;;
            dead)    services_state=-2; services_dead=$((services_dead + 1)) ;;
            exited)  services_state=-3; services_exited=$((services_exited + 1)) ;;
            failed)  services_state=-4; services_failed=$((services_failed + 1)) ;;
            # Any other state still gets a value, otherwise the two arrays
            # fall out of step and later units report the wrong status.
            *)       services_state=0 ;;
        esac

        services_service+=("${services_line[0]%.*}")
        services_status+=("$services_state")
    done < <(systemctl --no-legend --no-pager --plain --state=loaded --all --type=service)

    # The exit code of systemctl is not visible through process substitution,
    # so an empty result is what signals a failed collection.
    [ "${#services_service[@]}" -gt 0 ] || return 1

    return 0
}
# services_check runs once, to decide whether this chart should be enabled.
#
# Returns:
#   0 - enable the chart
#   1 - disable the chart
services_check() {
    # The systemctl command has to be available.
    if ! require_cmd systemctl; then
        return 1
    fi

    # A trial collection has to succeed as well.
    if ! services_get; then
        return 1
    fi

    return 0
}
# services_create runs once, to define the charts and their dimensions.
#
# Prints two chart definitions on stdout:
#   Services.summary - stacked chart with one dimension per state
#                      (running, dead, exited, failed)
#   Services.status  - line chart with one dimension per entry of
#                      services_service
#
# Reads the globals filled in by services_get(); the service total shown in
# the summary title is computed from the counters as they are at this moment.
#
# Always returns 0.
services_create() {
    # Keep helper variables local so they cannot clash with other modules.
    local idx total

    total=$((services_running + services_dead + services_exited + services_failed))

    # DIMENSION fields are: id name algorithm multiplier divisor.
    # The algorithm field must be a keyword such as "absolute", not a value.
    cat <<EOF
CHART Services.summary '' "Summary: ${total} services" "Total" Summary summary stacked $((services_priority)) $services_update_every
DIMENSION running '' absolute 1 1
DIMENSION dead '' absolute 1 1
DIMENSION exited '' absolute 1 1
DIMENSION failed '' absolute 1 1
EOF

    echo "CHART Services.status 'System services' 'Status of systemd services: 1=running -2=dead -3=exited -4=failed' 'Status' Services services line $((services_priority + 1)) $services_update_every"
    for idx in "${!services_service[@]}"; do
        echo "DIMENSION ${services_service[$idx]} '' absolute 1 1"
    done

    return 0
}
# services_update is called continuously, to collect and report the values.
#
# Arguments:
#   $1 - microseconds since the last update; passed through unchanged to the
#        BEGIN statements (see below)
#
# Prints one BEGIN/SET/END group for Services.summary and one for
# Services.status on stdout.
#
# Returns:
#   0 - values were written
#   1 - services_get failed, nothing was written
services_update() {
    # Keep the loop index local so it cannot clash with other modules.
    local idx

    services_get || return 1

    # Write the result of the work. Only shell builtins are used here, so no
    # external program is forked on every update.
    echo "BEGIN Services.summary $1"
    echo "SET running = $services_running"
    echo "SET dead = $services_dead"
    echo "SET exited = $services_exited"
    echo "SET failed = $services_failed"
    echo "END"

    echo "BEGIN Services.status $1"
    for idx in "${!services_service[@]}"; do
        echo "SET ${services_service[$idx]} = ${services_status[$idx]}"
    done
    echo "END"

    return 0
}

View file

@ -0,0 +1,83 @@
# Configure monitoring and reporting services.
#
# Declares the options under phoebe.services.monitoring and, when enabled,
# turns on netdata with the extra charts.d plugin built by ./plugins.nix,
# plus two netdata health files installed under /etc/netdata.
{ config, lib, pkgs, ...}:

# Bring in library functions:
with lib;

let
  cfg = config.phoebe.services.monitoring;

  # Derivation holding our copy of charts.d.plugin and the extra chart scripts.
  plugins = import ./plugins.nix { inherit (pkgs) stdenvNoCC netdata; };

  # Notification settings; the file is empty unless Pushover alerts are enabled.
  # NOTE(review): the API key is interpolated into a writeText result, so it
  # ends up in the Nix store -- confirm that is acceptable for this secret.
  alarmNotifyConf = pkgs.writeText "health_alarm_notify.conf"
    (optionalString cfg.pushover.enable ''
      SEND_PUSHOVER=YES
      PUSHOVER_APP_TOKEN="${cfg.pushover.apiKey}"
      DEFAULT_RECIPIENT_PUSHOVER="${concatStringsSep "," cfg.pushover.userKeys}"
    '');

  # Health alarm attached to the Services.status chart.  The crit expression
  # matches values in [-4, -3), i.e. the -4 the chart uses for "failed".
  # NOTE(review): the lookup names no dimensions -- confirm how netdata
  # combines the per-service dimensions before $this is evaluated.
  alarmConf = pkgs.writeText "alarms.conf" ''
    # Send alarms for systemd services.
    alarm: failed_service
    on: Services.status
    os: linux
    hosts: *
    lookup: min -1m unaligned
    every: 1m
    crit: $this < -3 && $this >= -4
    info: service is failed
    to: sysadmin
  '';

in
{
  #### Interface
  options.phoebe.services.monitoring = {
    # mkEnableOption adds "Whether to enable " and a trailing period itself,
    # so the text passed in carries neither.
    enable = mkEnableOption "monitoring and reporting";

    pushover = {
      enable = mkEnableOption "alerts via Pushover";

      apiKey = mkOption {
        type = types.str;
        example = "1234567890abcdefghijklmnopqrst";
        description = "Pushover API key for netdata";
      };

      userKeys = mkOption {
        type = types.listOf types.str;
        example = [ "1234567890abcdefghijklmnopqrst" ];
        description = "List of user keys.";
      };
    };
  };

  #### Implementation
  config = mkIf cfg.enable {
    # Enable systemd accounting:
    systemd.enableCgroupAccounting = true;

    # Use netdata to collect metrics:
    services.netdata = {
      enable = true;
      extraPluginPaths = [ "${plugins}/plugins.d" ];

      # Send all three netdata logs to syslog.
      config.global = {
        "debug log" = "syslog";
        "access log" = "syslog";
        "error log" = "syslog";
      };

      # Enable our renamed copy of charts.d.plugin.
      config.plugins = {
        "phoebe.charts.d.plugin" = "yes";
      };
    };

    # Install the generated health files read-only under /etc/netdata.
    environment.etc."netdata/health_alarm_notify.conf" = {
      source = "${alarmNotifyConf}";
      mode = "0444";
    };

    environment.etc."netdata/health.d/alarm.conf" = {
      source = "${alarmConf}";
      mode = "0444";
    };
  };
}

View file

@ -0,0 +1,17 @@
# Package our extra netdata chart scripts together with a private copy of
# charts.d.plugin that loads charts from this derivation's charts.d directory.
{ stdenvNoCC
, netdata
}:

stdenvNoCC.mkDerivation {
  name = "netdata-extra-scripts";

  # There is nothing to unpack or build; only these two phases are run.
  phases = [ "installPhase" "fixupPhase" ];

  installPhase = ''
    plugin=$out/plugins.d/phoebe.charts.d.plugin

    mkdir -p $out/plugins.d $out/charts.d

    # Our chart script, and netdata's charts.d.plugin under a new name.
    install -m 0555 ${./charts.d/services.chart.sh} $out/charts.d/services.chart.sh
    install -m 0555 ${netdata}/libexec/netdata/plugins.d/charts.d.plugin $plugin

    # Force our copy of charts.d.plugin to use the correct charts.d directory:
    sed -i "s|^chartsd=.*|chartsd=$out/charts.d|" $plugin
  '';
}