From 825701e285bd7d29433c617f846851184f54c9a4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michael=20H=C3=B6=C3=9F?= Date: Wed, 23 Feb 2022 12:30:36 +0100 Subject: [PATCH] Improve README/Docs, Minor fix --- README.md | 101 +++++++++++++++++++++++++++++++++++++++++---- app/sample.job.yml | 12 +++--- app/types/types.go | 2 +- 3 files changed, 101 insertions(+), 14 deletions(-) diff --git a/README.md b/README.md index db12b0d..eb80223 100644 --- a/README.md +++ b/README.md @@ -1,14 +1,20 @@ # Jobwatch -## (C) 2022, Michael Höß, MIT-Licensed +> (C) 2022, Michael Höß, hoess@gmx.net, MIT-Licensed -### What is jobwatch +## What is jobwatch If you just want to execute a simple shell-script via cron every couple of hour, often only a few lines of code are required. -But if you want to ensure the script really run, in the required interval, completed -without error, you have to add a lot of plumping code. +But if you want to ensure +- the script really ran +- in the required interval +- completed without error/with the expected exitcode +- has the expected output +- only one instance executes at a time + +and want all this to be monitored with your CheckMK, you have to add a lot of plumbing code. jobwatch helps to migitate this problem. It wraps the execution of your script, and does all the rest for you, by providing output, which can be fed to CheckMK by the @@ -25,9 +31,90 @@ jobwatch Simply add a .job-file into /etc/jobwatch.d where you defines all of this, call you script via jobwatch -j job in the crontab and you are done. 
-TBD: -- document deployment -- document job-file (see included sample) +## Sample-Job `/etc/jobwatch.d/sample.job.yml`: +``` +cmd: /usr/bin/w +args: + - -i +exitcode_map: + - from: 23 # Map exitcode 23 to 3=UNKNOWN + to: 3 + - from: -1 # Map all nonzero-exit codes to 1=WARN + to: 1 + - from: 0 # Map exitcode 0 also to 1=WARN + to: 1 +log_matches: + - regex: .*192.168.*.* + state: 1 + - regex: ".*tty[0-9].+-.*" # 2=CRIT when somebody is logged in on the console + state: 2 + alt_msg: "%v -> a user is logged in at console" + - regex: .+ # Include all other lines as OK + state: 0 +hide_output: False # Don't hide output, but pass through +last_run_warn: # How often is this job to be expected to be executed + val: 8 + unit: "h" +last_run_crit: + val: 16 + ``` + +> Note: job-names may only consist of chars `a-z`,`0-9` and `- ` + +The config dir depends on the user jobwatch is run as: +- root: `/etc/jobwatch.d` +- user: `$HOME/etc/jobwatch.d` + +## Deploying jobwatch +- Compile the Go program, if required, in the src-dir: `go get .;go build .` +- `cp jobwatch /usr/local/bin` +- `ln -s /usr/local/bin /usr/lib/check_agent/plugins` + +On the check-mk server just deploy the provided .mkp; no further +config is currently needed there + +## Invoking Jobwatch + +``` +jobwatch -h +Usage of jobwatch: + -d Debug + -i string + JobId Instance. Default '' + -j string + JobId. reads $jobDir/$job.job.yml Default '' + -jd string + JobDir. Default: $HOME/etc/jobwatch.d /etc/jobwatch.d' + -- After this parameter every further parameter is passed to the + called program + +Without any parameters CheckMK-Agent-Output is generated +``` + +Example 1 from a crontab +``` +10 9 * * 0 root /usr/local/bin/jobwatch -j bu-dupl +``` + +Example 2 from a crontab; here we use the same job for +different instances. (In this example 
we have backup-sets) +``` +10 5 * * 0,3 root /usr/local/bin/jobwatch -j bu-borg -i b01-main -- /opt/borg/B01_main.borgjob +10 5 * * 1,4 root /usr/local/bin/jobwatch -j bu-borg -i b10-dvp -- /opt/borg/B10_dvp.borgjob +``` + +### Instances/Multiuser + +See "Invoking Jobwatch" above + +> Note: instance-names have the same restrictions as job-names + +When using instances, the final job name in the output is `[userid]/[job-name]_[instance-name]`, otherwise only `[userid]/[job-name]` + + + + +## TBD: - redirect script-output to a logfile via Config - job-timeout - more metrics (last-success-ful run ) diff --git a/app/sample.job.yml b/app/sample.job.yml index a3ff92e..9482983 100644 --- a/app/sample.job.yml +++ b/app/sample.job.yml @@ -4,20 +4,20 @@ args: exitcode_map: - from: 23 to: 3 - - from: -1 + - from: -1 # Map all nonzero-exit codes to 1=WARN to: 1 - - from: 0 + - from: 0 # Map exitcode 0 also to 1=WARN to: 1 log_matches: - regex: .*192.168.*.* state: 1 - - regex: "-" + - regex: ".*tty[0-9].+-.*" # 2=CRIT when somebody is logged in on the console state: 2 alt_msg: "%v -> a user is logged in at console" - - regex: .+ + - regex: .+ # Include All other lines as OK state: 0 -hide_output: False -last_run_warn: +hide_output: False # Don't hide output, but pass through +last_run_warn: # How often is this job to be expected to be executed val: 8 unit: "h" last_run_crit: diff --git a/app/types/types.go b/app/types/types.go index 3ea47a4..edf35a9 100644 --- a/app/types/types.go +++ b/app/types/types.go @@ -50,7 +50,7 @@ func LoadJob(jobdir string, jobid string) (*Job, error) { dirs = append(dirs, jobdir) } else { if hd, err := os.UserHomeDir(); err == nil { - dirs = append(dirs, filepath.FromSlash(hd)+"/jobwatch.d") + dirs = append(dirs, filepath.FromSlash(hd)+"/etc/jobwatch.d") } dirs = append(dirs, "/etc/jobwatch.d") }