Added log/logwatch-features. Locking. Other features

This commit is contained in:
Michael Höß 2022-02-23 00:29:40 +01:00
parent 91fedca64a
commit d4c130360b
9 changed files with 326 additions and 49 deletions

View File

@ -11,22 +11,24 @@ But if you want to ensure the script really run, in the required interval, compl
without error, you have to add a lot of plumping code.
jobwatch helps to migitate this problem. It wraps the execution of your script, and does
all the rest for you, by providing output, which can be fed to CheckMK then
Agent-Plugin-Mechanism.
all the rest for you, by providing output, which can be fed to CheckMK by the
Agent-Plugin-mechanism.
jobwatch
- checks for required exit-codes
- searches through the output of your script via regex to classify certain
keywords as WARN or CRIT
- let you define the required run-interval
- checks for required _exit-codes_, and maps them to _OK, WARN, CRIT, UNKN (0-3)_
- _searches_ through the _console-output_ of your script via _regex_ to classify certain
keywords as OK, WARN or CRIT (0, 1, 2)
- sends previous regex matches to _logwatch_
- prevents running your job in multiple instances (previous job took longer than expected)
- let you define the required run-intervals
Simply add a .job-file into /etc/jobwatch.d where you defined all of this,
Simply add a .job-file into /etc/jobwatch.d where you defines all of this,
call you script via jobwatch -j job in the crontab and you are done.
TBD:
- document deployment
- document job-file (see included sample)
- feed logoutput to CheckMK
- redirect script-output to a logfile via Config
- job-timeout
- more metrics (last-success-ful run )

View File

@ -2,4 +2,7 @@ module jobwatch
go 1.18
require gopkg.in/yaml.v2 v2.4.0
require (
github.com/nightlyone/lockfile v1.0.0
gopkg.in/yaml.v2 v2.4.0
)

View File

@ -1,3 +1,5 @@
github.com/nightlyone/lockfile v1.0.0 h1:RHep2cFKK4PonZJDdEl4GmkabuhbsRMgk/k3uAmxBiA=
github.com/nightlyone/lockfile v1.0.0/go.mod h1:rywoIealpdNse2r832aiD9jRk8ErCatROs6LzC841CI=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY=
gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ=

View File

@ -21,15 +21,15 @@ import (
// Jobs root-idfile
func evalOutput(job types.Job, output []string) (types.CMKState, error) {
func evalOutput(job types.Job, output []string) (types.CMKState, []state.LogEntry, error) {
var res types.CMKState = types.OK
var logLines []string
var logLines []state.LogEntry
var rcs []*regexp.Regexp
for _, m := range job.LogMatches {
r, err := regexp.Compile(m.Regex)
if err != nil {
return 0, err
return 0, logLines, err
} else {
rcs = append(rcs, r)
}
@ -47,20 +47,12 @@ func evalOutput(job types.Job, output []string) (types.CMKState, error) {
v = fmt.Sprintf(m.AltMsg, v)
}
var p = ""
switch s := m.State; s {
case types.OK:
p = "O"
case types.WARN:
p = "W"
case types.CRIT:
p = "C"
case types.UNKNOWN:
p = "U"
}
p += ": "
logLines = append(logLines, time.Now().Format("2006-01-2 15:04:05")+" "+p+v)
var le state.LogEntry
le.State = state.ToLogState(m.State)
le.Date = time.Now().Format("2006-01-2 15:04:05")
le.Text = v
logLines = append(logLines, le)
break
}
}
}
@ -68,10 +60,10 @@ func evalOutput(job types.Job, output []string) (types.CMKState, error) {
log.Printf("- Matched LogLine %v\n", oln)
}
return res, nil
return res, logLines, nil
}
func runJob(job types.Job) (state.State, error) {
func runJob(job types.Job) (state.State, []state.LogEntry, error) {
st := state.StateFromJob(job)
st.LastExitCode = -1
@ -96,7 +88,7 @@ func runJob(job types.Job) (state.State, error) {
stcode = exitError.ProcessState.ExitCode()
} else {
// cmd not found etc
return st, err
return st, []state.LogEntry{}, err
}
if stcode == 0 { // Exec failed, but exitcode, be sure to get error!
stcode = int(types.UNKNOWN)
@ -119,7 +111,7 @@ func runJob(job types.Job) (state.State, error) {
log.Printf("- State after ExitCodeMapping: %v", stcode)
outText := string(stdcombinedBuf.Bytes())
log_state, err := evalOutput(job, strings.Split(outText, "\n"))
log_state, logLines, err := evalOutput(job, strings.Split(outText, "\n"))
if int(log_state) > stcode {
stcode = int(log_state)
}
@ -130,7 +122,7 @@ func runJob(job types.Job) (state.State, error) {
st.LastState = stcode
st.LastRun = time.Now().Format(time.RFC3339)
return st, err
return st, logLines, err
}
func setup() (*types.Job, error) {
@ -172,6 +164,7 @@ func setup() (*types.Job, error) {
job.InstId = job_instance
return job, nil
} else {
log.Printf("! Error loading job : %v\n", err)
return nil, err
}
}
@ -182,26 +175,50 @@ func main() {
if job == nil && err == nil {
fmt.Println("<<<jobwatch:sep(0)>>>")
state.ShowAll()
fmt.Println("<<<logwatch>>>")
state.ShowAllLogs()
fmt.Println("<<<>>>")
} else {
fmt.Fprintln(os.Stderr, "Jobwatch 0.1 (C) 2022 hoess@gmx.net")
var exitCode = 0
var logLines []state.LogEntry
var res state.State
defer func() {
os.Exit(exitCode)
}()
lock, err := state.Lock(job.GetJobBaseFileName(), "JOB")
log.Println("Locked on job")
if err == nil {
res, err = runJob(*job)
defer func() {
log.Println("Unlocking job")
state.Unlock(lock)
}()
if err != nil {
log.Println("Can't acquire lock, skipping")
}
if err == nil {
res, logLines, err = runJob(*job)
res.ReEval()
res.SaveState()
}
if err == nil {
err = state.AppendLog(*job, logLines)
}
}
if err != nil {
fmt.Printf("! Error running job %+v\n", err)
os.Exit(int(types.UNKNOWN))
exitCode = int(types.UNKNOWN)
} else {
os.Exit(int(res.LastState))
exitCode = int(res.LastState)
}
state.WriteLog(*job)
}
}

View File

@ -13,8 +13,10 @@ log_matches:
state: 1
- regex: "-"
state: 2
alt_msg: "User logged in at console (%v)"
hide_output: True
alt_msg: "%v -> a user is logged in at console"
- regex: .+
state: 0
hide_output: False
last_run_warn:
val: 8
unit: "h"

45
app/state/lock.go Normal file
View File

@ -0,0 +1,45 @@
package state
import (
"fmt"
"log"
"os"
"path/filepath"
"time"
"github.com/nightlyone/lockfile"
)
const LOCk_WAIT_MILLIS = 30000
func Lock(jobFullName string, lockType string) (*lockfile.Lockfile, error) {
fn := fmt.Sprintf("%v_%v", jobFullName, lockType)
lock, err := lockfile.New(filepath.Join(os.TempDir(), fn))
if err != nil {
return nil, fmt.Errorf("Cannot init lock. reason: %v", err)
}
var retries = 300
for retries > 0 {
err = lock.TryLock()
if err == nil {
log.Printf("Locked via lockfile %v", lock)
return &lock, nil
}
if _, ok := err.(interface{ Temporary() bool }); ok {
retries--
time.Sleep(time.Millisecond * 100)
} else {
retries = -1
}
}
return nil, fmt.Errorf("Timeout acquiring lock %v/%v", jobFullName, lockType)
}
func Unlock(lock *lockfile.Lockfile) {
if lock != nil {
log.Printf("Unlock lockfile %v", *lock)
lock.Unlock()
}
}

188
app/state/log.go Normal file
View File

@ -0,0 +1,188 @@
package state
import (
"encoding/json"
"fmt"
"io/ioutil"
"jobwatch/types"
"log"
"os"
"path/filepath"
"strings"
)
type LogEntryState string
const MAX_LOG_ENTRIES = 2500
const MAX_LOG_SEND = 250
const (
LOG_OK LogEntryState = "O"
LOG_WARN LogEntryState = "W"
LOG_CRIT LogEntryState = "C"
LOG_UNKWN LogEntryState = "U"
)
type LogEntry struct {
Id int `json:"id"`
Date string `json:"date"`
State LogEntryState `json:"state"`
Text string `json:"text"`
}
type LogDb struct {
JobFullName string `json:"job_full_name"`
MaxSentByRemote map[string]int `json:"max_sent_by_remote"`
Entries []LogEntry `json:"entries"`
}
func LoadLog(logPath string) (*LogDb, error) {
if _, err := os.Stat(logPath); err != nil {
log.Println("Existing log not readable, creating new")
return &LogDb{}, nil
}
data, err := os.ReadFile(logPath)
if err != nil {
return nil, err
}
var logDb LogDb
err = json.Unmarshal(data, &logDb)
if err != nil {
return nil, fmt.Errorf("Error unmarshaling state: %v", err)
}
log.Println("existing log loaded")
return &logDb, nil
}
func (logDb LogDb) Save(logPath string) error {
data, err := json.MarshalIndent(logDb, " ", " ")
if err != nil {
return fmt.Errorf("Error marshaling state: %v", err)
}
if err := os.WriteFile(logPath, data, 0600); err != nil {
return fmt.Errorf("Error writing log file %v: %v", logPath, err)
}
log.Println("Log written")
return nil
}
func AppendLog(job types.Job, entries []LogEntry) error {
lock, err := Lock(job.GetJobBaseFileName(), "LOG")
if err != nil {
return err
}
defer func() {
Unlock(lock)
}()
logDir, err := getLibDir("states")
if err != nil {
return err
}
var logPath = filepath.FromSlash(fmt.Sprintf("%v/%v.log.json", logDir, job.GetJobBaseFileName()))
logDb, err := LoadLog(logPath)
if err != nil {
return err
}
logDb.JobFullName = job.GetJobBaseFileName()
var maxId int = 1
if len(logDb.Entries) > 0 {
maxId = logDb.Entries[len(logDb.Entries)-1].Id
}
for _, e := range entries {
maxId++
e.Id = maxId
logDb.Entries = append(logDb.Entries, e)
}
for len(logDb.Entries) > MAX_LOG_ENTRIES {
logDb.Entries = logDb.Entries[:1]
}
return logDb.Save(logPath)
}
func ShowAllLogs() {
dirs := findAllStateDirs()
var remote = os.Getenv("REMOTE")
if remote == "" {
remote = "-no-remote-"
}
for _, d := range dirs {
fsi, err := ioutil.ReadDir(d.Path)
log.Printf("Scanning path %v", d.Path)
if err != nil {
continue
}
for _, e := range fsi {
if strings.LastIndex(e.Name(), ".log.json") < 0 {
continue
}
p := filepath.FromSlash(d.Path + "/" + e.Name())
jfn := strings.Replace(e.Name(), ".log.json", "", -1)
lock, err := Lock(jfn, "LOG")
if err != nil {
log.Printf("Can't lock log %v: %v, skipping!", p, err)
continue
}
defer func() {
Unlock(lock)
}()
log.Printf("Processing log file %v", p)
logDb, err := LoadLog(p)
if err != nil {
log.Printf("Can't read log %v: %v, skipping!", p, err)
continue
}
var maxSent = logDb.MaxSentByRemote[remote]
fmt.Printf("[[[JobWatch %v/%v]]]\n", d.UserId, logDb.JobFullName)
var sentRem = MAX_LOG_SEND
for _, l := range logDb.Entries {
if l.Id <= maxSent {
continue
}
fmt.Printf("%v %v: %v\n", l.State, l.Date, l.Text)
maxSent = l.Id
sentRem--
if sentRem <= 0 {
break
}
}
if logDb.MaxSentByRemote == nil {
logDb.MaxSentByRemote = map[string]int{}
}
logDb.MaxSentByRemote[remote] = maxSent
logDb.Save(p)
}
}
}
func ToLogState(s types.CMKState) LogEntryState {
var p = LOG_UNKWN
switch s {
case types.OK:
p = LOG_OK
case types.WARN:
p = LOG_WARN
case types.CRIT:
p = LOG_CRIT
}
return p
}

View File

@ -45,21 +45,29 @@ type summary struct {
Entries []summaryPerUser `json:"all"`
}
func getStateDir() (string, error) {
func getLibDir(subDir string) (string, error) {
var dir string
dir = "/var/lib/jobwatch/states"
dir = "/var/lib/jobwatch"
if os.Getuid() > 0 {
if od, err := os.UserHomeDir(); err == nil {
dir = od + "/.jobwatch/states"
dir = od + "/.jobwatch"
}
}
if len(subDir) > 0 {
dir = filepath.FromSlash(dir + "/" + subDir)
}
err := os.MkdirAll(dir, 0770)
return dir, err
}
func getStateDir() (string, error) {
return getLibDir("states")
}
func findAllStateDirs() []summaryPerUser {
var dirs []summaryPerUser
var res []summaryPerUser
@ -115,10 +123,6 @@ func LoadState(path string) (*State, error) {
return &res, nil
}
func WriteLog(job types.Job, line ...string) {
fmt.Println("B")
}
func (state *State) ReEval() error {
lr, err := time.Parse(time.RFC3339, state.LastRun)
if err != nil {
@ -184,6 +188,10 @@ func ShowAll() error {
}
for _, e := range fsi {
if strings.LastIndex(e.Name(), ".state.json") < 0 {
continue
}
p := filepath.FromSlash(d.Path + "/" + e.Name())
log.Printf("Processing state file %v", p)

View File

@ -78,8 +78,18 @@ func LoadJob(jobdir string, jobid string) (*Job, error) {
if err == nil {
log.Printf("%+v\n", job)
} else {
return nil, fmt.Errorf("! Error reading job %v: %+v\n", jobfile, err)
s := fmt.Sprintf("! Error reading job %v: %+v\n", jobfile, err)
return nil, fmt.Errorf(s)
}
job.Id = jobid
return &job, nil
}
func (j Job) GetJobBaseFileName() string {
if j.InstId == "" {
return filepath.FromSlash(fmt.Sprintf("%v", j.Id))
} else {
return filepath.FromSlash(fmt.Sprintf("%v_%v", j.Id, j.InstId))
}
}