diff --git a/README.md b/README.md index 3d5ada4..db12b0d 100644 --- a/README.md +++ b/README.md @@ -11,22 +11,24 @@ But if you want to ensure the script really run, in the required interval, compl without error, you have to add a lot of plumping code. jobwatch helps to migitate this problem. It wraps the execution of your script, and does -all the rest for you, by providing output, which can be fed to CheckMK then -Agent-Plugin-Mechanism. +all the rest for you, by providing output, which can be fed to CheckMK by the +Agent-Plugin-mechanism. jobwatch -- checks for required exit-codes -- searches through the output of your script via regex to classify certain - keywords as WARN or CRIT -- let you define the required run-interval +- checks for required _exit-codes_, and maps them to _OK, WARN, CRIT, UNKN (0-3)_ +- _searches_ through the _console-output_ of your script via _regex_ to classify certain + keywords as OK, WARN or CRIT (0, 1, 2) +- sends previous regex matches to _logwatch_ +- prevents running your job in multiple instances (previous job took longer than expected) +- let you define the required run-intervals -Simply add a .job-file into /etc/jobwatch.d where you defined all of this, +Simply add a .job-file into /etc/jobwatch.d where you defines all of this, call you script via jobwatch -j job in the crontab and you are done. TBD: - document deployment - document job-file (see included sample) -- feed logoutput to CheckMK - redirect script-output to a logfile via Config +- job-timeout - more metrics (last-success-ful run ) diff --git a/app/go.mod b/app/go.mod index 633de7c..c71474c 100644 --- a/app/go.mod +++ b/app/go.mod @@ -2,4 +2,7 @@ module jobwatch go 1.18 -require gopkg.in/yaml.v2 v2.4.0 +require ( + github.com/nightlyone/lockfile v1.0.0 + gopkg.in/yaml.v2 v2.4.0 +) diff --git a/app/go.sum b/app/go.sum index 7534661..9f40921 100644 --- a/app/go.sum +++ b/app/go.sum @@ -1,3 +1,5 @@ +github.com/nightlyone/lockfile v1.0.0 h1:RHep2cFKK4PonZJDdEl4GmkabuhbsRMgk/k3uAmxBiA= +github.com/nightlyone/lockfile v1.0.0/go.mod h1:rywoIealpdNse2r832aiD9jRk8ErCatROs6LzC841CI= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY= gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= diff --git a/app/main.go b/app/main.go index a8e358a..a95dbc3 100644 --- a/app/main.go +++ b/app/main.go @@ -21,15 +21,15 @@ import ( // Jobs root-idfile -func evalOutput(job types.Job, output []string) (types.CMKState, error) { +func evalOutput(job types.Job, output []string) (types.CMKState, []state.LogEntry, error) { var res types.CMKState = types.OK - var logLines []string + var logLines []state.LogEntry var rcs []*regexp.Regexp for _, m := range job.LogMatches { r, err := regexp.Compile(m.Regex) if err != nil { - return 0, err + return 0, logLines, err } else { rcs = append(rcs, r) } @@ -47,20 +47,12 @@ func evalOutput(job types.Job, output []string) (types.CMKState, error) { v = fmt.Sprintf(m.AltMsg, v) } - var p = "" - switch s := m.State; s { - case types.OK: - p = "O" - case types.WARN: - p = "W" - case types.CRIT: - p = "C" - case types.UNKNOWN: - p = "U" - } - p += ": " - logLines = append(logLines, time.Now().Format("2006-01-2 15:04:05")+" "+p+v) - + var le state.LogEntry + le.State = state.ToLogState(m.State) + le.Date = time.Now().Format("2006-01-2 15:04:05") + le.Text = v + logLines = append(logLines, le) + break } } } @@ -68,10 +60,10 @@ func evalOutput(job types.Job, output []string) (types.CMKState, error) { log.Printf("- Matched LogLine %v\n", oln) } - return res, nil + return res, logLines, nil } -func runJob(job types.Job) (state.State, error) { +func runJob(job types.Job) (state.State, []state.LogEntry, error) { st := state.StateFromJob(job) st.LastExitCode = -1 @@ -96,7 +88,7 @@ func runJob(job types.Job) (state.State, error) { stcode = exitError.ProcessState.ExitCode() } else { // cmd not found etc - return st, err + return st, []state.LogEntry{}, err } if stcode == 0 { // Exec failed, but exitcode, be sure to get error! stcode = int(types.UNKNOWN) @@ -119,7 +111,7 @@ func runJob(job types.Job) (state.State, error) { log.Printf("- State after ExitCodeMapping: %v", stcode) outText := string(stdcombinedBuf.Bytes()) - log_state, err := evalOutput(job, strings.Split(outText, "\n")) + log_state, logLines, err := evalOutput(job, strings.Split(outText, "\n")) if int(log_state) > stcode { stcode = int(log_state) } @@ -130,7 +122,7 @@ func runJob(job types.Job) (state.State, error) { st.LastState = stcode st.LastRun = time.Now().Format(time.RFC3339) - return st, err + return st, logLines, err } func setup() (*types.Job, error) { @@ -172,6 +164,7 @@ func setup() (*types.Job, error) { job.InstId = job_instance return job, nil } else { + log.Printf("! Error loading job : %v\n", err) return nil, err } } @@ -182,26 +175,50 @@ func main() { if job == nil && err == nil { fmt.Println("<<>>") state.ShowAll() + fmt.Println("<<>>") + state.ShowAllLogs() fmt.Println("<<<>>>") } else { fmt.Fprintln(os.Stderr, "Jobwatch 0.1 (C) 2022 hoess@gmx.net") + var exitCode = 0 + var logLines []state.LogEntry var res state.State - if err == nil { - res, err = runJob(*job) - } - res.ReEval() - res.SaveState() + defer func() { + os.Exit(exitCode) + }() + + lock, err := state.Lock(job.GetJobBaseFileName(), "JOB") + log.Println("Locked on job") + if err == nil { + defer func() { + log.Println("Unlocking job") + state.Unlock(lock) + }() + + if err != nil { + log.Println("Can't acquire lock, skipping") + } + + if err == nil { + res, logLines, err = runJob(*job) + res.ReEval() + res.SaveState() + } + + if err == nil { + err = state.AppendLog(*job, logLines) + } + } if err != nil { fmt.Printf("! Error running job %+v\n", err) - os.Exit(int(types.UNKNOWN)) + exitCode = int(types.UNKNOWN) } else { - os.Exit(int(res.LastState)) + exitCode = int(res.LastState) } - state.WriteLog(*job) } } diff --git a/app/sample.job.yml b/app/sample.job.yml index 3ed19ea..a3ff92e 100644 --- a/app/sample.job.yml +++ b/app/sample.job.yml @@ -13,8 +13,10 @@ log_matches: state: 1 - regex: "-" state: 2 - alt_msg: "User logged in at console (%v)" -hide_output: True + alt_msg: "%v -> a user is logged in at console" + - regex: .+ + state: 0 +hide_output: False last_run_warn: val: 8 unit: "h" diff --git a/app/state/lock.go b/app/state/lock.go new file mode 100644 index 0000000..d900e10 --- /dev/null +++ b/app/state/lock.go @@ -0,0 +1,45 @@ +package state + +import ( + "fmt" + "log" + "os" + "path/filepath" + "time" + + "github.com/nightlyone/lockfile" +) + +const LOCk_WAIT_MILLIS = 30000 + +func Lock(jobFullName string, lockType string) (*lockfile.Lockfile, error) { + + fn := fmt.Sprintf("%v_%v", jobFullName, lockType) + lock, err := lockfile.New(filepath.Join(os.TempDir(), fn)) + if err != nil { + return nil, fmt.Errorf("Cannot init lock. reason: %v", err) + } + + var retries = 300 + for retries > 0 { + err = lock.TryLock() + if err == nil { + log.Printf("Locked via lockfile %v", lock) + return &lock, nil + } + if _, ok := err.(interface{ Temporary() bool }); ok { + retries-- + time.Sleep(time.Millisecond * 100) + } else { + retries = -1 + } + } + return nil, fmt.Errorf("Timeout acquiring lock %v/%v", jobFullName, lockType) +} + +func Unlock(lock *lockfile.Lockfile) { + if lock != nil { + log.Printf("Unlock lockfile %v", *lock) + lock.Unlock() + } +} diff --git a/app/state/log.go b/app/state/log.go new file mode 100644 index 0000000..2d9c1e7 --- /dev/null +++ b/app/state/log.go @@ -0,0 +1,188 @@ +package state + +import ( + "encoding/json" + "fmt" + "io/ioutil" + "jobwatch/types" + "log" + "os" + "path/filepath" + "strings" +) + +type LogEntryState string + +const MAX_LOG_ENTRIES = 2500 +const MAX_LOG_SEND = 250 + +const ( + LOG_OK LogEntryState = "O" + LOG_WARN LogEntryState = "W" + LOG_CRIT LogEntryState = "C" + LOG_UNKWN LogEntryState = "U" +) + +type LogEntry struct { + Id int `json:"id"` + Date string `json:"date"` + State LogEntryState `json:"state"` + Text string `json:"text"` +} + +type LogDb struct { + JobFullName string `json:"job_full_name"` + MaxSentByRemote map[string]int `json:"max_sent_by_remote"` + Entries []LogEntry `json:"entries"` +} + +func LoadLog(logPath string) (*LogDb, error) { + + if _, err := os.Stat(logPath); err != nil { + log.Println("Existing log not readable, creating new") + return &LogDb{}, nil + } + + data, err := os.ReadFile(logPath) + if err != nil { + return nil, err + } + + var logDb LogDb + err = json.Unmarshal(data, &logDb) + if err != nil { + return nil, fmt.Errorf("Error unmarshaling state: %v", err) + } + + log.Println("existing log loaded") + return &logDb, nil +} + +func (logDb LogDb) Save(logPath string) error { + + data, err := json.MarshalIndent(logDb, " ", " ") + if err != nil { + return fmt.Errorf("Error marshaling state: %v", err) + } + + if err := os.WriteFile(logPath, data, 0600); err != nil { + return fmt.Errorf("Error writing log file %v: %v", logPath, err) + } + + log.Println("Log written") + return nil +} + +func AppendLog(job types.Job, entries []LogEntry) error { + + lock, err := Lock(job.GetJobBaseFileName(), "LOG") + if err != nil { + return err + } + defer func() { + Unlock(lock) + }() + + logDir, err := getLibDir("states") + if err != nil { + return err + } + var logPath = filepath.FromSlash(fmt.Sprintf("%v/%v.log.json", logDir, job.GetJobBaseFileName())) + + logDb, err := LoadLog(logPath) + if err != nil { + return err + } + logDb.JobFullName = job.GetJobBaseFileName() + var maxId int = 1 + if len(logDb.Entries) > 0 { + maxId = logDb.Entries[len(logDb.Entries)-1].Id + } + + for _, e := range entries { + maxId++ + e.Id = maxId + logDb.Entries = append(logDb.Entries, e) + } + + for len(logDb.Entries) > MAX_LOG_ENTRIES { + logDb.Entries = logDb.Entries[:1] + } + + return logDb.Save(logPath) +} + +func ShowAllLogs() { + dirs := findAllStateDirs() + + var remote = os.Getenv("REMOTE") + if remote == "" { + remote = "-no-remote-" + } + + for _, d := range dirs { + fsi, err := ioutil.ReadDir(d.Path) + log.Printf("Scanning path %v", d.Path) + if err != nil { + continue + } + + for _, e := range fsi { + if strings.LastIndex(e.Name(), ".log.json") < 0 { + continue + } + p := filepath.FromSlash(d.Path + "/" + e.Name()) + + jfn := strings.Replace(e.Name(), ".log.json", "", -1) + lock, err := Lock(jfn, "LOG") + if err != nil { + log.Printf("Can't lock log %v: %v, skipping!", p, err) + continue + } + defer func() { + Unlock(lock) + }() + + log.Printf("Processing log file %v", p) + logDb, err := LoadLog(p) + if err != nil { + log.Printf("Can't read log %v: %v, skipping!", p, err) + continue + } + var maxSent = logDb.MaxSentByRemote[remote] + + fmt.Printf("[[[JobWatch %v/%v]]]\n", d.UserId, logDb.JobFullName) + var sentRem = MAX_LOG_SEND + for _, l := range logDb.Entries { + if l.Id <= maxSent { + continue + } + + fmt.Printf("%v %v: %v\n", l.State, l.Date, l.Text) + maxSent = l.Id + sentRem-- + if sentRem <= 0 { + break + } + } + if logDb.MaxSentByRemote == nil { + logDb.MaxSentByRemote = map[string]int{} + } + logDb.MaxSentByRemote[remote] = maxSent + logDb.Save(p) + } + } +} + +func ToLogState(s types.CMKState) LogEntryState { + var p = LOG_UNKWN + switch s { + case types.OK: + p = LOG_OK + case types.WARN: + p = LOG_WARN + case types.CRIT: + p = LOG_CRIT + } + return p +} diff --git a/app/state/state.go b/app/state/state.go index 53698ad..a603d3d 100644 --- a/app/state/state.go +++ b/app/state/state.go @@ -45,21 +45,29 @@ type summary struct { Entries []summaryPerUser `json:"all"` } -func getStateDir() (string, error) { +func getLibDir(subDir string) (string, error) { var dir string - dir = "/var/lib/jobwatch/states" + dir = "/var/lib/jobwatch" if os.Getuid() > 0 { if od, err := os.UserHomeDir(); err == nil { - dir = od + "/.jobwatch/states" + dir = od + "/.jobwatch" } } + if len(subDir) > 0 { + dir = filepath.FromSlash(dir + "/" + subDir) + } + err := os.MkdirAll(dir, 0770) return dir, err } +func getStateDir() (string, error) { + return getLibDir("states") +} + func findAllStateDirs() []summaryPerUser { var dirs []summaryPerUser var res []summaryPerUser @@ -115,10 +123,6 @@ func LoadState(path string) (*State, error) { return &res, nil } -func WriteLog(job types.Job, line ...string) { - fmt.Println("B") -} - func (state *State) ReEval() error { lr, err := time.Parse(time.RFC3339, state.LastRun) if err != nil { @@ -184,6 +188,10 @@ func ShowAll() error { } for _, e := range fsi { + if strings.LastIndex(e.Name(), ".state.json") < 0 { + continue + } + p := filepath.FromSlash(d.Path + "/" + e.Name()) log.Printf("Processing state file %v", p) diff --git a/app/types/types.go b/app/types/types.go index 95208f3..3ea47a4 100644 --- a/app/types/types.go +++ b/app/types/types.go @@ -78,8 +78,18 @@ func LoadJob(jobdir string, jobid string) (*Job, error) { if err == nil { log.Printf("%+v\n", job) } else { - return nil, fmt.Errorf("! Error reading job %v: %+v\n", jobfile, err) + s := fmt.Sprintf("! Error reading job %v: %+v\n", jobfile, err) + return nil, fmt.Errorf(s) } job.Id = jobid return &job, nil } + +func (j Job) GetJobBaseFileName() string { + if j.InstId == "" { + return filepath.FromSlash(fmt.Sprintf("%v", j.Id)) + } else { + return filepath.FromSlash(fmt.Sprintf("%v_%v", j.Id, j.InstId)) + } + +}