Added log/logwatch-features. Locking. Other features
This commit is contained in:
parent
91fedca64a
commit
d4c130360b
18
README.md
18
README.md
|
|
@ -11,22 +11,24 @@ But if you want to ensure the script really run, in the required interval, compl
|
|||
without error, you have to add a lot of plumping code.
|
||||
|
||||
jobwatch helps to migitate this problem. It wraps the execution of your script, and does
|
||||
all the rest for you, by providing output, which can be fed to CheckMK then
|
||||
Agent-Plugin-Mechanism.
|
||||
all the rest for you, by providing output, which can be fed to CheckMK by the
|
||||
Agent-Plugin-mechanism.
|
||||
|
||||
jobwatch
|
||||
- checks for required exit-codes
|
||||
- searches through the output of your script via regex to classify certain
|
||||
keywords as WARN or CRIT
|
||||
- let you define the required run-interval
|
||||
- checks for required _exit-codes_, and maps them to _OK, WARN, CRIT, UNKN (0-3)_
|
||||
- _searches_ through the _console-output_ of your script via _regex_ to classify certain
|
||||
keywords as OK, WARN or CRIT (0, 1, 2)
|
||||
- sends previous regex matches to _logwatch_
|
||||
- prevents running your job in multiple instances (previous job took longer than expected)
|
||||
- let you define the required run-intervals
|
||||
|
||||
Simply add a .job-file into /etc/jobwatch.d where you defined all of this,
|
||||
Simply add a .job-file into /etc/jobwatch.d where you defines all of this,
|
||||
call you script via jobwatch -j job in the crontab and you are done.
|
||||
|
||||
TBD:
|
||||
- document deployment
|
||||
- document job-file (see included sample)
|
||||
- feed logoutput to CheckMK
|
||||
- redirect script-output to a logfile via Config
|
||||
- job-timeout
|
||||
- more metrics (last-success-ful run )
|
||||
|
||||
|
|
|
|||
|
|
@ -2,4 +2,7 @@ module jobwatch
|
|||
|
||||
go 1.18
|
||||
|
||||
require gopkg.in/yaml.v2 v2.4.0
|
||||
require (
|
||||
github.com/nightlyone/lockfile v1.0.0
|
||||
gopkg.in/yaml.v2 v2.4.0
|
||||
)
|
||||
|
|
|
|||
|
|
@ -1,3 +1,5 @@
|
|||
github.com/nightlyone/lockfile v1.0.0 h1:RHep2cFKK4PonZJDdEl4GmkabuhbsRMgk/k3uAmxBiA=
|
||||
github.com/nightlyone/lockfile v1.0.0/go.mod h1:rywoIealpdNse2r832aiD9jRk8ErCatROs6LzC841CI=
|
||||
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
||||
gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY=
|
||||
gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ=
|
||||
|
|
|
|||
77
app/main.go
77
app/main.go
|
|
@ -21,15 +21,15 @@ import (
|
|||
|
||||
// Jobs root-idfile
|
||||
|
||||
func evalOutput(job types.Job, output []string) (types.CMKState, error) {
|
||||
func evalOutput(job types.Job, output []string) (types.CMKState, []state.LogEntry, error) {
|
||||
var res types.CMKState = types.OK
|
||||
var logLines []string
|
||||
var logLines []state.LogEntry
|
||||
var rcs []*regexp.Regexp
|
||||
|
||||
for _, m := range job.LogMatches {
|
||||
r, err := regexp.Compile(m.Regex)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
return 0, logLines, err
|
||||
} else {
|
||||
rcs = append(rcs, r)
|
||||
}
|
||||
|
|
@ -47,20 +47,12 @@ func evalOutput(job types.Job, output []string) (types.CMKState, error) {
|
|||
v = fmt.Sprintf(m.AltMsg, v)
|
||||
}
|
||||
|
||||
var p = ""
|
||||
switch s := m.State; s {
|
||||
case types.OK:
|
||||
p = "O"
|
||||
case types.WARN:
|
||||
p = "W"
|
||||
case types.CRIT:
|
||||
p = "C"
|
||||
case types.UNKNOWN:
|
||||
p = "U"
|
||||
}
|
||||
p += ": "
|
||||
logLines = append(logLines, time.Now().Format("2006-01-2 15:04:05")+" "+p+v)
|
||||
|
||||
var le state.LogEntry
|
||||
le.State = state.ToLogState(m.State)
|
||||
le.Date = time.Now().Format("2006-01-2 15:04:05")
|
||||
le.Text = v
|
||||
logLines = append(logLines, le)
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -68,10 +60,10 @@ func evalOutput(job types.Job, output []string) (types.CMKState, error) {
|
|||
log.Printf("- Matched LogLine %v\n", oln)
|
||||
}
|
||||
|
||||
return res, nil
|
||||
return res, logLines, nil
|
||||
}
|
||||
|
||||
func runJob(job types.Job) (state.State, error) {
|
||||
func runJob(job types.Job) (state.State, []state.LogEntry, error) {
|
||||
|
||||
st := state.StateFromJob(job)
|
||||
st.LastExitCode = -1
|
||||
|
|
@ -96,7 +88,7 @@ func runJob(job types.Job) (state.State, error) {
|
|||
stcode = exitError.ProcessState.ExitCode()
|
||||
} else {
|
||||
// cmd not found etc
|
||||
return st, err
|
||||
return st, []state.LogEntry{}, err
|
||||
}
|
||||
if stcode == 0 { // Exec failed, but exitcode, be sure to get error!
|
||||
stcode = int(types.UNKNOWN)
|
||||
|
|
@ -119,7 +111,7 @@ func runJob(job types.Job) (state.State, error) {
|
|||
log.Printf("- State after ExitCodeMapping: %v", stcode)
|
||||
|
||||
outText := string(stdcombinedBuf.Bytes())
|
||||
log_state, err := evalOutput(job, strings.Split(outText, "\n"))
|
||||
log_state, logLines, err := evalOutput(job, strings.Split(outText, "\n"))
|
||||
if int(log_state) > stcode {
|
||||
stcode = int(log_state)
|
||||
}
|
||||
|
|
@ -130,7 +122,7 @@ func runJob(job types.Job) (state.State, error) {
|
|||
st.LastState = stcode
|
||||
st.LastRun = time.Now().Format(time.RFC3339)
|
||||
|
||||
return st, err
|
||||
return st, logLines, err
|
||||
}
|
||||
|
||||
func setup() (*types.Job, error) {
|
||||
|
|
@ -172,6 +164,7 @@ func setup() (*types.Job, error) {
|
|||
job.InstId = job_instance
|
||||
return job, nil
|
||||
} else {
|
||||
log.Printf("! Error loading job : %v\n", err)
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
|
@ -182,26 +175,50 @@ func main() {
|
|||
if job == nil && err == nil {
|
||||
fmt.Println("<<<jobwatch:sep(0)>>>")
|
||||
state.ShowAll()
|
||||
fmt.Println("<<<logwatch>>>")
|
||||
state.ShowAllLogs()
|
||||
fmt.Println("<<<>>>")
|
||||
} else {
|
||||
fmt.Fprintln(os.Stderr, "Jobwatch 0.1 (C) 2022 hoess@gmx.net")
|
||||
|
||||
var exitCode = 0
|
||||
var logLines []state.LogEntry
|
||||
var res state.State
|
||||
if err == nil {
|
||||
res, err = runJob(*job)
|
||||
}
|
||||
|
||||
res.ReEval()
|
||||
res.SaveState()
|
||||
defer func() {
|
||||
os.Exit(exitCode)
|
||||
}()
|
||||
|
||||
lock, err := state.Lock(job.GetJobBaseFileName(), "JOB")
|
||||
log.Println("Locked on job")
|
||||
if err == nil {
|
||||
defer func() {
|
||||
log.Println("Unlocking job")
|
||||
state.Unlock(lock)
|
||||
}()
|
||||
|
||||
if err != nil {
|
||||
log.Println("Can't acquire lock, skipping")
|
||||
}
|
||||
|
||||
if err == nil {
|
||||
res, logLines, err = runJob(*job)
|
||||
res.ReEval()
|
||||
res.SaveState()
|
||||
}
|
||||
|
||||
if err == nil {
|
||||
err = state.AppendLog(*job, logLines)
|
||||
}
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
fmt.Printf("! Error running job %+v\n", err)
|
||||
os.Exit(int(types.UNKNOWN))
|
||||
exitCode = int(types.UNKNOWN)
|
||||
} else {
|
||||
os.Exit(int(res.LastState))
|
||||
exitCode = int(res.LastState)
|
||||
}
|
||||
|
||||
state.WriteLog(*job)
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -13,8 +13,10 @@ log_matches:
|
|||
state: 1
|
||||
- regex: "-"
|
||||
state: 2
|
||||
alt_msg: "User logged in at console (%v)"
|
||||
hide_output: True
|
||||
alt_msg: "%v -> a user is logged in at console"
|
||||
- regex: .+
|
||||
state: 0
|
||||
hide_output: False
|
||||
last_run_warn:
|
||||
val: 8
|
||||
unit: "h"
|
||||
|
|
|
|||
|
|
@ -0,0 +1,45 @@
|
|||
package state
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"log"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"time"
|
||||
|
||||
"github.com/nightlyone/lockfile"
|
||||
)
|
||||
|
||||
const LOCk_WAIT_MILLIS = 30000
|
||||
|
||||
func Lock(jobFullName string, lockType string) (*lockfile.Lockfile, error) {
|
||||
|
||||
fn := fmt.Sprintf("%v_%v", jobFullName, lockType)
|
||||
lock, err := lockfile.New(filepath.Join(os.TempDir(), fn))
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("Cannot init lock. reason: %v", err)
|
||||
}
|
||||
|
||||
var retries = 300
|
||||
for retries > 0 {
|
||||
err = lock.TryLock()
|
||||
if err == nil {
|
||||
log.Printf("Locked via lockfile %v", lock)
|
||||
return &lock, nil
|
||||
}
|
||||
if _, ok := err.(interface{ Temporary() bool }); ok {
|
||||
retries--
|
||||
time.Sleep(time.Millisecond * 100)
|
||||
} else {
|
||||
retries = -1
|
||||
}
|
||||
}
|
||||
return nil, fmt.Errorf("Timeout acquiring lock %v/%v", jobFullName, lockType)
|
||||
}
|
||||
|
||||
func Unlock(lock *lockfile.Lockfile) {
|
||||
if lock != nil {
|
||||
log.Printf("Unlock lockfile %v", *lock)
|
||||
lock.Unlock()
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,188 @@
|
|||
package state
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"jobwatch/types"
|
||||
"log"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
)
|
||||
|
||||
type LogEntryState string
|
||||
|
||||
const MAX_LOG_ENTRIES = 2500
|
||||
const MAX_LOG_SEND = 250
|
||||
|
||||
const (
|
||||
LOG_OK LogEntryState = "O"
|
||||
LOG_WARN LogEntryState = "W"
|
||||
LOG_CRIT LogEntryState = "C"
|
||||
LOG_UNKWN LogEntryState = "U"
|
||||
)
|
||||
|
||||
type LogEntry struct {
|
||||
Id int `json:"id"`
|
||||
Date string `json:"date"`
|
||||
State LogEntryState `json:"state"`
|
||||
Text string `json:"text"`
|
||||
}
|
||||
|
||||
type LogDb struct {
|
||||
JobFullName string `json:"job_full_name"`
|
||||
MaxSentByRemote map[string]int `json:"max_sent_by_remote"`
|
||||
Entries []LogEntry `json:"entries"`
|
||||
}
|
||||
|
||||
func LoadLog(logPath string) (*LogDb, error) {
|
||||
|
||||
if _, err := os.Stat(logPath); err != nil {
|
||||
log.Println("Existing log not readable, creating new")
|
||||
return &LogDb{}, nil
|
||||
}
|
||||
|
||||
data, err := os.ReadFile(logPath)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var logDb LogDb
|
||||
err = json.Unmarshal(data, &logDb)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("Error unmarshaling state: %v", err)
|
||||
}
|
||||
|
||||
log.Println("existing log loaded")
|
||||
return &logDb, nil
|
||||
}
|
||||
|
||||
func (logDb LogDb) Save(logPath string) error {
|
||||
|
||||
data, err := json.MarshalIndent(logDb, " ", " ")
|
||||
if err != nil {
|
||||
return fmt.Errorf("Error marshaling state: %v", err)
|
||||
}
|
||||
|
||||
if err := os.WriteFile(logPath, data, 0600); err != nil {
|
||||
return fmt.Errorf("Error writing log file %v: %v", logPath, err)
|
||||
}
|
||||
|
||||
log.Println("Log written")
|
||||
return nil
|
||||
}
|
||||
|
||||
func AppendLog(job types.Job, entries []LogEntry) error {
|
||||
|
||||
lock, err := Lock(job.GetJobBaseFileName(), "LOG")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer func() {
|
||||
Unlock(lock)
|
||||
}()
|
||||
|
||||
logDir, err := getLibDir("states")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
var logPath = filepath.FromSlash(fmt.Sprintf("%v/%v.log.json", logDir, job.GetJobBaseFileName()))
|
||||
|
||||
logDb, err := LoadLog(logPath)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
logDb.JobFullName = job.GetJobBaseFileName()
|
||||
var maxId int = 1
|
||||
if len(logDb.Entries) > 0 {
|
||||
maxId = logDb.Entries[len(logDb.Entries)-1].Id
|
||||
}
|
||||
|
||||
for _, e := range entries {
|
||||
maxId++
|
||||
e.Id = maxId
|
||||
logDb.Entries = append(logDb.Entries, e)
|
||||
}
|
||||
|
||||
for len(logDb.Entries) > MAX_LOG_ENTRIES {
|
||||
logDb.Entries = logDb.Entries[:1]
|
||||
}
|
||||
|
||||
return logDb.Save(logPath)
|
||||
}
|
||||
|
||||
func ShowAllLogs() {
|
||||
dirs := findAllStateDirs()
|
||||
|
||||
var remote = os.Getenv("REMOTE")
|
||||
if remote == "" {
|
||||
remote = "-no-remote-"
|
||||
}
|
||||
|
||||
for _, d := range dirs {
|
||||
fsi, err := ioutil.ReadDir(d.Path)
|
||||
log.Printf("Scanning path %v", d.Path)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
|
||||
for _, e := range fsi {
|
||||
if strings.LastIndex(e.Name(), ".log.json") < 0 {
|
||||
continue
|
||||
}
|
||||
p := filepath.FromSlash(d.Path + "/" + e.Name())
|
||||
|
||||
jfn := strings.Replace(e.Name(), ".log.json", "", -1)
|
||||
lock, err := Lock(jfn, "LOG")
|
||||
if err != nil {
|
||||
log.Printf("Can't lock log %v: %v, skipping!", p, err)
|
||||
continue
|
||||
}
|
||||
defer func() {
|
||||
Unlock(lock)
|
||||
}()
|
||||
|
||||
log.Printf("Processing log file %v", p)
|
||||
logDb, err := LoadLog(p)
|
||||
if err != nil {
|
||||
log.Printf("Can't read log %v: %v, skipping!", p, err)
|
||||
continue
|
||||
}
|
||||
var maxSent = logDb.MaxSentByRemote[remote]
|
||||
|
||||
fmt.Printf("[[[JobWatch %v/%v]]]\n", d.UserId, logDb.JobFullName)
|
||||
var sentRem = MAX_LOG_SEND
|
||||
for _, l := range logDb.Entries {
|
||||
if l.Id <= maxSent {
|
||||
continue
|
||||
}
|
||||
|
||||
fmt.Printf("%v %v: %v\n", l.State, l.Date, l.Text)
|
||||
maxSent = l.Id
|
||||
sentRem--
|
||||
if sentRem <= 0 {
|
||||
break
|
||||
}
|
||||
}
|
||||
if logDb.MaxSentByRemote == nil {
|
||||
logDb.MaxSentByRemote = map[string]int{}
|
||||
}
|
||||
logDb.MaxSentByRemote[remote] = maxSent
|
||||
logDb.Save(p)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func ToLogState(s types.CMKState) LogEntryState {
|
||||
var p = LOG_UNKWN
|
||||
switch s {
|
||||
case types.OK:
|
||||
p = LOG_OK
|
||||
case types.WARN:
|
||||
p = LOG_WARN
|
||||
case types.CRIT:
|
||||
p = LOG_CRIT
|
||||
}
|
||||
return p
|
||||
}
|
||||
|
|
@ -45,21 +45,29 @@ type summary struct {
|
|||
Entries []summaryPerUser `json:"all"`
|
||||
}
|
||||
|
||||
func getStateDir() (string, error) {
|
||||
func getLibDir(subDir string) (string, error) {
|
||||
var dir string
|
||||
dir = "/var/lib/jobwatch/states"
|
||||
dir = "/var/lib/jobwatch"
|
||||
|
||||
if os.Getuid() > 0 {
|
||||
if od, err := os.UserHomeDir(); err == nil {
|
||||
dir = od + "/.jobwatch/states"
|
||||
dir = od + "/.jobwatch"
|
||||
}
|
||||
}
|
||||
|
||||
if len(subDir) > 0 {
|
||||
dir = filepath.FromSlash(dir + "/" + subDir)
|
||||
}
|
||||
|
||||
err := os.MkdirAll(dir, 0770)
|
||||
|
||||
return dir, err
|
||||
}
|
||||
|
||||
func getStateDir() (string, error) {
|
||||
return getLibDir("states")
|
||||
}
|
||||
|
||||
func findAllStateDirs() []summaryPerUser {
|
||||
var dirs []summaryPerUser
|
||||
var res []summaryPerUser
|
||||
|
|
@ -115,10 +123,6 @@ func LoadState(path string) (*State, error) {
|
|||
return &res, nil
|
||||
}
|
||||
|
||||
func WriteLog(job types.Job, line ...string) {
|
||||
fmt.Println("B")
|
||||
}
|
||||
|
||||
func (state *State) ReEval() error {
|
||||
lr, err := time.Parse(time.RFC3339, state.LastRun)
|
||||
if err != nil {
|
||||
|
|
@ -184,6 +188,10 @@ func ShowAll() error {
|
|||
}
|
||||
|
||||
for _, e := range fsi {
|
||||
if strings.LastIndex(e.Name(), ".state.json") < 0 {
|
||||
continue
|
||||
}
|
||||
|
||||
p := filepath.FromSlash(d.Path + "/" + e.Name())
|
||||
log.Printf("Processing state file %v", p)
|
||||
|
||||
|
|
|
|||
|
|
@ -78,8 +78,18 @@ func LoadJob(jobdir string, jobid string) (*Job, error) {
|
|||
if err == nil {
|
||||
log.Printf("%+v\n", job)
|
||||
} else {
|
||||
return nil, fmt.Errorf("! Error reading job %v: %+v\n", jobfile, err)
|
||||
s := fmt.Sprintf("! Error reading job %v: %+v\n", jobfile, err)
|
||||
return nil, fmt.Errorf(s)
|
||||
}
|
||||
job.Id = jobid
|
||||
return &job, nil
|
||||
}
|
||||
|
||||
func (j Job) GetJobBaseFileName() string {
|
||||
if j.InstId == "" {
|
||||
return filepath.FromSlash(fmt.Sprintf("%v", j.Id))
|
||||
} else {
|
||||
return filepath.FromSlash(fmt.Sprintf("%v_%v", j.Id, j.InstId))
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue