瀏覽代碼

Add display of identifying labels + reworked the structure

Brendan Abolivier 7 年之前
父節點
當前提交
ff134181ad
簽署人: Brendan Abolivier <contact@brendanabolivier.com> GPG 金鑰 ID: 8EF1500759F70623

+ 9
- 38
src/metrics-alerting/alert/alert.go 查看文件

@@ -4,54 +4,25 @@ import (
4 4
 	"fmt"
5 5
 
6 6
 	"metrics-alerting/config"
7
-	"metrics-alerting/warp10"
8
-)
9
-
10
-func ProcessNumber(
11
-	client warp10.Warp10Client,
12
-	script config.Script,
13
-	ms config.MailSettings,
14
-) error {
15
-	value, err := client.ReadNumber(script.Script)
16
-	if err != nil {
17
-		return err
18
-	}
19
-
20
-	if value < script.Threshold {
21
-		// Nothing to alert about
22
-		return nil
23
-	}
24 7
 
25
-	return alert(script, value, ms)
26
-}
27
-
28
-func ProcessBool(
29
-	client warp10.Warp10Client,
30
-	script config.Script,
31
-	ms config.MailSettings,
32
-) error {
33
-	value, err := client.ReadBool(script.Script)
34
-	if err != nil {
35
-		return err
36
-	}
37
-
38
-	if value {
39
-		return nil
40
-	}
8
+	"gopkg.in/gomail.v2"
9
+)
41 10
 
42
-	return alert(script, value, ms)
11
+type Alerter struct {
12
+	Dialer *gomail.Dialer
13
+	Sender string
43 14
 }
44 15
 
45
-func alert(
16
+func (a *Alerter) Alert(
46 17
 	script config.Script,
47 18
 	result interface{},
48
-	ms config.MailSettings,
19
+	labels map[string]string,
49 20
 ) error {
50 21
 	switch script.Action {
51 22
 	case "http":
52
-		return alertHttp(script, result)
23
+		return a.alertHttp(script, result, labels)
53 24
 	case "email":
54
-		return alertEmail(script, result, ms)
25
+		return a.alertEmail(script, result, labels)
55 26
 	default:
56 27
 		return fmt.Errorf("invalid action type: %s", script.Action)
57 28
 	}

+ 52
- 28
src/metrics-alerting/alert/email.go 查看文件

@@ -1,55 +1,79 @@
1 1
 package alert
2 2
 
3 3
 import (
4
+	"encoding/json"
4 5
 	"fmt"
6
+	"strings"
5 7
 
6 8
 	"metrics-alerting/config"
7 9
 
8 10
 	"gopkg.in/gomail.v2"
9 11
 )
10 12
 
11
-func alertEmail(
13
+func (a *Alerter) alertEmail(
12 14
 	script config.Script,
13 15
 	result interface{},
14
-	ms config.MailSettings,
16
+	labels map[string]string,
15 17
 ) error {
16
-	formatNumber := `
17
-Script %s just exceeded its threshold of %f and now returns %f
18
-
19
-Script:
20
-
21
-%s
22
-	`
23
-
24
-	formatBool := `
25
-Test for script %s and returned false instead of true
26
-
27
-Script:
28
-
29
-%s
30
-	`
18
+	formatNumber := "Script %s just exceeded its threshold of %.2f and now returns %f"
19
+	formatBool := "Test for script %s and returned false instead of true"
31 20
 
32 21
 	var body, subject string
33 22
 	switch script.Type {
34
-	case "number":
35
-		subject = fmt.Sprintf("Threshold exceeded for script %s", script.Key)
23
+	case "number", "series":
24
+		subject = fmt.Sprintf(
25
+			"Threshold exceeded for script %s %s", script.Key,
26
+			getIdentifyingLabels(script, labels),
27
+		)
36 28
 		body = fmt.Sprintf(
37 29
 			formatNumber, script.Key, script.Threshold, result.(float64),
38
-			script.Script,
39 30
 		)
40 31
 	case "bool":
41
-		subject = fmt.Sprintf("Test for script %s failed", script.Key)
42
-		body = fmt.Sprintf(formatBool, script.Key, script.Script)
32
+		subject = fmt.Sprintf(
33
+			"Test for script %s failed %s", script.Key,
34
+			getIdentifyingLabels(script, labels),
35
+		)
36
+		body = fmt.Sprintf(formatBool, script.Key)
43 37
 	}
44 38
 
39
+	if labels != nil {
40
+		jsonLabels, err := json.Marshal(labels)
41
+		if err != nil {
42
+			return err
43
+		}
44
+		body = fmt.Sprintf("%s\n\nLabels: %+v", body, string(jsonLabels))
45
+	}
46
+
47
+	body = fmt.Sprintf("%s\n\nScript:\n%s", body, script.Script)
48
+
45 49
 	m := gomail.NewMessage()
46
-	m.SetHeader("From", ms.Sender)
47
-	m.SetHeader("To", ms.Recipient)
50
+	m.SetHeader("From", a.Sender)
51
+	m.SetHeader("To", script.Target)
48 52
 	m.SetHeader("Subject", subject)
49 53
 	m.SetBody("text/plain", body)
50 54
 
51
-	d := gomail.NewDialer(
52
-		ms.SMTP.Host, ms.SMTP.Port, ms.SMTP.Username, ms.SMTP.Password,
53
-	)
54
-	return d.DialAndSend(m)
55
+	return a.Dialer.DialAndSend(m)
56
+}
57
+
58
+func getIdentifyingLabels(
59
+	script config.Script,
60
+	labels map[string]string,
61
+) string {
62
+	if len(script.IdentifyingLabels) == 0 {
63
+		return ""
64
+	}
65
+
66
+	identifyingLabels := make(map[string]string)
67
+	for _, label := range script.IdentifyingLabels {
68
+		identifyingLabels[label] = labels[label]
69
+	}
70
+
71
+	labelsAsStrs := []string{}
72
+	var labelAsStr string
73
+	for key, value := range identifyingLabels {
74
+		labelAsStr = key + ": " + value
75
+		labelsAsStrs = append(labelsAsStrs, labelAsStr)
76
+	}
77
+
78
+	return "(" + strings.Join(labelsAsStrs, ", ") + ")"
55 79
 }

+ 11
- 5
src/metrics-alerting/alert/http.go 查看文件

@@ -11,11 +11,16 @@ import (
11 11
 )
12 12
 
13 13
 type alertBody struct {
14
-	Key   string `json:"scriptKey"`
15
-	Value string `json:"value"`
14
+	Key    string            `json:"scriptKey"`
15
+	Value  string            `json:"value"`
16
+	Labels map[string]string `json:"labels"`
16 17
 }
17 18
 
18
-func alertHttp(script config.Script, result interface{}) error {
19
+func (a *Alerter) alertHttp(
20
+	script config.Script,
21
+	result interface{},
22
+	labels map[string]string,
23
+) error {
19 24
 	var value string
20 25
 	switch script.Type {
21 26
 	case "number":
@@ -25,8 +30,9 @@ func alertHttp(script config.Script, result interface{}) error {
25 30
 	}
26 31
 
27 32
 	alert := alertBody{
28
-		Key:   script.Key,
29
-		Value: value,
33
+		Key:    script.Key,
34
+		Value:  value,
35
+		Labels: labels,
30 36
 	}
31 37
 
32 38
 	body, err := json.Marshal(alert)

+ 3
- 2
src/metrics-alerting/config/config.go 查看文件

@@ -9,8 +9,6 @@ import (
9 9
 type MailSettings struct {
10 10
 	// Sender of the alert emails
11 11
 	Sender string `yaml:"sender"`
12
-	// Recipient of the alert emails
13
-	Recipient string `yaml:"recipient"`
14 12
 	// Settings to connect to the mail server
15 13
 	SMTP SMTPSettings `yaml:"smtp"`
16 14
 }
@@ -40,6 +38,9 @@ type Script struct {
40 38
 	Action string `yaml:"action"`
41 39
 	// The action's target
42 40
 	Target string `yaml:"target"`
41
+	// The labels that will be mentioned in the email subject. Optional; only
42
+	// relevant if the action is "email"
43
+	IdentifyingLabels []string `yaml:"identifying_labels,omitempty"`
43 44
 }
44 45
 
45 46
 type Config struct {

+ 15
- 2
src/metrics-alerting/main.go 查看文件

@@ -6,9 +6,11 @@ import (
6 6
 
7 7
 	"metrics-alerting/alert"
8 8
 	"metrics-alerting/config"
9
+	"metrics-alerting/process"
9 10
 	"metrics-alerting/warp10"
10 11
 
11 12
 	"github.com/sirupsen/logrus"
13
+	"gopkg.in/gomail.v2"
12 14
 )
13 15
 
14 16
 var (
@@ -23,15 +25,26 @@ func main() {
23 25
 		ExecEndpoint: cfg.Warp10Exec,
24 26
 		ReadToken:    cfg.ReadToken,
25 27
 	}
28
+	dialer := gomail.NewDialer(
29
+		cfg.Mail.SMTP.Host, cfg.Mail.SMTP.Port, cfg.Mail.SMTP.Username,
30
+		cfg.Mail.SMTP.Password,
31
+	)
32
+	alerter := alert.Alerter{
33
+		Dialer: dialer,
34
+		Sender: cfg.Mail.Sender,
35
+	}
26 36
 
27 37
 	for _, script := range cfg.Scripts {
28 38
 		var err error
29 39
 		switch script.Type {
30 40
 		case "number":
31
-			err = alert.ProcessNumber(client, script, cfg.Mail)
41
+			err = process.ProcessNumber(client, script, alerter)
32 42
 			break
33 43
 		case "bool":
34
-			err = alert.ProcessBool(client, script, cfg.Mail)
44
+			err = process.ProcessBool(client, script, alerter)
45
+			break
46
+		case "series":
47
+			err = process.ProcessSeries(client, script, alerter)
35 48
 			break
36 49
 		default:
37 50
 			err = fmt.Errorf("invalid return type: %s", script.Type)

+ 97
- 0
src/metrics-alerting/process/process.go 查看文件

@@ -0,0 +1,97 @@
1
+package process
2
+
3
+import (
4
+	"time"
5
+
6
+	"metrics-alerting/alert"
7
+	"metrics-alerting/config"
8
+	"metrics-alerting/warp10"
9
+)
10
+
11
+func ProcessNumber(
12
+	client warp10.Warp10Client,
13
+	script config.Script,
14
+	alerter alert.Alerter,
15
+) error {
16
+	value, err := client.ReadNumber(script.Script)
17
+	if err != nil {
18
+		return err
19
+	}
20
+
21
+	return processFloat(value, script, alerter, nil)
22
+}
23
+
24
+func ProcessBool(
25
+	client warp10.Warp10Client,
26
+	script config.Script,
27
+	alerter alert.Alerter,
28
+) error {
29
+	value, err := client.ReadBool(script.Script)
30
+	if err != nil {
31
+		return err
32
+	}
33
+
34
+	if value {
35
+		return nil
36
+	}
37
+
38
+	return alerter.Alert(script, value, nil)
39
+}
40
+
41
+func ProcessSeries(
42
+	client warp10.Warp10Client,
43
+	script config.Script,
44
+	alerter alert.Alerter,
45
+) error {
46
+	series, err := client.ReadSeriesOfNumbers(script.Script)
47
+
48
+	for _, serie := range series {
49
+		if !isRecentEnough(serie.Datapoints[0]) {
50
+			// If the serie hasn't been active in the last 10min, don't consider
51
+			// it
52
+			// TODO: If the serie was active at the previous run, send an alert
53
+			continue
54
+		}
55
+
56
+		// Remove useless ".app" label
57
+		_, ok := serie.Labels[".app"]
58
+		if ok {
59
+			delete(serie.Labels, ".app")
60
+		}
61
+		// TODO: Currently we only process the most recent point.
62
+		// If the datapoint is above the threshold, we should crawl back in time
63
+		// to find when the situation began so we can add info about time in the
64
+		// alert.
65
+		if err = processFloat(
66
+			serie.Datapoints[0][1], script, alerter, serie.Labels,
67
+		); err != nil {
68
+			return err
69
+		}
70
+	}
71
+
72
+	return nil
73
+}
74
+
75
+func processFloat(
76
+	value float64,
77
+	script config.Script,
78
+	alerter alert.Alerter,
79
+	labels map[string]string,
80
+) error {
81
+	if value < script.Threshold {
82
+		// Nothing to alert about
83
+		return nil
84
+	}
85
+
86
+	return alerter.Alert(script, value, labels)
87
+}
88
+
89
+func isRecentEnough(datapoint []float64) bool {
90
+	// Allowed offset between a point and now is 10min, expressed in microseconds
91
+	allowedOffset := int64(600000000)
92
+
93
+	now := time.Now().UnixNano() / 1000 // Current timestamp (microseconds)
94
+
95
+	return now-int64(datapoint[0]) <= allowedOffset
96
+	return false
97
+}

+ 14
- 0
src/metrics-alerting/warp10/serie.go 查看文件

@@ -0,0 +1,14 @@
1
+package warp10
2
+
3
+type FloatTimeSerie struct {
4
+	// Class name of the serie
5
+	Class string `json:"c"`
6
+	// Labels of the serie
7
+	Labels map[string]string `json:"l"`
8
+	// Attributes of the serie
9
+	Attributes map[string]string `json:"a"`
10
+	// Datapoints: each element of the slice is a point, represented as a slice
11
+	// with the timestamp as its first element and the value of the point as its
12
+	// second element
13
+	Datapoints [][]float64 `json:"v"`
14
+}

+ 10
- 0
src/metrics-alerting/warp10/warp10.go 查看文件

@@ -42,6 +42,16 @@ func (w *Warp10Client) ReadNumber(script string) (f float64, err error) {
42 42
 	return
43 43
 }
44 44
 
45
+func (w *Warp10Client) ReadSeriesOfNumbers(script string) (f []FloatTimeSerie, err error) {
46
+	resp, err := w.sendRequest(script)
47
+	if err != nil {
48
+		return
49
+	}
50
+
51
+	err = json.NewDecoder(resp.Body).Decode(&f)
52
+	return
53
+}
54
+
45 55
 func (w *Warp10Client) appendToken(script string) string {
46 56
 	return fmt.Sprintf("'%s' 'token' STORE\n%s", w.ReadToken, script)
47 57
 }