Prometheus rate、irate、increase 笔记

rate

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
// extrapolatedRate computes the change of the first series in vals[0] (last value minus first value,
// corrected for counter resets when isCounter is true) and scales it towards the boundaries of the range
// selected by args[0]. With isRate the result is divided by the range in seconds. At most one Sample is appended to enh.Out.
func extrapolatedRate(vals []parser.Value, args parser.Expressions, enh *EvalNodeHelper, isCounter bool, isRate bool) Vector {
ms := args[0].(*parser.MatrixSelector)
vs := ms.VectorSelector.(*parser.VectorSelector)
var (
// samples holds the points of one series over the selected range (the first element of the range vector).
samples = vals[0].(Matrix)[0]

// The window is [enh.Ts - (ms.Range + vs.Offset), enh.Ts - vs.Offset]: the bracketed range, shifted back by the offset.
rangeStart = enh.Ts - durationMilliseconds(ms.Range+vs.Offset)
rangeEnd = enh.Ts - durationMilliseconds(vs.Offset)
)

// No sense in trying to compute a rate without at least two points. Drop
// this Vector element.
if len(samples.Points) < 2 {
return enh.Out
}

resultValue := samples.Points[len(samples.Points)-1].V - samples.Points[0].V
// A value lower than its predecessor is treated as a counter reset (e.g. the exporter restarted and counts from zero again); the value seen before the drop is added back.
if isCounter {
var lastValue float64
for _, sample := range samples.Points {
if sample.V < lastValue {
resultValue += lastValue
}
lastValue = sample.V
}
}

// Duration between first/last samples and boundary of range. Timestamps are presumably milliseconds (cf. durationMilliseconds), so /1000 gives seconds — TODO confirm.
durationToStart := float64(samples.Points[0].T-rangeStart) / 1000
// Gap between the last (most recent) sample and rangeEnd, converted the same way.
durationToEnd := float64(rangeEnd-samples.Points[len(samples.Points)-1].T) / 1000

// Time spanned by the samples themselves: last timestamp minus first timestamp.
sampledInterval := float64(samples.Points[len(samples.Points)-1].T-samples.Points[0].T) / 1000
// Average spacing between consecutive samples.
averageDurationBetweenSamples := sampledInterval / float64(len(samples.Points)-1)

if isCounter && resultValue > 0 && samples.Points[0].V >= 0 {
// Counters cannot be negative. If we have any slope at
// all (i.e. resultValue went up), we can extrapolate
// the zero point of the counter. If the duration to the
// zero point is shorter than the durationToStart, we
// take the zero point as the start of the series,
// thereby avoiding extrapolation to negative counter
// values.
durationToZero := sampledInterval * (samples.Points[0].V / resultValue)
if durationToZero < durationToStart {
durationToStart = durationToZero
}
}

// If the first/last samples are close to the boundaries of the range,
// extrapolate the result. This is as we expect that another sample
// will exist given the spacing between samples we've seen thus far,
// with an allowance for noise.
extrapolationThreshold := averageDurationBetweenSamples * 1.1
extrapolateToInterval := sampledInterval

if durationToStart < extrapolationThreshold {
extrapolateToInterval += durationToStart
} else {
extrapolateToInterval += averageDurationBetweenSamples / 2 // start boundary too far away: extend by half an average interval only
}
if durationToEnd < extrapolationThreshold {
extrapolateToInterval += durationToEnd
} else {
extrapolateToInterval += averageDurationBetweenSamples / 2 // end boundary too far away: extend by half an average interval only
}
resultValue = resultValue * (extrapolateToInterval / sampledInterval)
if isRate {
resultValue = resultValue / ms.Range.Seconds() // divide by the full range length, not by sampledInterval
}

return append(enh.Out, Sample{
Point: Point{V: resultValue},
})
}


// === rate(node parser.ValueTypeMatrix) Vector ===
//
// funcRate implements rate(): it delegates to extrapolatedRate with both the
// counter-reset handling and the per-second division switched on.
func funcRate(vals []parser.Value, args parser.Expressions, enh *EvalNodeHelper) Vector {
	const (
		isCounter = true
		isRate    = true
	)
	return extrapolatedRate(vals, args, enh, isCounter, isRate)
}

说明:
从上面代码的第44行(`if isCounter && resultValue > 0 && samples.Points[0].V >= 0 {`)开始,代入数值就可以推算出具体的结果,但是有些细节需要补充一下:

durationToStart 和 durationToEnd 受查询时间和 metric 的 scrape time 影响。如果其中某一个不小于 extrapolationThreshold(即 averageDurationBetweenSamples * 1.1),那么这一侧外推的时长就不再使用它本身,而是改用 averageDurationBetweenSamples / 2。

例如:假设有一个 counter 类型的指标 A,以每秒 10 的速度增长,采集间隔设置为 5s,我们需要查询最近 60s 的数据;第一次采集的时间为 00:01:00,最后一次采集的时间为 00:02:00,当前查询时间为 00:02:03,查询语句为 rate(A[1m])。一般情况下,计算逻辑(伪代码)如下:

1
2
3
4
5
6
7
8
rangeStart = "00:02:03" - (1m + 0) // 没有offset,rangeStart == 00:01:03
rangeEnd = "00:02:03" - 0 // 没有offset,rangeEnd == 00:02:03
durationToStart = (区间内第一个样本的时间戳(00:01:05) - rangeStart) / 1000 // 00:01:00 的样本早于 rangeStart,不在区间内;除以1000是把毫秒换算成秒,durationToStart == 2
durationToEnd = (rangeEnd - 区间内最后一个样本的时间戳(00:02:00)) / 1000 // durationToEnd == 3

sampledInterval = (最后一个样本的时间戳(00:02:00) - 第一个样本的时间戳(00:01:05)) / 1000 // sampledInterval == 55(秒)

averageDurationBetweenSamples = sampledInterval / (区间内样本的数量(12个) - 1) // averageDurationBetweenSamples == 55 / 11 == 5

剩下的去套上面的程序(从第44行开始),至于resultValue可以随便编一个,但是要合理。

increase

1
2
3
4
// === increase(node parser.ValueTypeMatrix) Vector ===
//
// funcIncrease implements increase(): it delegates to extrapolatedRate with
// counter-reset handling enabled (isCounter = true) but without the final
// per-second division (isRate = false), so the extrapolated increase over the
// range is returned instead of a rate.
func funcIncrease(vals []parser.Value, args parser.Expressions, enh *EvalNodeHelper) Vector {
	return extrapolatedRate(vals, args, enh, true, false)
}

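increase 和 rate 共用同一个函数 extrapolatedRate,区别只是 increase 调用时 isRate 为 false,因此不会执行 extrapolatedRate 函数第76行 `if isRate { ... }` 里的内容(resultValue / ms.Range.Seconds()),返回的是区间内的增量,而不是每秒的速率。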

irate

源代码(2021/4/14):

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41

// === irate(node parser.ValueTypeMatrix) Vector ===
//
// funcIrate implements irate(): it hands the evaluated arguments and the
// output vector of enh to instantValue and asks for a per-second result.
func funcIrate(vals []parser.Value, args parser.Expressions, enh *EvalNodeHelper) Vector {
	const perSecond = true
	out := enh.Out
	return instantValue(vals, out, perSecond)
}
....

// instantValue derives one value from the last two points of the first series
// in vals[0] and appends it to out as a Sample.
//
// out is returned unchanged when the series has fewer than two points or when
// the two points carry the same timestamp. Otherwise the value is the
// difference between the last and the previous point. With isRate set, a drop
// in value is treated as a counter reset (the last value itself is used) and
// the result is divided by the elapsed time, scaled by 1000 to get a
// per-second figure.
func instantValue(vals []parser.Value, out Vector, isRate bool) Vector {
	series := vals[0].(Matrix)[0]
	count := len(series.Points)
	// Two points are the minimum needed for a difference; otherwise drop
	// this Vector element.
	if count < 2 {
		return out
	}

	prev, last := series.Points[count-2], series.Points[count-1]

	// Identical timestamps would mean dividing by zero below.
	elapsed := last.T - prev.T
	if elapsed == 0 {
		return out
	}

	delta := last.V - prev.V
	if isRate && last.V < prev.V {
		// Counter reset: only the amount counted since the reset is known.
		delta = last.V
	}

	if isRate {
		// Convert to per-second.
		delta /= float64(elapsed) / 1000
	}

	return append(out, Sample{Point: Point{V: delta}})
}

irate最终的计算规则:

1
(倒数第一个 metric 的 Value - 倒数第二个 metric 的 Value) / (倒数第一个 metric 的抓取时间(秒) - 倒数第二个 metric 的抓取时间(秒));如果发生了 counter 重置(倒数第一个 Value 小于倒数第二个 Value),分子直接取倒数第一个 metric 的 Value。