Skip to content
This repository was archived by the owner on Oct 23, 2024. It is now read-only.

Commit f0bd442

Browse files
Add non-collectd load monitor and make default (#1140)
The old collectd/load monitor is still present for backwards compatibility. The metrics are exactly the same except for the lack of a 'plugin' dimension.
1 parent fb7640e commit f0bd442

File tree

24 files changed

+304
-14
lines changed

24 files changed

+304
-14
lines changed

deployments/ansible/README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ This role sources the following variables:
3838
- type: collectd/df
3939
- type: disk
4040
- type: collectd/interface
41-
- type: collectd/load
41+
- type: load
4242
- type: collectd/memory
4343
- type: collectd/vmem
4444
- type: collectd/signalfx-metadata

deployments/ansible/example-config.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ sfx_agent_config:
77
- type: collectd/df
88
- type: disk
99
- type: collectd/interface
10-
- type: collectd/load
10+
- type: load
1111
- type: collectd/memory
1212
- type: collectd/signalfx-metadata
1313
omitProcessInfo: true

deployments/chef/README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ node['signalfx_agent']['conf'] = {
5353
{type: "collectd/df"},
5454
{type: "disk"},
5555
{type: "collectd/interface"},
56-
{type: "collectd/load"},
56+
{type: "load"},
5757
{type: "collectd/memory"},
5858
{"type": "collectd/signalfx-metadata", "omitProcessInfo": true},
5959
{type: "collectd/vmem"}

deployments/chef/example_attrs.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
{"type": "collectd/df"},
1313
{"type": "disk"},
1414
{"type": "collectd/interface"},
15-
{"type": "collectd/load"},
15+
{"type": "load"},
1616
{"type": "collectd/memory"},
1717
{"type": "collectd/signalfx-metadata", "omitProcessInfo": true},
1818
{"type": "collectd/vmem"},

deployments/docker/agent.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ monitors:
2727
hostFSPath: /hostfs
2828
- type: disk
2929
- type: collectd/interface
30-
- type: collectd/load
30+
- type: load
3131
- type: collectd/memory
3232
- type: collectd/vmem
3333
- type: collectd/signalfx-metadata

deployments/ecs/agent.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ monitors:
4141
hostFSPath: /hostfs
4242
- type: disk
4343
- type: collectd/interface
44-
- type: collectd/load
44+
- type: load
4545
- type: collectd/memory
4646
- type: collectd/signalfx-metadata
4747
omitProcessInfo: true

deployments/fargate/agent.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ monitors:
2121
- type: collectd/cpu
2222
- type: disk
2323
- type: collectd/interface
24-
- type: collectd/load
24+
- type: load
2525
- type: collectd/memory
2626
- type: collectd/protocols
2727
- type: collectd/signalfx-metadata

deployments/k8s/configmap.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ data:
3737
hostFSPath: /hostfs
3838
- type: disk
3939
- type: collectd/interface
40-
- type: collectd/load
40+
- type: load
4141
- type: collectd/memory
4242
- type: collectd/protocols
4343
- type: collectd/signalfx-metadata

deployments/k8s/helm/signalfx-agent/templates/configmap.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ data:
6161
hostFSPath: {{ .Values.hostFSPath }}
6262
- type: disk
6363
- type: collectd/interface
64-
- type: collectd/load
64+
- type: load
6565
- type: collectd/memory
6666
- type: collectd/protocols
6767
- type: collectd/signalfx-metadata

deployments/puppet/README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ class accepts the following parameters:
2222
{type: "collectd/df"},
2323
{type: "disk"},
2424
{type: "collectd/interface"},
25-
{type: "collectd/load"},
25+
{type: "load"},
2626
{type: "collectd/memory"},
2727
{type: "collectd/protocols"},
2828
{type: "collectd/signalfx-metadata", "omitProcessInfo": true},

deployments/salt/README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ signalfx-agent:
4949
- type: collectd/df
5050
- type: disk
5151
- type: collectd/interface
52-
- type: collectd/load
52+
- type: load
5353
- type: collectd/memory
5454
- type: collectd/vmem
5555
- type: collectd/signalfx-metadata

deployments/salt/pillar.example

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ signalfx-agent:
1111
- type: collectd/df
1212
- type: disk
1313
- type: collectd/interface
14-
- type: collectd/load
14+
- type: load
1515
- type: collectd/memory
1616
- type: collectd/signalfx-metadata
1717
omitProcessInfo: true

docs/monitor-config.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,7 @@ These are all of the monitors included in the agent, along with their possible c
126126
- [kubernetes-proxy](./monitors/kubernetes-proxy.md)
127127
- [kubernetes-scheduler](./monitors/kubernetes-scheduler.md)
128128
- [kubernetes-volumes](./monitors/kubernetes-volumes.md)
129+
- [load](./monitors/load.md)
129130
- [logstash](./monitors/logstash.md)
130131
- [logstash-tcp](./monitors/logstash-tcp.md)
131132
- [memory](./monitors/memory.md)

docs/monitors/collectd-load.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,10 @@ Monitor Type: `collectd/load` ([Source](https://github.com/signalfx/signalfx-age
1313
Monitors process load on the host using the collectd
1414
[Load plugin](https://collectd.org/wiki/index.php/Plugin:Load).
1515

16+
**This monitor has been deprecated in favor of the `load` monitor. That
17+
monitor emits the same metrics and is fully compatible. This
18+
`collectd/load` monitor will be removed in a future release of the agent.**
19+
1620

1721
## Configuration
1822

docs/monitors/load.md

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
<!--- GENERATED BY gomplate from scripts/docs/templates/monitor-page.md.tmpl --->
2+
3+
# load
4+
5+
Monitor Type: `load` ([Source](https://github.com/signalfx/signalfx-agent/tree/master/pkg/monitors/load))
6+
7+
**Accepts Endpoints**: No
8+
9+
**Multiple Instances Allowed**: **No**
10+
11+
## Overview
12+
13+
Monitors process load on the host. Process load is the average number of
14+
running or waiting processes over a certain time period (1, 5, and 15
15+
minutes).
16+
17+
This monitor is only available on Linux.
18+
19+
20+
## Configuration
21+
22+
To activate this monitor in the Smart Agent, add the following to your
23+
agent config:
24+
25+
```
26+
monitors: # All monitor config goes under this key
27+
- type: load
28+
... # Additional config
29+
```
30+
31+
**For a list of monitor options that are common to all monitors, see [Common
32+
Configuration](../monitor-config.md#common-configuration).**
33+
34+
35+
This monitor has no configuration options.
36+
## Metrics
37+
38+
These are the metrics available for this monitor.
39+
Metrics that are categorized as
40+
[container/host](https://docs.signalfx.com/en/latest/admin-guide/usage.html#about-custom-bundled-and-high-resolution-metrics)
41+
(*default*) are ***in bold and italics*** in the list below.
42+
43+
44+
- ***`load.longterm`*** (*gauge*)<br> Average CPU load per core over the last 15 minutes
45+
- ***`load.midterm`*** (*gauge*)<br> Average CPU load per core over the last five minutes
46+
- ***`load.shortterm`*** (*gauge*)<br> Average CPU load per core over the last one minute
47+
48+
### Non-default metrics (version 4.7.0+)
49+
50+
**The following information applies to the agent version 4.7.0+ that has
51+
`enableBuiltInFiltering: true` set on the top level of the agent config.**
52+
53+
To emit metrics that are not _default_, you can add those metrics in the
54+
generic monitor-level `extraMetrics` config option. Metrics that are derived
55+
from specific configuration options that do not appear in the above list of
56+
metrics do not need to be added to `extraMetrics`.
57+
58+
To see a list of metrics that will be emitted you can run `agent-status
59+
monitors` after configuring this monitor in a running agent instance.
60+
61+
### Legacy non-default metrics (version < 4.7.0)
62+
63+
**The following information only applies to agent version older than 4.7.0. If
64+
you have a newer agent and have set `enableBuiltInFiltering: true` at the top
65+
level of your agent config, see the section above. See upgrade instructions in
66+
[Old-style whitelist filtering](../legacy-filtering.md#old-style-whitelist-filtering).**
67+
68+
If you have a reference to the `whitelist.json` in your agent's top-level
69+
`metricsToExclude` config option, and you want to emit metrics that are not in
70+
that whitelist, then you need to add an item to the top-level
71+
`metricsToInclude` config option to override that whitelist (see [Inclusion
72+
filtering](../legacy-filtering.md#inclusion-filtering)). Or you can just
73+
copy the whitelist.json, modify it, and reference that in `metricsToExclude`.
74+
75+
76+

packaging/etc/agent.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ monitors:
2424
- type: collectd/df
2525
- type: disk
2626
- type: collectd/interface
27-
- type: collectd/load
27+
- type: load
2828
- type: collectd/memory
2929
- type: collectd/signalfx-metadata
3030
omitProcessInfo: true

pkg/core/modules.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ import (
4545
_ "github.com/signalfx/signalfx-agent/pkg/monitors/jaegergrpc"
4646
_ "github.com/signalfx/signalfx-agent/pkg/monitors/jmx"
4747
_ "github.com/signalfx/signalfx-agent/pkg/monitors/kubernetes"
48+
_ "github.com/signalfx/signalfx-agent/pkg/monitors/load"
4849
_ "github.com/signalfx/signalfx-agent/pkg/monitors/logstash/logstash"
4950
_ "github.com/signalfx/signalfx-agent/pkg/monitors/logstash/tcp"
5051
_ "github.com/signalfx/signalfx-agent/pkg/monitors/memory"

pkg/monitors/collectd/load/metadata.yaml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,10 @@ monitors:
33
doc: |
44
Monitors process load on the host using the collectd
55
[Load plugin](https://collectd.org/wiki/index.php/Plugin:Load).
6+
7+
**This monitor has been deprecated in favor of the `load` monitor. That
8+
monitor emits the same metrics and is fully compatible. This
9+
`collectd/load` monitor will be removed in a future release of the agent.**
610
metrics:
711
load.longterm:
812
description: Average CPU load per core over the last 15 minutes

pkg/monitors/load/genmetadata.go

Lines changed: 42 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pkg/monitors/load/load.go

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
package load
2+
3+
import (
4+
"context"
5+
"runtime"
6+
"time"
7+
8+
"github.com/shirou/gopsutil/load"
9+
"github.com/signalfx/golib/v3/datapoint"
10+
"github.com/signalfx/signalfx-agent/pkg/core/config"
11+
"github.com/signalfx/signalfx-agent/pkg/monitors"
12+
"github.com/signalfx/signalfx-agent/pkg/monitors/types"
13+
"github.com/signalfx/signalfx-agent/pkg/utils"
14+
"github.com/sirupsen/logrus"
15+
log "github.com/sirupsen/logrus"
16+
)
17+
18+
func init() { // registers the load monitor with the monitors package when this package is imported
19+
if runtime.GOOS != "windows" { // NOTE(review): the monitor docs say Linux-only, but this gate admits every non-Windows OS — confirm which is intended
20+
monitors.Register(&monitorMetadata, func() interface{} { return &Monitor{} }, &Config{})
21+
}
22+
}
23+
24+
// Config is the configuration for the load monitor; it embeds the common MonitorConfig and declares no options of its own.
25+
type Config struct {
26+
config.MonitorConfig `singleInstance:"true" acceptsEndpoints:"false"`
27+
}
28+
29+
// Monitor sends the host load averages as gauge datapoints through Output.
30+
type Monitor struct {
31+
Output types.Output // receives the load datapoints built in Configure
32+
cancel func() // cancels the context created in Configure; nil until Configure runs
33+
logger logrus.FieldLogger // logger tagged with the monitorType field
34+
}
35+
36+
// Configure schedules load collection through utils.RunOnInterval, passing an
37+
// interval of conf.IntervalSeconds seconds. It always returns nil.
38+
func (m *Monitor) Configure(conf *Config) error {
39+
m.logger = logrus.WithFields(log.Fields{"monitorType": monitorType})
40+
41+
// create a cancellable context for managing the collection loop; Shutdown calls m.cancel
42+
var ctx context.Context
43+
ctx, m.cancel = context.WithCancel(context.Background())
44+
45+
// each run reads the load averages and sends them, divided by the CPU count, as gauges
46+
utils.RunOnInterval(ctx, func() {
47+
avgLoad, err := load.Avg()
48+
if err != nil {
49+
m.logger.WithError(err).Error("Failed to get load statistics")
50+
return
51+
}
52+
53+
numCPUs := float64(runtime.NumCPU()) // divisor that turns the raw averages into the per-core values the metric descriptions promise
54+
55+
m.Output.SendDatapoints([]*datapoint.Datapoint{
56+
datapoint.New(loadLongterm, nil, datapoint.NewFloatValue(avgLoad.Load15/numCPUs), datapoint.Gauge, time.Time{}),
57+
datapoint.New(loadMidterm, nil, datapoint.NewFloatValue(avgLoad.Load5/numCPUs), datapoint.Gauge, time.Time{}),
58+
datapoint.New(loadShortterm, nil, datapoint.NewFloatValue(avgLoad.Load1/numCPUs), datapoint.Gauge, time.Time{}),
59+
}...)
60+
}, time.Duration(conf.IntervalSeconds)*time.Second)
61+
62+
return nil
63+
}
64+
65+
// Shutdown cancels the context created in Configure; it does nothing if Configure never ran.
66+
func (m *Monitor) Shutdown() {
67+
if m.cancel != nil {
68+
m.cancel()
69+
}
70+
}

pkg/monitors/load/metadata.yaml

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
monitors:
2+
- dimensions:
3+
doc: |
4+
Monitors process load on the host. Process load is the average number of
5+
running or waiting processes over a certain time period (1, 5, and 15
6+
minutes).
7+
8+
This monitor is only available on Linux.
9+
10+
metrics:
11+
load.longterm:
12+
description: Average CPU load per core over the last 15 minutes
13+
default: true
14+
type: gauge
15+
load.midterm:
16+
description: Average CPU load per core over the last five minutes
17+
default: true
18+
type: gauge
19+
load.shortterm:
20+
description: Average CPU load per core over the last one minute
21+
default: true
22+
type: gauge
23+
monitorType: load
24+
properties:
25+

0 commit comments

Comments
 (0)