Skip to content

Commit 601a99d

Browse files
authored
[exporter/doris] Second PR of New component: Doris Exporter (#34980)
**Description:** Second PR of New component: Doris Exporter. Implementation of traces. **Link to tracking Issue:** #33479 **Testing:** **Documentation:**
1 parent e3a44b8 commit 601a99d

File tree

12 files changed

+693
-23
lines changed

12 files changed

+693
-23
lines changed

.chloggen/doris-traces.yaml

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
# Use this changelog template to create an entry for release notes.
2+
3+
# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix'
4+
change_type: new_component
5+
6+
# The name of the component, or a single word describing the area of concern, (e.g. filelogreceiver)
7+
component: dorisexporter
8+
9+
# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`).
10+
note: traces implementation
11+
12+
# Mandatory: One or more tracking issues related to the change. You can use the PR number here if no issue exists.
13+
issues: [33479]
14+
15+
# (Optional) One or more lines of additional information to render under the primary note.
16+
# These lines will be padded with 2 spaces and then inserted directly into the document.
17+
# Use pipe (|) for multiline entries.
18+
subtext:
19+
20+
# If your change doesn't affect end users or the exported elements of any package,
21+
# you should instead start your pull request title with [chore] or use the "Skip Changelog" label.
22+
# Optional: The change log or logs in which this entry should be included.
23+
# e.g. '[user]' or '[user, api]'
24+
# Include 'user' if the change is relevant to end users.
25+
# Include 'api' if there is a change to a library API.
26+
# Default: '[user]'
27+
change_logs: [user]

exporter/dorisexporter/README.md

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -24,13 +24,12 @@ The following configuration options are supported:
2424
* `logs` (default = otel_logs) The table name for logs.
2525
* `traces` (default = otel_traces) The table name for traces.
2626
* `metrics` (default = otel_metrics) The table name for metrics.
27-
* `create_schema` (default = true) Whether databases and tables are created automatically.
28-
* `mysql_endpoint` The mysql protocol address to create the schema; ignored if `create_schema` is false.
27+
* `create_schema` (default = true) Whether databases and tables are created automatically in doris.
28+
* `mysql_endpoint` The mysql protocol address of doris. Only use to create the schema; ignored if `create_schema` is false.
2929
* `history_days` (default = 0) Data older than these days will be deleted; ignored if `create_schema` is false. If set to 0, historical data will not be deleted.
3030
* `create_history_days` (default = 0) The number of days in the history partition that was created when the table was created; ignored if `create_schema` is false. If `history_days` is not 0, `create_history_days` needs to be less than or equal to `history_days`.
3131
* `replication_num` (default = 1) The number of replicas of the table; ignored if `create_schema` is false.
3232
* `timezone` (default is the time zone of the opentelemetry collector) The time zone of doris.
33-
* `timeout` (default = 5s) Time to wait per individual attempt to send data to a backend.
3433
* `sending_queue` [details here](https://github.com/open-telemetry/opentelemetry-collector/tree/main/exporter/exporterhelper#configuration)
3534
* `enabled` (default = true)
3635
* `num_consumers` (default = 10) Number of consumers that dequeue batches; ignored if `enabled` is false.
@@ -41,6 +40,10 @@ The following configuration options are supported:
4140
* `max_interval` (default = 30s) The upper bound on backoff; ignored if `enabled` is false.
4241
* `max_elapsed_time` (default = 300s) The maximum amount of time spent trying to send a batch; ignored if `enabled` is false. If set to 0, the retries are never stopped.
4342

43+
The Doris exporter supports common [HTTP Configuration Settings](https://github.com/open-telemetry/opentelemetry-collector/blob/main/config/confighttp/README.md#http-configuration-settings), except for compression (all requests are uncompressed). As a consequence of supporting [confighttp](https://github.com/open-telemetry/opentelemetry-collector/blob/main/config/confighttp/README.md#http-configuration-settings), the Doris exporter also supports common [TLS Configuration Settings](https://github.com/open-telemetry/opentelemetry-collector/blob/main/config/configtls/README.md#tls-configuration-settings).
44+
45+
The Doris exporter sets `timeout` (HTTP request timeout) to 60s by default. All other defaults are as defined by [confighttp](https://github.com/open-telemetry/opentelemetry-collector/blob/main/config/confighttp/README.md#http-configuration-settings).
46+
4447
Example:
4548
```yaml
4649
exporters:

exporter/dorisexporter/config.go

Lines changed: 47 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5,23 +5,24 @@ package dorisexporter // import "github.com/open-telemetry/opentelemetry-collect
55

66
import (
77
"errors"
8+
"fmt"
89
"regexp"
10+
"time"
911

12+
"go.opentelemetry.io/collector/config/confighttp"
1013
"go.opentelemetry.io/collector/config/configopaque"
1114
"go.opentelemetry.io/collector/config/configretry"
1215
"go.opentelemetry.io/collector/exporter/exporterhelper"
1316
)
1417

1518
type Config struct {
16-
exporterhelper.TimeoutSettings `mapstructure:",squash"`
17-
configretry.BackOffConfig `mapstructure:"retry_on_failure"`
18-
exporterhelper.QueueSettings `mapstructure:"sending_queue"`
19+
confighttp.ClientConfig `mapstructure:",squash"`
20+
configretry.BackOffConfig `mapstructure:"retry_on_failure"`
21+
exporterhelper.QueueSettings `mapstructure:"sending_queue"`
1922

2023
// TableNames is the table name for logs, traces and metrics.
2124
Table `mapstructure:"table"`
2225

23-
// Endpoint is the http stream load address.
24-
Endpoint string `mapstructure:"endpoint"`
2526
// Database is the database name.
2627
Database string `mapstructure:"database"`
2728
// Username is the authentication username.
@@ -93,5 +94,46 @@ func (cfg *Config) Validate() (err error) {
9394
err = errors.Join(err, errors.New("metrics table name must be alphanumeric and underscore"))
9495
}
9596

97+
_, errT := cfg.timeZone()
98+
if errT != nil {
99+
err = errors.Join(err, errors.New("invalid timezone"))
100+
}
101+
96102
return err
97103
}
104+
105+
const (
106+
defaultStart = -2147483648 // IntMin
107+
)
108+
109+
func (cfg *Config) startHistoryDays() int32 {
110+
if cfg.HistoryDays == 0 {
111+
return defaultStart
112+
}
113+
return -cfg.HistoryDays
114+
}
115+
116+
func (cfg *Config) timeZone() (*time.Location, error) {
117+
return time.LoadLocation(cfg.TimeZone)
118+
}
119+
120+
const (
121+
properties = `
122+
PROPERTIES (
123+
"replication_num" = "%d",
124+
"enable_single_replica_compaction" = "true",
125+
"compaction_policy" = "time_series",
126+
"dynamic_partition.enable" = "true",
127+
"dynamic_partition.create_history_partition" = "true",
128+
"dynamic_partition.time_unit" = "DAY",
129+
"dynamic_partition.start" = "%d",
130+
"dynamic_partition.history_partition_num" = "%d",
131+
"dynamic_partition.end" = "1",
132+
"dynamic_partition.prefix" = "p"
133+
)
134+
`
135+
)
136+
137+
func (cfg *Config) propertiesStr() string {
138+
return fmt.Sprintf(properties, cfg.ReplicationNum, cfg.startHistoryDays(), cfg.CreateHistoryDays)
139+
}

exporter/dorisexporter/config_test.go

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ import (
1212
"github.com/stretchr/testify/assert"
1313
"github.com/stretchr/testify/require"
1414
"go.opentelemetry.io/collector/component"
15+
"go.opentelemetry.io/collector/config/confighttp"
1516
"go.opentelemetry.io/collector/config/configopaque"
1617
"go.opentelemetry.io/collector/config/configretry"
1718
"go.opentelemetry.io/collector/confmap/confmaptest"
@@ -30,6 +31,10 @@ func TestLoadConfig(t *testing.T) {
3031
defaultCfg.(*Config).Endpoint = "http://localhost:8030"
3132
defaultCfg.(*Config).MySQLEndpoint = "localhost:9030"
3233

34+
httpClientConfig := confighttp.NewDefaultClientConfig()
35+
httpClientConfig.Timeout = 5 * time.Second
36+
httpClientConfig.Endpoint = "http://localhost:8030"
37+
3338
tests := []struct {
3439
id component.ID
3540
expected component.Config
@@ -41,7 +46,7 @@ func TestLoadConfig(t *testing.T) {
4146
{
4247
id: component.NewIDWithName(metadata.Type, "full"),
4348
expected: &Config{
44-
TimeoutSettings: exporterhelper.TimeoutSettings{Timeout: 5 * time.Second},
49+
ClientConfig: httpClientConfig,
4550
BackOffConfig: configretry.BackOffConfig{
4651
Enabled: true,
4752
InitialInterval: 5 * time.Second,
@@ -60,7 +65,6 @@ func TestLoadConfig(t *testing.T) {
6065
Traces: "otel_traces",
6166
Metrics: "otel_metrics",
6267
},
63-
Endpoint: "http://localhost:8030",
6468
Database: "otel",
6569
Username: "admin",
6670
Password: configopaque.String("admin"),
Lines changed: 117 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,117 @@
1+
// Copyright The OpenTelemetry Authors
2+
// SPDX-License-Identifier: Apache-2.0
3+
4+
package dorisexporter // import "github.com/open-telemetry/opentelemetry-collector-contrib/exporter/dorisexporter"
5+
6+
import (
7+
"bytes"
8+
"context"
9+
"database/sql"
10+
"fmt"
11+
"net/http"
12+
"time"
13+
14+
_ "github.com/go-sql-driver/mysql" // for register database driver
15+
"go.opentelemetry.io/collector/component"
16+
"go.uber.org/zap"
17+
)
18+
19+
const timeFormat = "2006-01-02 15:04:05.999999"
20+
21+
type commonExporter struct {
22+
component.TelemetrySettings
23+
24+
client *http.Client
25+
26+
logger *zap.Logger
27+
cfg *Config
28+
timeZone *time.Location
29+
}
30+
31+
func newExporter(logger *zap.Logger, cfg *Config, set component.TelemetrySettings) *commonExporter {
32+
// There won't be an error because it's already been validated in the Config.Validate method.
33+
timeZone, _ := cfg.timeZone()
34+
35+
return &commonExporter{
36+
TelemetrySettings: set,
37+
logger: logger,
38+
cfg: cfg,
39+
timeZone: timeZone,
40+
}
41+
}
42+
43+
func (e *commonExporter) formatTime(t time.Time) string {
44+
return t.In(e.timeZone).Format(timeFormat)
45+
}
46+
47+
type streamLoadResponse struct {
48+
TxnID int64
49+
Label string
50+
Status string
51+
ExistingJobStatus string
52+
Message string
53+
NumberTotalRows int64
54+
NumberLoadedRows int64
55+
NumberFilteredRows int64
56+
NumberUnselectedRows int64
57+
LoadBytes int64
58+
LoadTimeMs int64
59+
BeginTxnTimeMs int64
60+
StreamLoadPutTimeMs int64
61+
ReadDataTimeMs int64
62+
WriteDataTimeMs int64
63+
CommitAndPublishTimeMs int64
64+
ErrorURL string
65+
}
66+
67+
func (r *streamLoadResponse) success() bool {
68+
return r.Status == "Success" || r.Status == "Publish Timeout"
69+
}
70+
71+
func streamLoadURL(address string, db string, table string) string {
72+
return address + "/api/" + db + "/" + table + "/_stream_load"
73+
}
74+
75+
func streamLoadRequest(ctx context.Context, cfg *Config, table string, data []byte) (*http.Request, error) {
76+
url := streamLoadURL(cfg.Endpoint, cfg.Database, table)
77+
req, err := http.NewRequestWithContext(ctx, http.MethodPut, url, bytes.NewBuffer(data))
78+
if err != nil {
79+
return nil, err
80+
}
81+
82+
req.Header.Set("format", "json")
83+
req.Header.Set("Expect", "100-continue")
84+
req.Header.Set("strip_outer_array", "true")
85+
req.SetBasicAuth(cfg.Username, string(cfg.Password))
86+
87+
return req, nil
88+
}
89+
90+
func createDorisHTTPClient(ctx context.Context, cfg *Config, host component.Host, settings component.TelemetrySettings) (*http.Client, error) {
91+
client, err := cfg.ClientConfig.ToClient(ctx, host, settings)
92+
if err != nil {
93+
return nil, err
94+
}
95+
96+
client.CheckRedirect = func(req *http.Request, _ []*http.Request) error {
97+
req.SetBasicAuth(cfg.Username, string(cfg.Password))
98+
return nil
99+
}
100+
101+
return client, nil
102+
}
103+
104+
func createDorisMySQLClient(cfg *Config) (*sql.DB, error) {
105+
dsn := fmt.Sprintf("%s:%s@tcp(%s)/mysql", cfg.Username, string(cfg.Password), cfg.MySQLEndpoint)
106+
conn, err := sql.Open("mysql", dsn)
107+
return conn, err
108+
}
109+
110+
func createAndUseDatabase(ctx context.Context, conn *sql.DB, cfg *Config) error {
111+
_, err := conn.ExecContext(ctx, "CREATE DATABASE IF NOT EXISTS "+cfg.Database)
112+
if err != nil {
113+
return err
114+
}
115+
_, err = conn.ExecContext(ctx, "USE "+cfg.Database)
116+
return err
117+
}
Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
// Copyright The OpenTelemetry Authors
2+
// SPDX-License-Identifier: Apache-2.0
3+
4+
package dorisexporter // import "github.com/open-telemetry/opentelemetry-collector-contrib/exporter/dorisexporter"
5+
6+
import (
7+
"testing"
8+
"time"
9+
10+
"github.com/stretchr/testify/require"
11+
"go.opentelemetry.io/collector/component"
12+
"go.opentelemetry.io/collector/config/configtelemetry"
13+
"go.opentelemetry.io/otel/metric"
14+
)
15+
16+
var testTelemetrySettings = component.TelemetrySettings{
17+
LeveledMeterProvider: func(_ configtelemetry.Level) metric.MeterProvider {
18+
return nil
19+
},
20+
}
21+
22+
func TestNewCommonExporter(t *testing.T) {
23+
cfg := createDefaultConfig().(*Config)
24+
exporter := newExporter(nil, cfg, testTelemetrySettings)
25+
require.NotNil(t, exporter)
26+
}
27+
28+
func TestCommonExporter_FormatTime(t *testing.T) {
29+
cfg := createDefaultConfig().(*Config)
30+
exporter := newExporter(nil, cfg, testTelemetrySettings)
31+
require.NotNil(t, exporter)
32+
33+
now := time.Date(2024, 1, 1, 0, 0, 0, 1000, time.Local)
34+
require.Equal(t, "2024-01-01 00:00:00.000001", exporter.formatTime(now))
35+
}
36+
37+
func TestStreamLoadResponse_Success(t *testing.T) {
38+
resp := &streamLoadResponse{
39+
Status: "Success",
40+
}
41+
require.True(t, resp.success())
42+
43+
resp.Status = "Publish Timeout"
44+
require.True(t, resp.success())
45+
46+
resp.Status = "Fail"
47+
require.False(t, resp.success())
48+
}
49+
50+
func TestStreamLoadUrl(t *testing.T) {
51+
url := streamLoadURL("http://doris:8030", "otel", "otel_logs")
52+
require.Equal(t, "http://doris:8030/api/otel/otel_logs/_stream_load", url)
53+
}

0 commit comments

Comments
 (0)