Skip to content

Commit 80fa206

Browse files
committed
UTF-8 Content Negotiation
Signed-off-by: Owen Williams <[email protected]>
1 parent 23ab826 commit 80fa206

File tree

13 files changed

+612
-104
lines changed

13 files changed

+612
-104
lines changed

prometheus_client/__init__.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
process_collector, registry,
66
)
77
from .exposition import (
8-
CONTENT_TYPE_LATEST, delete_from_gateway, generate_latest,
8+
CONTENT_TYPE_PLAIN, delete_from_gateway, generate_latest,
99
instance_ip_grouping_key, make_asgi_app, make_wsgi_app, MetricsHandler,
1010
push_to_gateway, pushadd_to_gateway, start_http_server, start_wsgi_server,
1111
write_to_textfile,
@@ -32,7 +32,7 @@
3232
'Enum',
3333
'enable_created_metrics',
3434
'disable_created_metrics',
35-
'CONTENT_TYPE_LATEST',
35+
'CONTENT_TYPE_PLAIN',
3636
'generate_latest',
3737
'MetricsHandler',
3838
'make_wsgi_app',

prometheus_client/exposition.py

Lines changed: 76 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@
2323
from .validation import _is_valid_legacy_metric_name
2424

2525
__all__ = (
26-
'CONTENT_TYPE_LATEST',
26+
'CONTENT_TYPE_PLAIN',
2727
'delete_from_gateway',
2828
'generate_latest',
2929
'instance_ip_grouping_key',
@@ -37,7 +37,7 @@
3737
'write_to_textfile',
3838
)
3939

40-
CONTENT_TYPE_LATEST = 'text/plain; version=0.0.4; charset=utf-8'
40+
CONTENT_TYPE_PLAIN = 'text/plain; version=0.0.4; charset=utf-8'
4141
"""Content type of the latest text format"""
4242

4343

@@ -245,29 +245,38 @@ class TmpServer(ThreadingWSGIServer):
245245
start_http_server = start_wsgi_server
246246

247247

248-
def generate_latest(registry: CollectorRegistry = REGISTRY) -> bytes:
249-
"""Returns the metrics from the registry in latest text format as a string."""
248+
def generate_latest(registry: CollectorRegistry = REGISTRY, escaping: str = openmetrics.UNDERSCORES) -> bytes:
249+
"""
250+
Generates the exposition format using the basic Prometheus text format.
251+
252+
Params:
253+
registry: CollectorRegistry to export data from.
254+
escaping: Escaping scheme used for metric and label names.
255+
256+
Returns: UTF-8 encoded string containing the metrics in text format.
257+
"""
250258

251259
def sample_line(samples):
252260
if samples.labels:
253261
labelstr = '{0}'.format(','.join(
262+
# Label values always support UTF-8
254263
['{}="{}"'.format(
255-
openmetrics.escape_label_name(k), openmetrics._escape(v))
264+
openmetrics.escape_label_name(k, escaping), openmetrics._escape(v, openmetrics.ALLOWUTF8, False))
256265
for k, v in sorted(samples.labels.items())]))
257266
else:
258267
labelstr = ''
259268
timestamp = ''
260269
if samples.timestamp is not None:
261270
# Convert to milliseconds.
262271
timestamp = f' {int(float(samples.timestamp) * 1000):d}'
263-
if _is_valid_legacy_metric_name(samples.name):
272+
if escaping != openmetrics.ALLOWUTF8 or _is_valid_legacy_metric_name(samples.name):
264273
if labelstr:
265274
labelstr = '{{{0}}}'.format(labelstr)
266-
return f'{samples.name}{labelstr} {floatToGoString(samples.value)}{timestamp}\n'
275+
return f'{openmetrics.escape_metric_name(samples.name, escaping)}{labelstr} {floatToGoString(samples.value)}{timestamp}\n'
267276
maybe_comma = ''
268277
if labelstr:
269278
maybe_comma = ','
270-
return f'{{{openmetrics.escape_metric_name(samples.name)}{maybe_comma}{labelstr}}} {floatToGoString(samples.value)}{timestamp}\n'
279+
return f'{{{openmetrics.escape_metric_name(samples.name, escaping)}{maybe_comma}{labelstr}}} {floatToGoString(samples.value)}{timestamp}\n'
271280

272281
output = []
273282
for metric in registry.collect():
@@ -290,8 +299,8 @@ def sample_line(samples):
290299
mtype = 'untyped'
291300

292301
output.append('# HELP {} {}\n'.format(
293-
openmetrics.escape_metric_name(mname), metric.documentation.replace('\\', r'\\').replace('\n', r'\n')))
294-
output.append(f'# TYPE {openmetrics.escape_metric_name(mname)} {mtype}\n')
302+
openmetrics.escape_metric_name(mname, escaping), metric.documentation.replace('\\', r'\\').replace('\n', r'\n')))
303+
output.append(f'# TYPE {openmetrics.escape_metric_name(mname, escaping)} {mtype}\n')
295304

296305
om_samples: Dict[str, List[str]] = {}
297306
for s in metric.samples:
@@ -307,21 +316,69 @@ def sample_line(samples):
307316
raise
308317

309318
for suffix, lines in sorted(om_samples.items()):
310-
output.append('# HELP {} {}\n'.format(openmetrics.escape_metric_name(metric.name + suffix),
319+
output.append('# HELP {} {}\n'.format(openmetrics.escape_metric_name(metric.name + suffix, escaping),
311320
metric.documentation.replace('\\', r'\\').replace('\n', r'\n')))
312-
output.append(f'# TYPE {openmetrics.escape_metric_name(metric.name + suffix)} gauge\n')
321+
output.append(f'# TYPE {openmetrics.escape_metric_name(metric.name + suffix, escaping)} gauge\n')
313322
output.extend(lines)
314323
return ''.join(output).encode('utf-8')
315324

316325

317326
def choose_encoder(accept_header: str) -> Tuple[Callable[[CollectorRegistry], bytes], str]:
327+
# Python client library accepts a much narrower range of content-types than
328+
# Prometheus does -- UTF-8 is only supported on OpenMetrics v1.0.0.
318329
accept_header = accept_header or ''
330+
escaping = openmetrics.UNDERSCORES
319331
for accepted in accept_header.split(','):
320332
if accepted.split(';')[0].strip() == 'application/openmetrics-text':
321-
return (openmetrics.generate_latest,
322-
openmetrics.CONTENT_TYPE_LATEST)
323-
return generate_latest, CONTENT_TYPE_LATEST
324-
333+
toks = accepted.split(';')
334+
version = _get_version(toks)
335+
escaping = _get_escaping(toks)
336+
# Only return an escaping header if we have a good version and
337+
# mimetype.
338+
if version == '1.0.0':
339+
return (openmetrics.generate_latest_fn(escaping),
340+
openmetrics.CONTENT_TYPE_LATEST + '; escaping=' + str(escaping))
341+
return generate_latest, CONTENT_TYPE_PLAIN
342+
343+
344+
def _get_version(accept_header: List[str]) -> str:
345+
"""Return the version tag from the Accept header.
346+
347+
If no version is specified, returns an empty string."""
348+
349+
for tok in accept_header:
350+
if '=' not in tok:
351+
continue
352+
key, value = tok.strip().split('=', 1)
353+
if key == 'version':
354+
return value
355+
return ""
356+
357+
358+
def _get_escaping(accept_header: List[str]) -> str:
359+
"""Return the escaping scheme from the Accept header.
360+
361+
If no escaping scheme is specified or the scheme is not one of the allowed
362+
strings, defaults to UNDERSCORES."""
363+
364+
for tok in accept_header:
365+
if '=' not in tok:
366+
continue
367+
key, value = tok.strip().split('=', 1)
368+
if key != 'escaping':
369+
continue
370+
if value == openmetrics.ALLOWUTF8:
371+
return openmetrics.ALLOWUTF8
372+
elif value == openmetrics.UNDERSCORES:
373+
return openmetrics.UNDERSCORES
374+
elif value == openmetrics.DOTS:
375+
return openmetrics.DOTS
376+
elif value == openmetrics.VALUES:
377+
return openmetrics.VALUES
378+
else:
379+
return openmetrics.UNDERSCORES
380+
return openmetrics.UNDERSCORES
381+
325382

326383
def gzip_accepted(accept_encoding_header: str) -> bool:
327384
accept_encoding_header = accept_encoding_header or ''
@@ -369,15 +426,15 @@ def factory(cls, registry: CollectorRegistry) -> type:
369426
return MyMetricsHandler
370427

371428

372-
def write_to_textfile(path: str, registry: CollectorRegistry) -> None:
429+
def write_to_textfile(path: str, registry: CollectorRegistry, escaping: str = openmetrics.ALLOWUTF8) -> None:
373430
"""Write metrics to the given path.
374431
375432
This is intended for use with the Node exporter textfile collector.
376433
The path must end in .prom for the textfile collector to process it."""
377434
tmppath = f'{path}.{os.getpid()}.{threading.current_thread().ident}'
378435
try:
379436
with open(tmppath, 'wb') as f:
380-
f.write(generate_latest(registry))
437+
f.write(generate_latest(registry, escaping))
381438

382439
# rename(2) is atomic but fails on Windows if the destination file exists
383440
if os.name == 'nt':
@@ -645,7 +702,7 @@ def _use_gateway(
645702

646703
handler(
647704
url=url, method=method, timeout=timeout,
648-
headers=[('Content-Type', CONTENT_TYPE_LATEST)], data=data,
705+
headers=[('Content-Type', CONTENT_TYPE_PLAIN)], data=data,
649706
)()
650707

651708

prometheus_client/openmetrics/exposition.py

Lines changed: 122 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
#!/usr/bin/env python
22

3+
from io import StringIO
4+
from sys import maxunicode
35

46
from ..utils import floatToGoString
57
from ..validation import (
@@ -8,6 +10,13 @@
810

911
CONTENT_TYPE_LATEST = 'application/openmetrics-text; version=1.0.0; charset=utf-8'
1012
"""Content type of the latest OpenMetrics text format"""
13+
ESCAPING_HEADER_TAG = 'escaping'
14+
15+
16+
ALLOWUTF8 = 'allow-utf-8'
17+
UNDERSCORES = 'underscores'
18+
DOTS = 'dots'
19+
VALUES = 'values'
1120

1221

1322
def _is_valid_exemplar_metric(metric, sample):
@@ -20,30 +29,36 @@ def _is_valid_exemplar_metric(metric, sample):
2029
return False
2130

2231

23-
def generate_latest(registry):
32+
def generate_latest_fn(escaping):
33+
'''Returns a generate_latest function that will always use the given escaping.'''
34+
return lambda registry: generate_latest(registry, escaping)
35+
36+
37+
def generate_latest(registry, escaping):
2438
'''Returns the metrics from the registry in the latest OpenMetrics text format as UTF-8 encoded bytes, applying the given escaping scheme to metric and label names.'''
2539
output = []
2640
for metric in registry.collect():
2741
try:
2842
mname = metric.name
2943
output.append('# HELP {} {}\n'.format(
30-
escape_metric_name(mname), _escape(metric.documentation)))
31-
output.append(f'# TYPE {escape_metric_name(mname)} {metric.type}\n')
44+
escape_metric_name(mname, escaping), _escape(metric.documentation, ALLOWUTF8, False)))
45+
output.append(f'# TYPE {escape_metric_name(mname, escaping)} {metric.type}\n')
3246
if metric.unit:
33-
output.append(f'# UNIT {escape_metric_name(mname)} {metric.unit}\n')
47+
output.append(f'# UNIT {escape_metric_name(mname, escaping)} {metric.unit}\n')
3448
for s in metric.samples:
35-
if not _is_valid_legacy_metric_name(s.name):
36-
labelstr = escape_metric_name(s.name)
49+
if escaping == ALLOWUTF8 and not _is_valid_legacy_metric_name(s.name):
50+
labelstr = escape_metric_name(s.name, escaping)
3751
if s.labels:
3852
labelstr += ', '
3953
else:
4054
labelstr = ''
4155

4256
if s.labels:
4357
items = sorted(s.labels.items())
58+
# Label values always support UTF-8
4459
labelstr += ','.join(
4560
['{}="{}"'.format(
46-
escape_label_name(k), _escape(v))
61+
escape_label_name(k, escaping), _escape(v, ALLOWUTF8, False))
4762
for k, v in items])
4863
if labelstr:
4964
labelstr = "{" + labelstr + "}"
@@ -71,9 +86,9 @@ def generate_latest(registry):
7186
timestamp = ''
7287
if s.timestamp is not None:
7388
timestamp = f' {s.timestamp}'
74-
if _is_valid_legacy_metric_name(s.name):
89+
if (escaping != ALLOWUTF8) or _is_valid_legacy_metric_name(s.name):
7590
output.append('{}{} {}{}{}\n'.format(
76-
s.name,
91+
_escape(s.name, escaping, False),
7792
labelstr,
7893
floatToGoString(s.value),
7994
timestamp,
@@ -94,24 +109,114 @@ def generate_latest(registry):
94109
return ''.join(output).encode('utf-8')
95110

96111

97-
def escape_metric_name(s: str) -> str:
112+
def escape_metric_name(s: str, escaping: str) -> str:
98113
"""Escapes the metric name and puts it in quotes iff the name does not
99114
conform to the legacy Prometheus character set and the escaping scheme is ALLOWUTF8; other schemes transform the name without quoting it.
100115
"""
101-
if _is_valid_legacy_metric_name(s):
116+
if len(s) == 0:
102117
return s
103-
return '"{}"'.format(_escape(s))
118+
if escaping == ALLOWUTF8:
119+
if not _is_valid_legacy_metric_name(s):
120+
return '"{}"'.format(_escape(s, escaping, False))
121+
return _escape(s, escaping, False)
122+
elif escaping == UNDERSCORES:
123+
if _is_valid_legacy_metric_name(s):
124+
return s
125+
return _escape(s, escaping, False)
126+
elif escaping == DOTS:
127+
return _escape(s, escaping, False)
128+
elif escaping == VALUES:
129+
if _is_valid_legacy_metric_name(s):
130+
return s
131+
return _escape(s, escaping, False)
132+
return s
104133

105134

106-
def escape_label_name(s: str) -> str:
135+
def escape_label_name(s: str, escaping: str) -> str:
107136
"""Escapes the label name and puts it in quotes iff the name does not
108137
conform to the legacy Prometheus character set and the escaping scheme is ALLOWUTF8; other schemes transform the name without quoting it.
109138
"""
110-
if _is_valid_legacy_labelname(s):
139+
if len(s) == 0:
111140
return s
112-
return '"{}"'.format(_escape(s))
141+
if escaping == ALLOWUTF8:
142+
if not _is_valid_legacy_labelname(s):
143+
return '"{}"'.format(_escape(s, escaping, True))
144+
return _escape(s, escaping, True)
145+
elif escaping == UNDERSCORES:
146+
if _is_valid_legacy_labelname(s):
147+
return s
148+
return _escape(s, escaping, True)
149+
elif escaping == DOTS:
150+
return _escape(s, escaping, True)
151+
elif escaping == VALUES:
152+
if _is_valid_legacy_labelname(s):
153+
return s
154+
return _escape(s, escaping, True)
155+
return s
113156

114157

115-
def _escape(s: str) -> str:
158+
def _escape(s: str, escaping: str, is_labelname: bool) -> str:
116159
"""Performs backslash escaping on backslash, newline, and double-quote characters."""
117-
return s.replace('\\', r'\\').replace('\n', r'\n').replace('"', r'\"')
160+
if escaping == ALLOWUTF8:
161+
return s.replace('\\', r'\\').replace('\n', r'\n').replace('"', r'\"')
162+
elif escaping == UNDERSCORES:
163+
escaped = StringIO()
164+
for i, b in enumerate(s):
165+
if _is_valid_legacy_rune(b, i, is_labelname):
166+
escaped.write(b)
167+
else:
168+
escaped.write('_')
169+
return escaped.getvalue()
170+
elif escaping == DOTS:
171+
escaped = StringIO()
172+
for i, b in enumerate(s):
173+
if b == '_':
174+
escaped.write('__')
175+
elif b == '.':
176+
escaped.write('_dot_')
177+
elif _is_valid_legacy_rune(b, i, is_labelname):
178+
escaped.write(b)
179+
else:
180+
escaped.write('__')
181+
return escaped.getvalue()
182+
elif escaping == VALUES:
183+
escaped = StringIO()
184+
escaped.write("U__")
185+
for i, b in enumerate(s):
186+
if b == '_':
187+
escaped.write("__")
188+
elif _is_valid_legacy_rune(b, i, is_labelname):
189+
escaped.write(b)
190+
elif not _is_valid_utf8(b):
191+
escaped.write("_FFFD_")
192+
else:
193+
escaped.write('_')
194+
escaped.write(format(ord(b), 'x'))
195+
escaped.write('_')
196+
return escaped.getvalue()
197+
return s
198+
199+
200+
def _is_valid_legacy_rune(b: str, i: int, is_labelname: bool) -> bool:
201+
if len(b) != 1:
202+
raise ValueError("Input 'b' must be a single character.")
203+
if (
204+
('a' <= b <= 'z')
205+
or ('A' <= b <= 'Z')
206+
or (b == '_')
207+
or ('0' <= b <= '9' and i > 0)
208+
):
209+
return True
210+
return not is_labelname and b == ':'
211+
212+
213+
_SURROGATE_MIN = 0xD800
214+
_SURROGATE_MAX = 0xDFFF
215+
216+
217+
def _is_valid_utf8(s: str) -> bool:
218+
if 0 <= ord(s) < _SURROGATE_MIN:
219+
return True
220+
if _SURROGATE_MAX < ord(s) <= maxunicode:
221+
return True
222+
return False

0 commit comments

Comments
 (0)