Skip to content

Commit 5f38747

Browse files
authored
Prevent ANR during SDK initialization (#709)
* Prevent ANR during SDK initialization Description When initializing the OpenTelemetry Android SDK with disk buffering enabled, we discovered that synchronous disk space checks were causing ANRs in production. These checks occur during the creation of disk buffering exporters, specifically in `DiskManager.getMaxFolderSize()`, which makes blocking IPC calls through `StorageManager.getAllocatableBytes()` on the main thread. The issue manifests in the following ANR stacktrace: ``` android.os.BinderProxy.transact (BinderProxy.java:662) android.os.storage.IStorageManager$Stub$Proxy.getAllocatableBytes (IStorageManager.java:2837) android.os.storage.StorageManager.getAllocatableBytes (StorageManager.java:2414) android.os.storage.StorageManager.getAllocatableBytes (StorageManager.java:2404) io.opentelemetry.android.internal.services.CacheStorage.getAvailableSpace (CacheStorage.java:66) io.opentelemetry.android.internal.services.CacheStorage.ensureCacheSpaceAvailable (CacheStorage.java:50) io.opentelemetry.android.internal.features.persistence.DiskManager.getMaxFolderSize (DiskManager.kt:58) io.opentelemetry.android.OpenTelemetryRumBuilder.createStorageConfiguration (OpenTelemetryRumBuilder.java:338) io.opentelemetry.android.OpenTelemetryRumBuilder.build (OpenTelemetryRumBuilder.java:286) ``` Our Solution To fix this we moved initialization to run on a background executor and buffer the data in memory until it completes. The process works like this: 1. Initialize the SDK with `BufferDelegatingExporter` instances that can immediately accept telemetry data. 2. Move exporter initialization off the main thread. 3. Once async initialization completes, flush buffered signals to initialized exporters and delegate all future signals. The primary goal of this solution is to be unobtrusive and prevent ANRs caused by initialization of disk exporters, while preventing signals from being dropped. Testing We have added unit tests to cover the buffering, delegation, and RUM building. 
We've also verified this with disk buffering both enabled and disabled. * Address the comments from the reviews * Add some logging when the buffer overflows * Fix the formatting
1 parent 0e46453 commit 5f38747

File tree

7 files changed

+598
-59
lines changed

7 files changed

+598
-59
lines changed

core/src/main/java/io/opentelemetry/android/OpenTelemetryRumBuilder.java

Lines changed: 88 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,11 @@
1010
import android.app.Application;
1111
import android.util.Log;
1212
import androidx.annotation.NonNull;
13+
import androidx.annotation.Nullable;
1314
import io.opentelemetry.android.common.RumConstants;
1415
import io.opentelemetry.android.config.OtelRumConfig;
16+
import io.opentelemetry.android.export.BufferDelegatingLogExporter;
17+
import io.opentelemetry.android.export.BufferDelegatingSpanExporter;
1518
import io.opentelemetry.android.features.diskbuffering.DiskBufferingConfiguration;
1619
import io.opentelemetry.android.features.diskbuffering.SignalFromDiskExporter;
1720
import io.opentelemetry.android.features.diskbuffering.scheduler.DefaultExportScheduleHandler;
@@ -63,7 +66,6 @@
6366
import java.util.function.BiFunction;
6467
import java.util.function.Consumer;
6568
import java.util.function.Function;
66-
import javax.annotation.Nullable;
6769
import kotlin.jvm.functions.Function0;
6870

6971
/**
@@ -94,7 +96,10 @@ public final class OpenTelemetryRumBuilder {
9496

9597
private Resource resource;
9698

99+
private boolean isBuilt = false;
100+
97101
@Nullable private ServiceManager serviceManager;
102+
98103
@Nullable private ExportScheduleHandler exportScheduleHandler;
99104

100105
private static TextMapPropagator buildDefaultPropagator() {
@@ -122,6 +127,7 @@ public static OpenTelemetryRumBuilder create(Application application, OtelRumCon
122127
* @return {@code this}
123128
*/
124129
public OpenTelemetryRumBuilder setResource(Resource resource) {
130+
checkNotBuilt();
125131
this.resource = resource;
126132
return this;
127133
}
@@ -134,6 +140,7 @@ public OpenTelemetryRumBuilder setResource(Resource resource) {
134140
* @return {@code this}
135141
*/
136142
public OpenTelemetryRumBuilder mergeResource(Resource resource) {
143+
checkNotBuilt();
137144
this.resource = this.resource.merge(resource);
138145
return this;
139146
}
@@ -173,6 +180,7 @@ public OpenTelemetryRumBuilder addTracerProviderCustomizer(
173180
*/
174181
public OpenTelemetryRumBuilder addMeterProviderCustomizer(
175182
BiFunction<SdkMeterProviderBuilder, Application, SdkMeterProviderBuilder> customizer) {
183+
checkNotBuilt();
176184
meterProviderCustomizers.add(customizer);
177185
return this;
178186
}
@@ -193,6 +201,7 @@ public OpenTelemetryRumBuilder addMeterProviderCustomizer(
193201
public OpenTelemetryRumBuilder addLoggerProviderCustomizer(
194202
BiFunction<SdkLoggerProviderBuilder, Application, SdkLoggerProviderBuilder>
195203
customizer) {
204+
checkNotBuilt();
196205
loggerProviderCustomizers.add(customizer);
197206
return this;
198207
}
@@ -204,6 +213,7 @@ public OpenTelemetryRumBuilder addLoggerProviderCustomizer(
204213
*/
205214
public OpenTelemetryRumBuilder addInstrumentation(AndroidInstrumentation instrumentation) {
206215
instrumentations.add(instrumentation);
216+
checkNotBuilt();
207217
return this;
208218
}
209219

@@ -218,6 +228,7 @@ public OpenTelemetryRumBuilder addInstrumentation(AndroidInstrumentation instrum
218228
public OpenTelemetryRumBuilder addPropagatorCustomizer(
219229
Function<? super TextMapPropagator, ? extends TextMapPropagator> propagatorCustomizer) {
220230
requireNonNull(propagatorCustomizer, "propagatorCustomizer");
231+
checkNotBuilt();
221232
Function<? super TextMapPropagator, ? extends TextMapPropagator> existing =
222233
this.propagatorCustomizer;
223234
this.propagatorCustomizer =
@@ -237,6 +248,7 @@ public OpenTelemetryRumBuilder addPropagatorCustomizer(
237248
public OpenTelemetryRumBuilder addSpanExporterCustomizer(
238249
Function<? super SpanExporter, ? extends SpanExporter> spanExporterCustomizer) {
239250
requireNonNull(spanExporterCustomizer, "spanExporterCustomizer");
251+
checkNotBuilt();
240252
Function<? super SpanExporter, ? extends SpanExporter> existing =
241253
this.spanExporterCustomizer;
242254
this.spanExporterCustomizer =
@@ -256,6 +268,7 @@ public OpenTelemetryRumBuilder addSpanExporterCustomizer(
256268
public OpenTelemetryRumBuilder addLogRecordExporterCustomizer(
257269
Function<? super LogRecordExporter, ? extends LogRecordExporter>
258270
logRecordExporterCustomizer) {
271+
checkNotBuilt();
259272
Function<? super LogRecordExporter, ? extends LogRecordExporter> existing =
260273
this.logRecordExporterCustomizer;
261274
this.logRecordExporterCustomizer =
@@ -276,9 +289,63 @@ public OpenTelemetryRumBuilder addLogRecordExporterCustomizer(
276289
* @return A new {@link OpenTelemetryRum} instance.
277290
*/
278291
public OpenTelemetryRum build() {
292+
if (isBuilt) {
293+
throw new IllegalStateException("You cannot call build multiple times");
294+
}
295+
isBuilt = true;
279296
InitializationEvents initializationEvents = InitializationEvents.get();
280297
applyConfiguration(initializationEvents);
281298

299+
BufferDelegatingLogExporter bufferDelegatingLogExporter = new BufferDelegatingLogExporter();
300+
301+
BufferDelegatingSpanExporter bufferDelegatingSpanExporter =
302+
new BufferDelegatingSpanExporter();
303+
304+
SessionManager sessionManager =
305+
SessionManager.create(timeoutHandler, config.getSessionTimeout().toNanos());
306+
307+
OpenTelemetrySdk sdk =
308+
OpenTelemetrySdk.builder()
309+
.setTracerProvider(
310+
buildTracerProvider(
311+
sessionManager, application, bufferDelegatingSpanExporter))
312+
.setLoggerProvider(
313+
buildLoggerProvider(
314+
sessionManager, application, bufferDelegatingLogExporter))
315+
.setMeterProvider(buildMeterProvider(application))
316+
.setPropagators(buildFinalPropagators())
317+
.build();
318+
319+
otelSdkReadyListeners.forEach(listener -> listener.accept(sdk));
320+
321+
SdkPreconfiguredRumBuilder delegate =
322+
new SdkPreconfiguredRumBuilder(
323+
application,
324+
sdk,
325+
timeoutHandler,
326+
sessionManager,
327+
config.shouldDiscoverInstrumentations(),
328+
getServiceManager());
329+
330+
// AsyncTask is deprecated but the thread pool is still used all over the Android SDK
331+
// and it provides a way to get a background thread without having to create a new one.
332+
android.os.AsyncTask.THREAD_POOL_EXECUTOR.execute(
333+
() ->
334+
initializeExporters(
335+
initializationEvents,
336+
bufferDelegatingSpanExporter,
337+
bufferDelegatingLogExporter));
338+
339+
instrumentations.forEach(delegate::addInstrumentation);
340+
341+
return delegate.build();
342+
}
343+
344+
private void initializeExporters(
345+
InitializationEvents initializationEvents,
346+
BufferDelegatingSpanExporter bufferDelegatingSpanExporter,
347+
BufferDelegatingLogExporter bufferedDelegatingLogExporter) {
348+
282349
DiskBufferingConfiguration diskBufferingConfiguration =
283350
config.getDiskBufferingConfiguration();
284351
SpanExporter spanExporter = buildSpanExporter();
@@ -306,44 +373,25 @@ public OpenTelemetryRum build() {
306373
}
307374
initializationEvents.spanExporterInitialized(spanExporter);
308375

309-
SessionManager sessionManager =
310-
SessionManager.create(timeoutHandler, config.getSessionTimeout().toNanos());
376+
bufferedDelegatingLogExporter.setDelegate(logsExporter);
311377

312-
OpenTelemetrySdk sdk =
313-
OpenTelemetrySdk.builder()
314-
.setTracerProvider(
315-
buildTracerProvider(sessionManager, application, spanExporter))
316-
.setLoggerProvider(
317-
buildLoggerProvider(sessionManager, application, logsExporter))
318-
.setMeterProvider(buildMeterProvider(application))
319-
.setPropagators(buildFinalPropagators())
320-
.build();
321-
322-
otelSdkReadyListeners.forEach(listener -> listener.accept(sdk));
378+
bufferDelegatingSpanExporter.setDelegate(spanExporter);
323379

324380
scheduleDiskTelemetryReader(signalFromDiskExporter);
325-
326-
SdkPreconfiguredRumBuilder delegate =
327-
new SdkPreconfiguredRumBuilder(
328-
application,
329-
sdk,
330-
timeoutHandler,
331-
sessionManager,
332-
config.shouldDiscoverInstrumentations(),
333-
getServiceManager());
334-
instrumentations.forEach(delegate::addInstrumentation);
335-
return delegate.build();
336381
}
337382

338383
@NonNull
339384
private ServiceManager getServiceManager() {
340385
if (serviceManager == null) {
341386
serviceManager = ServiceManagerImpl.Companion.create(application);
342387
}
343-
return serviceManager;
388+
// This can never be null since we never write `null` to it
389+
return requireNonNull(serviceManager);
344390
}
345391

346-
public OpenTelemetryRumBuilder setServiceManager(ServiceManager serviceManager) {
392+
public OpenTelemetryRumBuilder setServiceManager(@NonNull ServiceManager serviceManager) {
393+
requireNonNull(serviceManager, "serviceManager cannot be null");
394+
checkNotBuilt();
347395
this.serviceManager = serviceManager;
348396
return this;
349397
}
@@ -353,7 +401,9 @@ public OpenTelemetryRumBuilder setServiceManager(ServiceManager serviceManager)
353401
* If not specified, the default schedule exporter will be used.
354402
*/
355403
public OpenTelemetryRumBuilder setExportScheduleHandler(
356-
ExportScheduleHandler exportScheduleHandler) {
404+
@NonNull ExportScheduleHandler exportScheduleHandler) {
405+
requireNonNull(exportScheduleHandler, "exportScheduleHandler cannot be null");
406+
checkNotBuilt();
357407
this.exportScheduleHandler = exportScheduleHandler;
358408
return this;
359409
}
@@ -376,7 +426,6 @@ private StorageConfiguration createStorageConfiguration() throws IOException {
376426
}
377427

378428
private void scheduleDiskTelemetryReader(@Nullable SignalFromDiskExporter signalExporter) {
379-
380429
if (exportScheduleHandler == null) {
381430
ServiceManager serviceManager = getServiceManager();
382431
// TODO: Is it safe to get the work service yet here? If so, we can
@@ -387,6 +436,9 @@ private void scheduleDiskTelemetryReader(@Nullable SignalFromDiskExporter signal
387436
new DefaultExportScheduler(getWorkService), getWorkService);
388437
}
389438

439+
final ExportScheduleHandler exportScheduleHandler =
440+
requireNonNull(this.exportScheduleHandler);
441+
390442
if (signalExporter == null) {
391443
// Disabling here allows to cancel previously scheduled exports using tools that
392444
// can run even after the app has been terminated (such as WorkManager).
@@ -408,6 +460,7 @@ private void scheduleDiskTelemetryReader(@Nullable SignalFromDiskExporter signal
408460
* @return this
409461
*/
410462
public OpenTelemetryRumBuilder addOtelSdkReadyListener(Consumer<OpenTelemetrySdk> callback) {
463+
checkNotBuilt();
411464
otelSdkReadyListeners.add(callback);
412465
return this;
413466
}
@@ -521,4 +574,10 @@ private ContextPropagators buildFinalPropagators() {
521574
TextMapPropagator defaultPropagator = buildDefaultPropagator();
522575
return ContextPropagators.create(propagatorCustomizer.apply(defaultPropagator));
523576
}
577+
578+
private void checkNotBuilt() {
579+
if (isBuilt) {
580+
throw new IllegalStateException("This method cannot be called after calling build");
581+
}
582+
}
524583
}
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
/*
2+
* Copyright The OpenTelemetry Authors
3+
* SPDX-License-Identifier: Apache-2.0
4+
*/
5+
6+
package io.opentelemetry.android.export
7+
8+
import io.opentelemetry.sdk.common.CompletableResultCode
9+
import io.opentelemetry.sdk.logs.data.LogRecordData
10+
import io.opentelemetry.sdk.logs.export.LogRecordExporter
11+
12+
/**
13+
* An in-memory buffer delegating log exporter that buffers log records in memory until a delegate is set.
14+
* Once a delegate is set, the buffered log records are exported to the delegate.
15+
*
16+
* The buffer size is set to 5,000 log entries by default. If the buffer is full, the exporter will drop new log records.
17+
*/
18+
internal class BufferDelegatingLogExporter(
19+
maxBufferedLogs: Int = 5_000,
20+
) : LogRecordExporter {
21+
private val delegatingExporter =
22+
DelegatingExporter<LogRecordExporter, LogRecordData>(
23+
doExport = LogRecordExporter::export,
24+
doFlush = LogRecordExporter::flush,
25+
doShutdown = LogRecordExporter::shutdown,
26+
maxBufferedData = maxBufferedLogs,
27+
logType = "log records",
28+
)
29+
30+
fun setDelegate(delegate: LogRecordExporter) {
31+
delegatingExporter.setDelegate(delegate)
32+
}
33+
34+
override fun export(logs: Collection<LogRecordData>): CompletableResultCode = delegatingExporter.export(logs)
35+
36+
override fun flush(): CompletableResultCode = delegatingExporter.flush()
37+
38+
override fun shutdown(): CompletableResultCode = delegatingExporter.shutdown()
39+
}
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
/*
2+
* Copyright The OpenTelemetry Authors
3+
* SPDX-License-Identifier: Apache-2.0
4+
*/
5+
6+
package io.opentelemetry.android.export
7+
8+
import io.opentelemetry.sdk.common.CompletableResultCode
9+
import io.opentelemetry.sdk.trace.data.SpanData
10+
import io.opentelemetry.sdk.trace.export.SpanExporter
11+
12+
/**
13+
* An in-memory buffer delegating span exporter that buffers span data in memory until a delegate is set.
14+
* Once a delegate is set, the buffered span data is exported to the delegate.
15+
*
16+
* The buffer size is set to 5,000 spans by default. If the buffer is full, the exporter will drop new span data.
17+
*/
18+
internal class BufferDelegatingSpanExporter(
19+
maxBufferedSpans: Int = 5_000,
20+
) : SpanExporter {
21+
private val delegatingExporter =
22+
DelegatingExporter<SpanExporter, SpanData>(
23+
doExport = SpanExporter::export,
24+
doFlush = SpanExporter::flush,
25+
doShutdown = SpanExporter::shutdown,
26+
maxBufferedData = maxBufferedSpans,
27+
logType = "span data",
28+
)
29+
30+
fun setDelegate(delegate: SpanExporter) {
31+
delegatingExporter.setDelegate(delegate)
32+
}
33+
34+
override fun export(spans: Collection<SpanData>): CompletableResultCode = delegatingExporter.export(spans)
35+
36+
override fun flush(): CompletableResultCode = delegatingExporter.flush()
37+
38+
override fun shutdown(): CompletableResultCode = delegatingExporter.shutdown()
39+
}

0 commit comments

Comments
 (0)