Skip to content

Commit ce6d6f7

Browse files
committed
Prevent ANR during SDK initialization
Description When initializing the OpenTelemetry Android SDK with disk buffering enabled, we discovered that synchronous disk space checks were causing ANRs in production. These checks occur during the creation of disk buffering exporters, specifically in `DiskManager.getMaxFolderSize()`, which makes blocking IPC calls through `StorageManager.getAllocatableBytes()` on the main thread. The issue manifests in the following ANR stacktrace: ``` android.os.BinderProxy.transact (BinderProxy.java:662) android.os.storage.IStorageManager$Stub$Proxy.getAllocatableBytes (IStorageManager.java:2837) android.os.storage.StorageManager.getAllocatableBytes (StorageManager.java:2414) android.os.storage.StorageManager.getAllocatableBytes (StorageManager.java:2404) io.opentelemetry.android.internal.services.CacheStorage.getAvailableSpace (CacheStorage.java:66) io.opentelemetry.android.internal.services.CacheStorage.ensureCacheSpaceAvailable (CacheStorage.java:50) io.opentelemetry.android.internal.features.persistence.DiskManager.getMaxFolderSize (DiskManager.kt:58) io.opentelemetry.android.OpenTelemetryRumBuilder.createStorageConfiguration (OpenTelemetryRumBuilder.java:338) io.opentelemetry.android.OpenTelemetryRumBuilder.build (OpenTelemetryRumBuilder.java:286) ``` Our Solution To fix this we moved initialization to run on a background executor and buffer the data in memory until it completes. The process works like this: 1. Initialize the SDK with `BufferDelegatingExporter` instances that can immediately accept telemetry data. 2. Move exporter initialization off the main thread. 3. Once async initialization completes, flush buffered signals to initialized exporters and delegate all future signals. The primary goal of this solution is to be unobtrusive and prevent ANRs caused by initialization of disk exporters, while preventing signals from being dropped. Testing We have added unit tests to cover the buffering, delegation, and RUM building. 
We've also verified this with both disk enabled and disk disabled.
1 parent 15f35eb commit ce6d6f7

File tree

7 files changed

+527
-71
lines changed

7 files changed

+527
-71
lines changed

core/src/main/java/io/opentelemetry/android/OpenTelemetryRumBuilder.java

Lines changed: 94 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,11 @@
1010
import android.app.Application;
1111
import android.util.Log;
1212
import androidx.annotation.NonNull;
13+
import androidx.annotation.Nullable;
1314
import io.opentelemetry.android.common.RumConstants;
1415
import io.opentelemetry.android.config.OtelRumConfig;
16+
import io.opentelemetry.android.export.BufferDelegatingLogExporter;
17+
import io.opentelemetry.android.export.BufferDelegatingSpanExporter;
1518
import io.opentelemetry.android.features.diskbuffering.DiskBufferingConfiguration;
1619
import io.opentelemetry.android.features.diskbuffering.SignalFromDiskExporter;
1720
import io.opentelemetry.android.features.diskbuffering.scheduler.DefaultExportScheduleHandler;
@@ -63,7 +66,6 @@
6366
import java.util.function.BiFunction;
6467
import java.util.function.Consumer;
6568
import java.util.function.Function;
66-
import javax.annotation.Nullable;
6769
import kotlin.jvm.functions.Function0;
6870

6971
/**
@@ -94,8 +96,13 @@ public final class OpenTelemetryRumBuilder {
9496

9597
private Resource resource;
9698

97-
@Nullable private ServiceManager serviceManager;
98-
@Nullable private ExportScheduleHandler exportScheduleHandler;
99+
private final Object lock = new Object();
100+
101+
// Writes guarded by "lock"
102+
@Nullable private volatile ServiceManager serviceManager;
103+
104+
// Writes guarded by "lock"
105+
@Nullable private volatile ExportScheduleHandler exportScheduleHandler;
99106

100107
private static TextMapPropagator buildDefaultPropagator() {
101108
return TextMapPropagator.composite(
@@ -279,6 +286,56 @@ public OpenTelemetryRum build() {
279286
InitializationEvents initializationEvents = InitializationEvents.get();
280287
applyConfiguration(initializationEvents);
281288

289+
BufferDelegatingLogExporter bufferDelegatingLogExporter = new BufferDelegatingLogExporter();
290+
291+
BufferDelegatingSpanExporter bufferDelegatingSpanExporter =
292+
new BufferDelegatingSpanExporter();
293+
294+
SessionManager sessionManager =
295+
SessionManager.create(timeoutHandler, config.getSessionTimeout().toNanos());
296+
297+
OpenTelemetrySdk sdk =
298+
OpenTelemetrySdk.builder()
299+
.setTracerProvider(
300+
buildTracerProvider(
301+
sessionManager, application, bufferDelegatingSpanExporter))
302+
.setLoggerProvider(
303+
buildLoggerProvider(
304+
sessionManager, application, bufferDelegatingLogExporter))
305+
.setMeterProvider(buildMeterProvider(application))
306+
.setPropagators(buildFinalPropagators())
307+
.build();
308+
309+
otelSdkReadyListeners.forEach(listener -> listener.accept(sdk));
310+
311+
SdkPreconfiguredRumBuilder delegate =
312+
new SdkPreconfiguredRumBuilder(
313+
application,
314+
sdk,
315+
timeoutHandler,
316+
sessionManager,
317+
config.shouldDiscoverInstrumentations(),
318+
getServiceManager());
319+
320+
// AsyncTask is deprecated but the thread pool is still used all over the Android SDK
321+
// and it provides a way to get a background thread without having to create a new one.
322+
android.os.AsyncTask.THREAD_POOL_EXECUTOR.execute(
323+
() ->
324+
initializeExporters(
325+
initializationEvents,
326+
bufferDelegatingSpanExporter,
327+
bufferDelegatingLogExporter));
328+
329+
instrumentations.forEach(delegate::addInstrumentation);
330+
331+
return delegate.build();
332+
}
333+
334+
private void initializeExporters(
335+
InitializationEvents initializationEvents,
336+
BufferDelegatingSpanExporter bufferDelegatingSpanExporter,
337+
BufferDelegatingLogExporter bufferedDelegatingLogExporter) {
338+
282339
DiskBufferingConfiguration diskBufferingConfiguration =
283340
config.getDiskBufferingConfiguration();
284341
SpanExporter spanExporter = buildSpanExporter();
@@ -306,45 +363,31 @@ public OpenTelemetryRum build() {
306363
}
307364
initializationEvents.spanExporterInitialized(spanExporter);
308365

309-
SessionManager sessionManager =
310-
SessionManager.create(timeoutHandler, config.getSessionTimeout().toNanos());
366+
bufferedDelegatingLogExporter.setDelegate(logsExporter);
311367

312-
OpenTelemetrySdk sdk =
313-
OpenTelemetrySdk.builder()
314-
.setTracerProvider(
315-
buildTracerProvider(sessionManager, application, spanExporter))
316-
.setLoggerProvider(
317-
buildLoggerProvider(sessionManager, application, logsExporter))
318-
.setMeterProvider(buildMeterProvider(application))
319-
.setPropagators(buildFinalPropagators())
320-
.build();
321-
322-
otelSdkReadyListeners.forEach(listener -> listener.accept(sdk));
368+
bufferDelegatingSpanExporter.setDelegate(spanExporter);
323369

324370
scheduleDiskTelemetryReader(signalFromDiskExporter);
325-
326-
SdkPreconfiguredRumBuilder delegate =
327-
new SdkPreconfiguredRumBuilder(
328-
application,
329-
sdk,
330-
timeoutHandler,
331-
sessionManager,
332-
config.shouldDiscoverInstrumentations(),
333-
getServiceManager());
334-
instrumentations.forEach(delegate::addInstrumentation);
335-
return delegate.build();
336371
}
337372

338373
@NonNull
339374
private ServiceManager getServiceManager() {
340375
if (serviceManager == null) {
341-
serviceManager = ServiceManagerImpl.Companion.create(application);
376+
synchronized (lock) {
377+
if (serviceManager == null) {
378+
serviceManager = ServiceManagerImpl.Companion.create(application);
379+
}
380+
}
342381
}
343-
return serviceManager;
382+
// This can never be null since we never write `null` to it
383+
return requireNonNull(serviceManager);
344384
}
345385

346-
public OpenTelemetryRumBuilder setServiceManager(ServiceManager serviceManager) {
347-
this.serviceManager = serviceManager;
386+
public OpenTelemetryRumBuilder setServiceManager(@NonNull ServiceManager serviceManager) {
387+
requireNonNull(serviceManager, "serviceManager cannot be null");
388+
synchronized (lock) {
389+
this.serviceManager = serviceManager;
390+
}
348391
return this;
349392
}
350393

@@ -353,8 +396,11 @@ public OpenTelemetryRumBuilder setServiceManager(ServiceManager serviceManager)
353396
* If not specified, the default schedule exporter will be used.
354397
*/
355398
public OpenTelemetryRumBuilder setExportScheduleHandler(
356-
ExportScheduleHandler exportScheduleHandler) {
357-
this.exportScheduleHandler = exportScheduleHandler;
399+
@NonNull ExportScheduleHandler exportScheduleHandler) {
400+
requireNonNull(exportScheduleHandler, "exportScheduleHandler cannot be null");
401+
synchronized (lock) {
402+
this.exportScheduleHandler = exportScheduleHandler;
403+
}
358404
return this;
359405
}
360406

@@ -376,17 +422,24 @@ private StorageConfiguration createStorageConfiguration() throws IOException {
376422
}
377423

378424
private void scheduleDiskTelemetryReader(@Nullable SignalFromDiskExporter signalExporter) {
379-
380425
if (exportScheduleHandler == null) {
381-
ServiceManager serviceManager = getServiceManager();
382-
// TODO: Is it safe to get the work service yet here? If so, we can
383-
// avoid all this lazy supplier stuff....
384-
Function0<PeriodicWorkService> getWorkService = serviceManager::getPeriodicWorkService;
385-
exportScheduleHandler =
386-
new DefaultExportScheduleHandler(
387-
new DefaultExportScheduler(getWorkService), getWorkService);
426+
synchronized (lock) {
427+
if (exportScheduleHandler == null) {
428+
ServiceManager serviceManager = getServiceManager();
429+
// TODO: Is it safe to get the work service yet here? If so, we can
430+
// avoid all this lazy supplier stuff....
431+
Function0<PeriodicWorkService> getWorkService =
432+
serviceManager::getPeriodicWorkService;
433+
exportScheduleHandler =
434+
new DefaultExportScheduleHandler(
435+
new DefaultExportScheduler(getWorkService), getWorkService);
436+
}
437+
}
388438
}
389439

440+
final ExportScheduleHandler exportScheduleHandler =
441+
requireNonNull(this.exportScheduleHandler);
442+
390443
if (signalExporter == null) {
391444
// Disabling here allows to cancel previously scheduled exports using tools that
392445
// can run even after the app has been terminated (such as WorkManager).
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
/*
2+
* Copyright The OpenTelemetry Authors
3+
* SPDX-License-Identifier: Apache-2.0
4+
*/
5+
6+
package io.opentelemetry.android.export
7+
8+
import io.opentelemetry.sdk.common.CompletableResultCode
9+
import io.opentelemetry.sdk.logs.data.LogRecordData
10+
import io.opentelemetry.sdk.logs.export.LogRecordExporter
11+
12+
/**
 * [LogRecordExporter] that keeps exported log records in an in-memory buffer while no delegate
 * is attached, and forwards them to the delegate once one has been set.
 *
 * At most `maxBufferedLogs` records (5,000 unless overridden) are held; records that arrive while
 * the buffer is full are dropped.
 */
internal class BufferDelegatingLogExporter(
    maxBufferedLogs: Int = 5_000,
) : BufferedDelegatingExporter<LogRecordData, LogRecordExporter>(bufferedSignals = maxBufferedLogs),
    LogRecordExporter {
    /** Buffers [logs] or hands them to the delegate, depending on whether a delegate is set. */
    override fun export(logs: Collection<LogRecordData>): CompletableResultCode {
        return bufferOrDelegate(logs)
    }

    /** Flushes the delegate when one is attached; reports success otherwise. */
    override fun flush(): CompletableResultCode =
        withDelegateOrNull { target ->
            val flushed = target?.flush()
            flushed ?: CompletableResultCode.ofSuccess()
        }

    /** Forwards [data] to the real exporter. */
    override fun exportToDelegate(
        delegate: LogRecordExporter,
        data: Collection<LogRecordData>,
    ): CompletableResultCode {
        return delegate.export(data)
    }

    /** Shuts the real exporter down. */
    override fun shutdownDelegate(delegate: LogRecordExporter): CompletableResultCode {
        return delegate.shutdown()
    }
}
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
/*
2+
* Copyright The OpenTelemetry Authors
3+
* SPDX-License-Identifier: Apache-2.0
4+
*/
5+
6+
package io.opentelemetry.android.export
7+
8+
import io.opentelemetry.sdk.common.CompletableResultCode
9+
import io.opentelemetry.sdk.trace.data.SpanData
10+
import io.opentelemetry.sdk.trace.export.SpanExporter
11+
12+
/**
 * [SpanExporter] that keeps exported spans in an in-memory buffer while no delegate is attached,
 * and forwards them to the delegate once one has been set.
 *
 * At most `maxBufferedSpans` spans (5,000 unless overridden) are held; spans that arrive while
 * the buffer is full are dropped.
 */
internal class BufferDelegatingSpanExporter(
    maxBufferedSpans: Int = 5_000,
) : BufferedDelegatingExporter<SpanData, SpanExporter>(bufferedSignals = maxBufferedSpans),
    SpanExporter {
    /** Buffers [spans] or hands them to the delegate, depending on whether a delegate is set. */
    override fun export(spans: Collection<SpanData>): CompletableResultCode {
        return bufferOrDelegate(spans)
    }

    /** Flushes the delegate when one is attached; reports success otherwise. */
    override fun flush(): CompletableResultCode =
        withDelegateOrNull { target ->
            val flushed = target?.flush()
            flushed ?: CompletableResultCode.ofSuccess()
        }

    /** Forwards [data] to the real exporter. */
    override fun exportToDelegate(
        delegate: SpanExporter,
        data: Collection<SpanData>,
    ): CompletableResultCode {
        return delegate.export(data)
    }

    /** Shuts the real exporter down. */
    override fun shutdownDelegate(delegate: SpanExporter): CompletableResultCode {
        return delegate.shutdown()
    }
}
Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
/*
2+
* Copyright The OpenTelemetry Authors
3+
* SPDX-License-Identifier: Apache-2.0
4+
*/
5+
6+
package io.opentelemetry.android.export
7+
8+
import io.opentelemetry.sdk.common.CompletableResultCode
9+
import java.util.concurrent.atomic.AtomicBoolean
10+
11+
/**
 * An in-memory buffer delegating signal exporter that buffers signals in memory until a delegate is set.
 * Once a delegate is set, the buffered signals are exported to the delegate.
 *
 * The buffer size is set to 5,000 by default. If the buffer is full, the exporter will drop new signals.
 *
 * @param T the signal type being exported
 * @param D the delegate exporter type
 * @param bufferedSignals maximum number of signals held in memory while no delegate is set
 */
internal abstract class BufferedDelegatingExporter<T, D>(private val bufferedSignals: Int = 5_000) {
    // Written only while holding `lock`; read without the lock on the fast path.
    @Volatile
    private var delegate: D? = null

    // Only touched while holding `lock`.
    private val buffer = arrayListOf<T>()
    private val lock = Any()
    private val isShutDown = AtomicBoolean(false)

    /**
     * Sets the delegate for this exporter and flushes the buffer to the delegate.
     *
     * If the delegate has already been set, an [IllegalStateException] will be thrown.
     * If this exporter has been shut down, the delegate will be shut down immediately.
     *
     * @param delegate the delegate to set
     *
     * @throws IllegalStateException if a delegate has already been set
     */
    fun setDelegate(delegate: D) {
        synchronized(lock) {
            check(this.delegate == null) { "Exporter delegate has already been set." }

            // Flush before publishing the delegate so buffered signals are handed over
            // ahead of anything exported through the lock-free fast path.
            flushToDelegate(delegate)

            this.delegate = delegate

            if (isShutDown.get()) {
                shutdownDelegate(delegate)
            }
        }
    }

    /**
     * Buffers the given data if the delegate has not been set, otherwise exports the data to the delegate.
     *
     * When buffering, only as many items as still fit in the buffer are kept; the rest are dropped
     * and success is still reported.
     *
     * @param data the data to buffer or export
     */
    protected fun bufferOrDelegate(data: Collection<T>): CompletableResultCode =
        withDelegateOrNull {
            if (it != null) {
                exportToDelegate(it, data)
            } else {
                // Clamp at zero: `take` rejects negative counts, which would otherwise
                // surface if a negative capacity was configured.
                val amountToTake = (bufferedSignals - buffer.size).coerceAtLeast(0)
                buffer.addAll(data.take(amountToTake))
                CompletableResultCode.ofSuccess()
            }
        }

    /**
     * Executes the given block with the delegate if it has been set, otherwise executes the block with a null delegate.
     *
     * When the delegate is already set the block runs without taking the lock; otherwise the
     * block runs while holding the lock and receives whatever the delegate is at that point.
     *
     * @param block the block to execute
     */
    protected fun <R> withDelegateOrNull(block: (D?) -> R): R {
        delegate?.let { return block(it) }
        return synchronized(lock) { block(delegate) }
    }

    /** Marks this exporter as shut down and shuts down the delegate if one is set. */
    open fun shutdown(): CompletableResultCode = bufferedShutDown()

    /** Exports [data] to the given [delegate]. */
    protected abstract fun exportToDelegate(
        delegate: D,
        data: Collection<T>,
    ): CompletableResultCode

    /** Shuts down the given [delegate]. */
    protected abstract fun shutdownDelegate(delegate: D): CompletableResultCode

    /** Hands the buffered signals to [delegate] and releases the buffer. Caller must hold `lock`. */
    private fun flushToDelegate(delegate: D) {
        // Pass a snapshot rather than the live buffer: the buffer is cleared right after,
        // which would empty the collection under a delegate that keeps a reference to it.
        exportToDelegate(delegate, buffer.toList())
        buffer.clear()
        buffer.trimToSize()
    }

    /**
     * Records the shutdown and shuts down the delegate when present.
     *
     * Runs under `lock` so it cannot interleave with [setDelegate]; otherwise both paths could
     * observe "delegate set" and "shut down" at once and each shut the delegate down.
     */
    private fun bufferedShutDown(): CompletableResultCode =
        synchronized(lock) {
            isShutDown.set(true)

            val current = delegate
            if (current != null) {
                shutdownDelegate(current)
            } else {
                CompletableResultCode.ofSuccess()
            }
        }
}

0 commit comments

Comments
 (0)