@@ -18,9 +18,11 @@ import {
18
18
TrackPublishOptions ,
19
19
TrackSource ,
20
20
} from '@livekit/rtc-node' ;
21
+ import { randomUUID } from 'node:crypto' ;
21
22
import { EventEmitter } from 'node:events' ;
22
23
import { AudioByteStream } from '../audio.js' ;
23
24
import {
25
+ ATTRIBUTE_SEGMENT_ID ,
24
26
ATTRIBUTE_TRANSCRIPTION_FINAL ,
25
27
ATTRIBUTE_TRANSCRIPTION_TRACK_ID ,
26
28
TOPIC_TRANSCRIPTION ,
@@ -72,6 +74,7 @@ export class MultimodalAgent extends EventEmitter {
72
74
73
75
#textResponseRetries = 0 ;
74
76
#maxTextResponseRetries: number ;
77
+ #transcriptionId?: string ;
75
78
76
79
constructor ( {
77
80
model,
@@ -257,13 +260,20 @@ export class MultimodalAgent extends EventEmitter {
257
260
258
261
const synchronizer = new TextAudioSynchronizer ( defaultTextSyncOptions ) ;
259
262
// Forward every synchronized text update to the room as one transcription
// segment. All updates up to and including the final one carry the same
// segment id; the update after a final one starts a fresh segment.
synchronizer.on('textUpdated', async (text) => {
  // Lazily allocate the segment id on the first update of a segment.
  if (!this.#transcriptionId) {
    this.#transcriptionId = randomUUID();
  }
  const segmentId = this.#transcriptionId;

  // Release the id synchronously, *before* awaiting the publish. This listener
  // is async, so another 'textUpdated' event can run while the publish below
  // is still pending; clearing the field afterwards would let that event reuse
  // the finished segment's id and then have it reset underneath it. Clearing
  // up front also avoids leaving a stale id behind if the publish rejects.
  // NOTE(review): #transcriptionId is also used by the user-speech handlers in
  // this class, so agent and user segments can still share an id — consider
  // tracking them in separate fields.
  if (text.final) {
    this.#transcriptionId = undefined;
  }

  await this.#publishTranscription(
    this.room!.localParticipant!.identity!,
    this.#getLocalTrackSid()!,
    text.text,
    text.final,
    text.id,
    segmentId,
  );
});
268
278
269
279
const handle = this . #agentPlayout?. play (
@@ -312,7 +322,17 @@ export class MultimodalAgent extends EventEmitter {
312
322
const participantIdentity = this . linkedParticipant ?. identity ;
313
323
const trackSid = this . subscribedTrack ?. sid ;
314
324
if ( participantIdentity && trackSid ) {
315
- await this . #publishTranscription( participantIdentity , trackSid , '…' , false , ev . itemId ) ;
325
+ if ( ! this . #transcriptionId) {
326
+ this . #transcriptionId = randomUUID ( ) ;
327
+ }
328
+ await this . #publishTranscription(
329
+ participantIdentity ,
330
+ trackSid ,
331
+ '…' ,
332
+ false ,
333
+ ev . itemId ,
334
+ this . #transcriptionId,
335
+ ) ;
316
336
} else {
317
337
this . #logger. error ( 'Participant or track not set' ) ;
318
338
}
@@ -325,13 +345,18 @@ export class MultimodalAgent extends EventEmitter {
325
345
const participantIdentity = this . linkedParticipant ?. identity ;
326
346
const trackSid = this . subscribedTrack ?. sid ;
327
347
if ( participantIdentity && trackSid ) {
348
+ if ( ! this . #transcriptionId) {
349
+ this . #transcriptionId = randomUUID ( ) ;
350
+ }
328
351
await this . #publishTranscription(
329
352
participantIdentity ,
330
353
trackSid ,
331
354
transcription ,
332
355
true ,
333
356
ev . itemId ,
357
+ this . #transcriptionId,
334
358
) ;
359
+ this . #transcriptionId = undefined ;
335
360
} else {
336
361
this . #logger. error ( 'Participant or track not set' ) ;
337
362
}
@@ -360,7 +385,17 @@ export class MultimodalAgent extends EventEmitter {
360
385
const participantIdentity = this . linkedParticipant ?. identity ;
361
386
const trackSid = this . subscribedTrack ?. sid ;
362
387
if ( participantIdentity && trackSid ) {
363
- await this . #publishTranscription( participantIdentity , trackSid , '…' , false , ev . itemId ) ;
388
+ if ( ! this . #transcriptionId) {
389
+ this . #transcriptionId = randomUUID ( ) ;
390
+ }
391
+ await this . #publishTranscription(
392
+ participantIdentity ,
393
+ trackSid ,
394
+ '…' ,
395
+ false ,
396
+ ev . itemId ,
397
+ this . #transcriptionId,
398
+ ) ;
364
399
}
365
400
} ) ;
366
401
@@ -492,6 +527,7 @@ export class MultimodalAgent extends EventEmitter {
492
527
text : string ,
493
528
isFinal : boolean ,
494
529
id : string ,
530
+ segmentId : string ,
495
531
) : Promise < void > {
496
532
this . #logger. debug (
497
533
`Publishing transcription ${ participantIdentity } ${ trackSid } ${ text } ${ isFinal } ${ id } ` ,
@@ -522,6 +558,7 @@ export class MultimodalAgent extends EventEmitter {
522
558
attributes : {
523
559
[ ATTRIBUTE_TRANSCRIPTION_TRACK_ID ] : trackSid ,
524
560
[ ATTRIBUTE_TRANSCRIPTION_FINAL ] : isFinal . toString ( ) ,
561
+ [ ATTRIBUTE_SEGMENT_ID ] : segmentId ,
525
562
} ,
526
563
} ) ;
527
564
await stream . write ( text ) ;
0 commit comments