davidliu
Committed by GitHub

Add first and last received times to TranscriptionSegment (#485)

  1 +---
  2 +"client-sdk-android": minor
  3 +---
  4 +
  5 +Add first and last received times to TranscriptionSegment
@@ -61,6 +61,7 @@ import livekit.LivekitRtc @@ -61,6 +61,7 @@ import livekit.LivekitRtc
61 import livekit.org.webrtc.* 61 import livekit.org.webrtc.*
62 import livekit.org.webrtc.audio.AudioDeviceModule 62 import livekit.org.webrtc.audio.AudioDeviceModule
63 import java.net.URI 63 import java.net.URI
  64 +import java.util.Date
64 import javax.inject.Named 65 import javax.inject.Named
65 66
66 class Room 67 class Room
@@ -271,6 +272,8 @@ constructor( @@ -271,6 +272,8 @@ constructor(
271 private var regionUrlProvider: RegionUrlProvider? = null 272 private var regionUrlProvider: RegionUrlProvider? = null
272 private var regionUrl: String? = null 273 private var regionUrl: String? = null
273 274
  275 + private var transcriptionReceivedTimes = mutableMapOf<String, Long>()
  276 +
274 private fun getCurrentRoomOptions(): RoomOptions = 277 private fun getCurrentRoomOptions(): RoomOptions =
275 RoomOptions( 278 RoomOptions(
276 adaptiveStream = adaptiveStream, 279 adaptiveStream = adaptiveStream,
@@ -1131,10 +1134,24 @@ constructor( @@ -1131,10 +1134,24 @@ constructor(
1131 * @suppress 1134 * @suppress
1132 */ 1135 */
1133 override fun onTranscriptionReceived(transcription: LivekitModels.Transcription) { 1136 override fun onTranscriptionReceived(transcription: LivekitModels.Transcription) {
  1137 + if (transcription.segmentsList.isEmpty()) {
  1138 + LKLog.d { "Received transcription segments are empty." }
  1139 + return
  1140 + }
  1141 +
1134 val participant = getParticipantByIdentity(transcription.transcribedParticipantIdentity) 1142 val participant = getParticipantByIdentity(transcription.transcribedParticipantIdentity)
1135 val publication = participant?.trackPublications?.get(transcription.trackId) 1143 val publication = participant?.trackPublications?.get(transcription.trackId)
1136 val segments = transcription.segmentsList 1144 val segments = transcription.segmentsList
1137 - .map { it.toSDKType() } 1145 + .map { it.toSDKType(firstReceivedTime = transcriptionReceivedTimes[it.id] ?: Date().time) }
  1146 +
  1147 + // Update receive times
  1148 + for (segment in segments) {
  1149 + if (segment.final) {
  1150 + transcriptionReceivedTimes.remove(segment.id)
  1151 + } else {
  1152 + transcriptionReceivedTimes[segment.id] = segment.firstReceivedTime
  1153 + }
  1154 + }
1138 1155
1139 val event = RoomEvent.TranscriptionReceived( 1156 val event = RoomEvent.TranscriptionReceived(
1140 room = this, 1157 room = this,
@@ -18,27 +18,69 @@ package io.livekit.android.room.types @@ -18,27 +18,69 @@ package io.livekit.android.room.types
18 18
19 import io.livekit.android.util.LKLog 19 import io.livekit.android.util.LKLog
20 import livekit.LivekitModels 20 import livekit.LivekitModels
  21 +import java.util.Date
21 22
22 data class TranscriptionSegment( 23 data class TranscriptionSegment(
  24 + /**
  25 + * The id of the transcription segment.
  26 + */
23 val id: String, 27 val id: String,
  28 + /**
  29 + * The text of the transcription.
  30 + */
24 val text: String, 31 val text: String,
  32 + /**
  33 + * The language of the transcription text.
  34 + */
25 val language: String, 35 val language: String,
26 - val startTime: Long,  
27 - val endTime: Long, 36 + /**
  37 + * If false, the user can expect this transcription to update in the future.
  38 + */
28 val final: Boolean, 39 val final: Boolean,
  40 + /**
  41 + * When this client first locally received this segment.
  42 + *
  43 + * Defined as milliseconds from epoch date (using [Date.getTime])
  44 + */
  45 + val firstReceivedTime: Long = Date().time,
  46 + /**
  47 + * When this client last locally received this segment.
  48 + *
  49 + * Defined as milliseconds from epoch date (using [Date.getTime])
  50 + */
  51 + val lastReceivedTime: Long = Date().time,
29 ) { 52 ) {
30 - override fun equals(other: Any?): Boolean {  
31 - if (this === other) return true  
32 - if (javaClass != other?.javaClass) return false 53 + override fun hashCode(): Int {
  54 + return id.hashCode()
  55 + }
  56 +}
33 57
34 - other as TranscriptionSegment 58 +/**
  59 + * Merges [newSegment] info into this segment if the ids are equal.
  60 + *
  61 + * Returns `this` unchanged if [newSegment] has a different id, or [newSegment] itself if `this` is null.
  62 + */
  63 +fun TranscriptionSegment?.merge(newSegment: TranscriptionSegment): TranscriptionSegment {
  64 + if (this == null) {
  65 + return newSegment
  66 + }
35 67
36 - return id == other.id 68 + if (this.id != newSegment.id) {
  69 + return this
37 } 70 }
38 71
39 - override fun hashCode(): Int {  
40 - return id.hashCode() 72 + if (this.final) {
  73 + LKLog.d { "new segment for $id overwriting final segment?" }
41 } 74 }
  75 +
  76 + return copy(
  77 + id = this.id,
  78 + text = newSegment.text,
  79 + language = newSegment.language,
  80 + final = newSegment.final,
  81 + firstReceivedTime = this.firstReceivedTime,
  82 + lastReceivedTime = newSegment.lastReceivedTime,
  83 + )
42 } 84 }
43 85
44 /** 86 /**
@@ -47,22 +89,18 @@ data class TranscriptionSegment( @@ -47,22 +89,18 @@ data class TranscriptionSegment(
47 fun MutableMap<String, TranscriptionSegment>.mergeNewSegments(newSegments: Collection<TranscriptionSegment>) { 89 fun MutableMap<String, TranscriptionSegment>.mergeNewSegments(newSegments: Collection<TranscriptionSegment>) {
48 for (segment in newSegments) { 90 for (segment in newSegments) {
49 val existingSegment = get(segment.id) 91 val existingSegment = get(segment.id)
50 - if (existingSegment?.final == true) {  
51 - LKLog.d { "new segment for ${segment.id} overwriting final segment?" }  
52 - }  
53 - put(segment.id, segment) 92 + put(segment.id, existingSegment.merge(segment))
54 } 93 }
55 } 94 }
56 95
57 /** 96 /**
58 * @suppress 97 * @suppress
59 */ 98 */
60 -fun LivekitModels.TranscriptionSegment.toSDKType() = 99 +fun LivekitModels.TranscriptionSegment.toSDKType(firstReceivedTime: Long = Date().time) =
61 TranscriptionSegment( 100 TranscriptionSegment(
62 id = id, 101 id = id,
63 text = text, 102 text = text,
64 language = language, 103 language = language,
65 - startTime = startTime,  
66 - endTime = endTime,  
67 final = final, 104 final = final,
  105 + firstReceivedTime = firstReceivedTime,
68 ) 106 )
@@ -33,7 +33,10 @@ import io.livekit.android.test.mock.MockPeerConnection @@ -33,7 +33,10 @@ import io.livekit.android.test.mock.MockPeerConnection
33 import io.livekit.android.test.mock.TestData 33 import io.livekit.android.test.mock.TestData
34 import io.livekit.android.test.util.toDataChannelBuffer 34 import io.livekit.android.test.util.toDataChannelBuffer
35 import kotlinx.coroutines.ExperimentalCoroutinesApi 35 import kotlinx.coroutines.ExperimentalCoroutinesApi
  36 +import kotlinx.coroutines.delay
  37 +import kotlinx.coroutines.runBlocking
36 import org.junit.Assert.assertEquals 38 import org.junit.Assert.assertEquals
  39 +import org.junit.Assert.assertTrue
37 import org.junit.Test 40 import org.junit.Test
38 41
39 @OptIn(ExperimentalCoroutinesApi::class) 42 @OptIn(ExperimentalCoroutinesApi::class)
@@ -97,4 +100,60 @@ class RoomTranscriptionMockE2ETest : MockE2ETest() { @@ -97,4 +100,60 @@ class RoomTranscriptionMockE2ETest : MockE2ETest() {
97 assertIsClass(TrackPublicationEvent.TranscriptionReceived::class.java, publicationEvents[0]) 100 assertIsClass(TrackPublicationEvent.TranscriptionReceived::class.java, publicationEvents[0])
98 } 101 }
99 } 102 }
  103 +
  104 + @Test
  105 + fun transcriptionFirstReceivedStaysSame() = runTest {
  106 + connect()
  107 + room.localParticipant.publishAudioTrack(
  108 + LocalAudioTrack(
  109 + name = "",
  110 + mediaTrack = MockAudioStreamTrack(id = TestData.LOCAL_TRACK_PUBLISHED.trackPublished.cid),
  111 + options = LocalAudioTrackOptions(),
  112 + audioProcessingController = MockAudioProcessingController(),
  113 + dispatcher = coroutineRule.dispatcher,
  114 + ),
  115 + options = AudioTrackPublishOptions(
  116 + source = Track.Source.MICROPHONE,
  117 + ),
  118 + )
  119 + val subPeerConnection = component.rtcEngine().getSubscriberPeerConnection() as MockPeerConnection
  120 + val subDataChannel = MockDataChannel(RTCEngine.RELIABLE_DATA_CHANNEL_LABEL)
  121 + subPeerConnection.observer?.onDataChannel(subDataChannel)
  122 +
  123 + val roomCollector = EventCollector(room.events, coroutineRule.scope)
  124 +
  125 + val firstDataBuffer = with(TestData.DATA_PACKET_TRANSCRIPTION.toBuilder()) {
  126 + transcription = with(transcription.toBuilder()) {
  127 + val firstSegment = with(getSegments(0).toBuilder()) {
  128 + text = "first_text"
  129 + language = "first_enUS"
  130 + text = "This is a not a final transcription."
  131 + final = false
  132 + build()
  133 + }
  134 + clearSegments()
  135 + addSegments(firstSegment)
  136 + build()
  137 + }
  138 + build()
  139 + }.toDataChannelBuffer()
  140 + subDataChannel.observer?.onMessage(firstDataBuffer)
  141 +
  142 + runBlocking {
  143 + delay(2) // to ensure the first and last received times are different.
  144 + }
  145 + val dataBuffer = TestData.DATA_PACKET_TRANSCRIPTION.toDataChannelBuffer()
  146 + subDataChannel.observer?.onMessage(dataBuffer)
  147 +
  148 + val roomEvents = roomCollector.stopCollecting()
  149 +
  150 + assertEquals(2, roomEvents.size)
  151 +
  152 + val first = (roomEvents[0] as RoomEvent.TranscriptionReceived).transcriptionSegments[0]
  153 + val final = (roomEvents[1] as RoomEvent.TranscriptionReceived).transcriptionSegments[0]
  154 + val expectedSegment = TestData.DATA_PACKET_TRANSCRIPTION.transcription.getSegments(0)
  155 + assertEquals(expectedSegment.id, final.id)
  156 + assertEquals(final.firstReceivedTime, first.firstReceivedTime)
  157 + assertTrue(final.lastReceivedTime > final.firstReceivedTime)
  158 + }
100 } 159 }
  1 +/*
  2 + * Copyright 2024 LiveKit, Inc.
  3 + *
  4 + * Licensed under the Apache License, Version 2.0 (the "License");
  5 + * you may not use this file except in compliance with the License.
  6 + * You may obtain a copy of the License at
  7 + *
  8 + * http://www.apache.org/licenses/LICENSE-2.0
  9 + *
  10 + * Unless required by applicable law or agreed to in writing, software
  11 + * distributed under the License is distributed on an "AS IS" BASIS,
  12 + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13 + * See the License for the specific language governing permissions and
  14 + * limitations under the License.
  15 + */
  16 +
  17 +package io.livekit.android.room.types
  18 +
  19 +import org.junit.Assert.assertEquals
  20 +import org.junit.Test
  21 +
  22 +class TranscriptionSegmentTest {
  23 +
  24 + @Test
  25 + fun mergeSegments() {
  26 + val first = TranscriptionSegment(
  27 + id = "1",
  28 + text = "text",
  29 + language = "language",
  30 + final = false,
  31 + firstReceivedTime = 0,
  32 + lastReceivedTime = 0,
  33 + )
  34 +
  35 + val last = TranscriptionSegment(
  36 + id = "1",
  37 + text = "newtext",
  38 + language = "newlanguage",
  39 + final = true,
  40 + firstReceivedTime = 100,
  41 + lastReceivedTime = 100,
  42 + )
  43 +
  44 + val merged = first.merge(last)
  45 +
  46 + val expected = TranscriptionSegment(
  47 + id = "1",
  48 + text = "newtext",
  49 + language = "newlanguage",
  50 + final = true,
  51 + firstReceivedTime = 0,
  52 + lastReceivedTime = 100,
  53 + )
  54 + assertEquals(expected, merged)
  55 + }
  56 +}