Committed by
GitHub
Add first and last received times to TranscriptionSegment (#485)
正在显示
5 个修改的文件
包含
192 行增加
和
17 行删除
.changeset/sour-needles-drop.md
0 → 100644
| @@ -61,6 +61,7 @@ import livekit.LivekitRtc | @@ -61,6 +61,7 @@ import livekit.LivekitRtc | ||
| 61 | import livekit.org.webrtc.* | 61 | import livekit.org.webrtc.* |
| 62 | import livekit.org.webrtc.audio.AudioDeviceModule | 62 | import livekit.org.webrtc.audio.AudioDeviceModule |
| 63 | import java.net.URI | 63 | import java.net.URI |
| 64 | +import java.util.Date | ||
| 64 | import javax.inject.Named | 65 | import javax.inject.Named |
| 65 | 66 | ||
| 66 | class Room | 67 | class Room |
| @@ -271,6 +272,8 @@ constructor( | @@ -271,6 +272,8 @@ constructor( | ||
| 271 | private var regionUrlProvider: RegionUrlProvider? = null | 272 | private var regionUrlProvider: RegionUrlProvider? = null |
| 272 | private var regionUrl: String? = null | 273 | private var regionUrl: String? = null |
| 273 | 274 | ||
| 275 | + private var transcriptionReceivedTimes = mutableMapOf<String, Long>() | ||
| 276 | + | ||
| 274 | private fun getCurrentRoomOptions(): RoomOptions = | 277 | private fun getCurrentRoomOptions(): RoomOptions = |
| 275 | RoomOptions( | 278 | RoomOptions( |
| 276 | adaptiveStream = adaptiveStream, | 279 | adaptiveStream = adaptiveStream, |
| @@ -1131,10 +1134,24 @@ constructor( | @@ -1131,10 +1134,24 @@ constructor( | ||
| 1131 | * @suppress | 1134 | * @suppress |
| 1132 | */ | 1135 | */ |
| 1133 | override fun onTranscriptionReceived(transcription: LivekitModels.Transcription) { | 1136 | override fun onTranscriptionReceived(transcription: LivekitModels.Transcription) { |
| 1137 | + if (transcription.segmentsList.isEmpty()) { | ||
| 1138 | + LKLog.d { "Received transcription segments are empty." } | ||
| 1139 | + return | ||
| 1140 | + } | ||
| 1141 | + | ||
| 1134 | val participant = getParticipantByIdentity(transcription.transcribedParticipantIdentity) | 1142 | val participant = getParticipantByIdentity(transcription.transcribedParticipantIdentity) |
| 1135 | val publication = participant?.trackPublications?.get(transcription.trackId) | 1143 | val publication = participant?.trackPublications?.get(transcription.trackId) |
| 1136 | val segments = transcription.segmentsList | 1144 | val segments = transcription.segmentsList |
| 1137 | - .map { it.toSDKType() } | 1145 | + .map { it.toSDKType(firstReceivedTime = transcriptionReceivedTimes[it.id] ?: Date().time) } |
| 1146 | + | ||
| 1147 | + // Update receive times | ||
| 1148 | + for (segment in segments) { | ||
| 1149 | + if (segment.final) { | ||
| 1150 | + transcriptionReceivedTimes.remove(segment.id) | ||
| 1151 | + } else { | ||
| 1152 | + transcriptionReceivedTimes[segment.id] = segment.firstReceivedTime | ||
| 1153 | + } | ||
| 1154 | + } | ||
| 1138 | 1155 | ||
| 1139 | val event = RoomEvent.TranscriptionReceived( | 1156 | val event = RoomEvent.TranscriptionReceived( |
| 1140 | room = this, | 1157 | room = this, |
| @@ -18,27 +18,69 @@ package io.livekit.android.room.types | @@ -18,27 +18,69 @@ package io.livekit.android.room.types | ||
| 18 | 18 | ||
| 19 | import io.livekit.android.util.LKLog | 19 | import io.livekit.android.util.LKLog |
| 20 | import livekit.LivekitModels | 20 | import livekit.LivekitModels |
| 21 | +import java.util.Date | ||
| 21 | 22 | ||
| 22 | data class TranscriptionSegment( | 23 | data class TranscriptionSegment( |
| 24 | + /** | ||
| 25 | + * The id of the transcription segment. | ||
| 26 | + */ | ||
| 23 | val id: String, | 27 | val id: String, |
| 28 | + /** | ||
| 29 | + * The text of the transcription. | ||
| 30 | + */ | ||
| 24 | val text: String, | 31 | val text: String, |
| 32 | + /** | ||
| 33 | + * The language of the transcription. | ||
| 34 | + */ | ||
| 25 | val language: String, | 35 | val language: String, |
| 26 | - val startTime: Long, | ||
| 27 | - val endTime: Long, | 36 | + /** |
| 37 | + * If false, the user can expect this transcription to update in the future. | ||
| 38 | + */ | ||
| 28 | val final: Boolean, | 39 | val final: Boolean, |
| 40 | + /** | ||
| 41 | + * When this client first locally received this segment. | ||
| 42 | + * | ||
| 43 | + * Defined as milliseconds from epoch date (using [Date.getTime]) | ||
| 44 | + */ | ||
| 45 | + val firstReceivedTime: Long = Date().time, | ||
| 46 | + /** | ||
| 47 | + * When this client last locally received this segment. | ||
| 48 | + * | ||
| 49 | + * Defined as milliseconds from epoch date (using [Date.getTime]) | ||
| 50 | + */ | ||
| 51 | + val lastReceivedTime: Long = Date().time, | ||
| 29 | ) { | 52 | ) { |
| 30 | - override fun equals(other: Any?): Boolean { | ||
| 31 | - if (this === other) return true | ||
| 32 | - if (javaClass != other?.javaClass) return false | 53 | + override fun hashCode(): Int { |
| 54 | + return id.hashCode() | ||
| 55 | + } | ||
| 56 | +} | ||
| 33 | 57 | ||
| 34 | - other as TranscriptionSegment | 58 | +/** |
| 59 | + * Merges [newSegment] info into this segment if the ids are equal. | ||
| 60 | + * | ||
| 61 | + * Returns `this` unchanged if the ids differ, or [newSegment] if this segment is null. | ||
| 62 | + */ | ||
| 63 | +fun TranscriptionSegment?.merge(newSegment: TranscriptionSegment): TranscriptionSegment { | ||
| 64 | + if (this == null) { | ||
| 65 | + return newSegment | ||
| 66 | + } | ||
| 35 | 67 | ||
| 36 | - return id == other.id | 68 | + if (this.id != newSegment.id) { |
| 69 | + return this | ||
| 37 | } | 70 | } |
| 38 | 71 | ||
| 39 | - override fun hashCode(): Int { | ||
| 40 | - return id.hashCode() | 72 | + if (this.final) { |
| 73 | + LKLog.d { "new segment for $id overwriting final segment?" } | ||
| 41 | } | 74 | } |
| 75 | + | ||
| 76 | + return copy( | ||
| 77 | + id = this.id, | ||
| 78 | + text = newSegment.text, | ||
| 79 | + language = newSegment.language, | ||
| 80 | + final = newSegment.final, | ||
| 81 | + firstReceivedTime = this.firstReceivedTime, | ||
| 82 | + lastReceivedTime = newSegment.lastReceivedTime, | ||
| 83 | + ) | ||
| 42 | } | 84 | } |
| 43 | 85 | ||
| 44 | /** | 86 | /** |
| @@ -47,22 +89,18 @@ data class TranscriptionSegment( | @@ -47,22 +89,18 @@ data class TranscriptionSegment( | ||
| 47 | fun MutableMap<String, TranscriptionSegment>.mergeNewSegments(newSegments: Collection<TranscriptionSegment>) { | 89 | fun MutableMap<String, TranscriptionSegment>.mergeNewSegments(newSegments: Collection<TranscriptionSegment>) { |
| 48 | for (segment in newSegments) { | 90 | for (segment in newSegments) { |
| 49 | val existingSegment = get(segment.id) | 91 | val existingSegment = get(segment.id) |
| 50 | - if (existingSegment?.final == true) { | ||
| 51 | - LKLog.d { "new segment for ${segment.id} overwriting final segment?" } | ||
| 52 | - } | ||
| 53 | - put(segment.id, segment) | 92 | + put(segment.id, existingSegment.merge(segment)) |
| 54 | } | 93 | } |
| 55 | } | 94 | } |
| 56 | 95 | ||
| 57 | /** | 96 | /** |
| 58 | * @suppress | 97 | * @suppress |
| 59 | */ | 98 | */ |
| 60 | -fun LivekitModels.TranscriptionSegment.toSDKType() = | 99 | +fun LivekitModels.TranscriptionSegment.toSDKType(firstReceivedTime: Long = Date().time) = |
| 61 | TranscriptionSegment( | 100 | TranscriptionSegment( |
| 62 | id = id, | 101 | id = id, |
| 63 | text = text, | 102 | text = text, |
| 64 | language = language, | 103 | language = language, |
| 65 | - startTime = startTime, | ||
| 66 | - endTime = endTime, | ||
| 67 | final = final, | 104 | final = final, |
| 105 | + firstReceivedTime = firstReceivedTime, | ||
| 68 | ) | 106 | ) |
| @@ -33,7 +33,10 @@ import io.livekit.android.test.mock.MockPeerConnection | @@ -33,7 +33,10 @@ import io.livekit.android.test.mock.MockPeerConnection | ||
| 33 | import io.livekit.android.test.mock.TestData | 33 | import io.livekit.android.test.mock.TestData |
| 34 | import io.livekit.android.test.util.toDataChannelBuffer | 34 | import io.livekit.android.test.util.toDataChannelBuffer |
| 35 | import kotlinx.coroutines.ExperimentalCoroutinesApi | 35 | import kotlinx.coroutines.ExperimentalCoroutinesApi |
| 36 | +import kotlinx.coroutines.delay | ||
| 37 | +import kotlinx.coroutines.runBlocking | ||
| 36 | import org.junit.Assert.assertEquals | 38 | import org.junit.Assert.assertEquals |
| 39 | +import org.junit.Assert.assertTrue | ||
| 37 | import org.junit.Test | 40 | import org.junit.Test |
| 38 | 41 | ||
| 39 | @OptIn(ExperimentalCoroutinesApi::class) | 42 | @OptIn(ExperimentalCoroutinesApi::class) |
| @@ -97,4 +100,60 @@ class RoomTranscriptionMockE2ETest : MockE2ETest() { | @@ -97,4 +100,60 @@ class RoomTranscriptionMockE2ETest : MockE2ETest() { | ||
| 97 | assertIsClass(TrackPublicationEvent.TranscriptionReceived::class.java, publicationEvents[0]) | 100 | assertIsClass(TrackPublicationEvent.TranscriptionReceived::class.java, publicationEvents[0]) |
| 98 | } | 101 | } |
| 99 | } | 102 | } |
| 103 | + | ||
| 104 | + @Test | ||
| 105 | + fun transcriptionFirstReceivedStaysSame() = runTest { | ||
| 106 | + connect() | ||
| 107 | + room.localParticipant.publishAudioTrack( | ||
| 108 | + LocalAudioTrack( | ||
| 109 | + name = "", | ||
| 110 | + mediaTrack = MockAudioStreamTrack(id = TestData.LOCAL_TRACK_PUBLISHED.trackPublished.cid), | ||
| 111 | + options = LocalAudioTrackOptions(), | ||
| 112 | + audioProcessingController = MockAudioProcessingController(), | ||
| 113 | + dispatcher = coroutineRule.dispatcher, | ||
| 114 | + ), | ||
| 115 | + options = AudioTrackPublishOptions( | ||
| 116 | + source = Track.Source.MICROPHONE, | ||
| 117 | + ), | ||
| 118 | + ) | ||
| 119 | + val subPeerConnection = component.rtcEngine().getSubscriberPeerConnection() as MockPeerConnection | ||
| 120 | + val subDataChannel = MockDataChannel(RTCEngine.RELIABLE_DATA_CHANNEL_LABEL) | ||
| 121 | + subPeerConnection.observer?.onDataChannel(subDataChannel) | ||
| 122 | + | ||
| 123 | + val roomCollector = EventCollector(room.events, coroutineRule.scope) | ||
| 124 | + | ||
| 125 | + val firstDataBuffer = with(TestData.DATA_PACKET_TRANSCRIPTION.toBuilder()) { | ||
| 126 | + transcription = with(transcription.toBuilder()) { | ||
| 127 | + val firstSegment = with(getSegments(0).toBuilder()) { | ||
| 128 | + text = "first_text" | ||
| 129 | + language = "first_enUS" | ||
| 130 | + text = "This is a not a final transcription." | ||
| 131 | + final = false | ||
| 132 | + build() | ||
| 133 | + } | ||
| 134 | + clearSegments() | ||
| 135 | + addSegments(firstSegment) | ||
| 136 | + build() | ||
| 137 | + } | ||
| 138 | + build() | ||
| 139 | + }.toDataChannelBuffer() | ||
| 140 | + subDataChannel.observer?.onMessage(firstDataBuffer) | ||
| 141 | + | ||
| 142 | + runBlocking { | ||
| 143 | + delay(2) // to ensure start and end received times are different. | ||
| 144 | + } | ||
| 145 | + val dataBuffer = TestData.DATA_PACKET_TRANSCRIPTION.toDataChannelBuffer() | ||
| 146 | + subDataChannel.observer?.onMessage(dataBuffer) | ||
| 147 | + | ||
| 148 | + val roomEvents = roomCollector.stopCollecting() | ||
| 149 | + | ||
| 150 | + assertEquals(2, roomEvents.size) | ||
| 151 | + | ||
| 152 | + val first = (roomEvents[0] as RoomEvent.TranscriptionReceived).transcriptionSegments[0] | ||
| 153 | + val final = (roomEvents[1] as RoomEvent.TranscriptionReceived).transcriptionSegments[0] | ||
| 154 | + val expectedSegment = TestData.DATA_PACKET_TRANSCRIPTION.transcription.getSegments(0) | ||
| 155 | + assertEquals(expectedSegment.id, final.id) | ||
| 156 | + assertEquals(final.firstReceivedTime, first.firstReceivedTime) | ||
| 157 | + assertTrue(final.lastReceivedTime > final.firstReceivedTime) | ||
| 158 | + } | ||
| 100 | } | 159 | } |
livekit-android-test/src/test/java/io/livekit/android/room/types/TranscriptionSegmentTest.kt
0 → 100644
| 1 | +/* | ||
| 2 | + * Copyright 2024 LiveKit, Inc. | ||
| 3 | + * | ||
| 4 | + * Licensed under the Apache License, Version 2.0 (the "License"); | ||
| 5 | + * you may not use this file except in compliance with the License. | ||
| 6 | + * You may obtain a copy of the License at | ||
| 7 | + * | ||
| 8 | + * http://www.apache.org/licenses/LICENSE-2.0 | ||
| 9 | + * | ||
| 10 | + * Unless required by applicable law or agreed to in writing, software | ||
| 11 | + * distributed under the License is distributed on an "AS IS" BASIS, | ||
| 12 | + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| 13 | + * See the License for the specific language governing permissions and | ||
| 14 | + * limitations under the License. | ||
| 15 | + */ | ||
| 16 | + | ||
| 17 | +package io.livekit.android.room.types | ||
| 18 | + | ||
| 19 | +import org.junit.Assert.assertEquals | ||
| 20 | +import org.junit.Test | ||
| 21 | + | ||
| 22 | +class TranscriptionSegmentTest { | ||
| 23 | + | ||
| 24 | + @Test | ||
| 25 | + fun mergeSegments() { | ||
| 26 | + val first = TranscriptionSegment( | ||
| 27 | + id = "1", | ||
| 28 | + text = "text", | ||
| 29 | + language = "language", | ||
| 30 | + final = false, | ||
| 31 | + firstReceivedTime = 0, | ||
| 32 | + lastReceivedTime = 0, | ||
| 33 | + ) | ||
| 34 | + | ||
| 35 | + val last = TranscriptionSegment( | ||
| 36 | + id = "1", | ||
| 37 | + text = "newtext", | ||
| 38 | + language = "newlanguage", | ||
| 39 | + final = true, | ||
| 40 | + firstReceivedTime = 100, | ||
| 41 | + lastReceivedTime = 100, | ||
| 42 | + ) | ||
| 43 | + | ||
| 44 | + val merged = first.merge(last) | ||
| 45 | + | ||
| 46 | + val expected = TranscriptionSegment( | ||
| 47 | + id = "1", | ||
| 48 | + text = "newtext", | ||
| 49 | + language = "newlanguage", | ||
| 50 | + final = true, | ||
| 51 | + firstReceivedTime = 0, | ||
| 52 | + lastReceivedTime = 100, | ||
| 53 | + ) | ||
| 54 | + assertEquals(expected, merged) | ||
| 55 | + } | ||
| 56 | +} |
-
请 注册 或 登录 后发表评论