davidliu
Committed by GitHub

Add simulcastLayers to VideoTrackPublishOptions for directly specifying the resolutions to use (#746)

Also includes fixes for choosing default encodings
---
"client-sdk-android": patch
---
Fix default simulcast layers using a lower than intended resolution
... ...
---
"client-sdk-android": patch
---
Properly use screenShareTrackPublishDefaults when manually publishing a screenshare track
... ...
---
"client-sdk-android": minor
---
Add simulcastLayers to VideoTrackPublishOptions for directly specifying the resolutions to use
... ...
... ... @@ -6,4 +6,11 @@
<module name="livekit-android.examples.selfie-segmentation" target="17" />
</bytecodeTargetLevel>
</component>
<component name="JavacSettings">
<option name="ADDITIONAL_OPTIONS_OVERRIDE">
<module name="livekit-android.livekit-lint" options="-proc:none" />
<module name="livekit-android.livekit-lint.main" options="-proc:none" />
<module name="livekit-android.livekit-lint.test" options="-proc:none" />
</option>
</component>
</project>
\ No newline at end of file
... ...
... ... @@ -51,6 +51,7 @@ import io.livekit.android.room.track.TrackPublication
import io.livekit.android.room.track.VideoCaptureParameter
import io.livekit.android.room.track.VideoCodec
import io.livekit.android.room.track.VideoEncoding
import io.livekit.android.room.track.VideoPreset
import io.livekit.android.room.track.screencapture.ScreenCaptureParams
import io.livekit.android.room.util.EncodingUtils
import io.livekit.android.rpc.RpcError
... ... @@ -480,7 +481,10 @@ internal constructor(
*/
suspend fun publishVideoTrack(
track: LocalVideoTrack,
options: VideoTrackPublishOptions = VideoTrackPublishOptions(null, videoTrackPublishDefaults),
options: VideoTrackPublishOptions = VideoTrackPublishOptions(
null,
if (track.options.isScreencast) screenShareTrackPublishDefaults else videoTrackPublishDefaults,
),
publishListener: PublishListener? = null,
): Boolean {
@Suppress("NAME_SHADOWING") var options = options
... ... @@ -514,7 +518,7 @@ internal constructor(
options = options.copy(scalabilityMode = "L3T3_KEY")
}
}
val encodings = computeVideoEncodings(track.dimensions, options)
val encodings = computeVideoEncodings(track.options.isScreencast, track.dimensions, options)
val videoLayers =
EncodingUtils.videoLayersFromEncodings(track.dimensions.width, track.dimensions.height, encodings, isSVC)
... ... @@ -722,7 +726,10 @@ internal constructor(
options = options.copy(videoCodec = updatedCodec)
// recompute encodings since bitrates/etc could have changed
encodings = computeVideoEncodings((track as LocalVideoTrack).dimensions, options)
val videoTrack = track as LocalVideoTrack
encodings = computeVideoEncodings(videoTrack.options.isScreencast, videoTrack.dimensions, options)
encodings // encodings is used in negotiate, this suppresses unused lint
}
}
}
... ... @@ -751,35 +758,32 @@ internal constructor(
}
private fun computeVideoEncodings(
isScreenShare: Boolean,
dimensions: Track.Dimensions,
options: VideoTrackPublishOptions,
): List<RtpParameters.Encoding> {
val (width, height) = dimensions
var encoding = options.videoEncoding
var originalEncoding = options.videoEncoding
val simulcast = options.simulcast
val scalabilityMode = options.scalabilityMode
if ((encoding == null && !simulcast) || width == 0 || height == 0) {
if ((originalEncoding == null && !simulcast) || width == 0 || height == 0) {
return emptyList()
}
if (encoding == null) {
encoding = EncodingUtils.determineAppropriateEncoding(width, height)
LKLog.d { "using video encoding: $encoding" }
if (originalEncoding == null) {
originalEncoding = EncodingUtils.determineAppropriateEncoding(isScreenShare, width, height)
LKLog.d { "using video encoding: $originalEncoding" }
}
val encodings = mutableListOf<RtpParameters.Encoding>()
if (scalabilityMode != null && isSVCCodec(options.videoCodec)) {
val rtpEncoding = encoding.toRtpEncoding()
val rtpEncoding = originalEncoding.toRtpEncoding()
rtpEncoding.scalabilityMode = scalabilityMode
encodings.add(rtpEncoding)
return encodings
} else if (simulcast) {
val presets = EncodingUtils.presetsForResolution(width, height)
val midPreset = presets[1]
val lowPreset = presets[0]
fun addEncoding(videoEncoding: VideoEncoding, scale: Double) {
if (scale < 1.0) {
LKLog.w { "Discarding encoding with a scale < 1.0: $scale." }
... ... @@ -793,27 +797,42 @@ internal constructor(
encodings.add(videoEncoding.toRtpEncoding(rid, scale))
}
val presets = options.simulcastLayers
?: EncodingUtils.defaultSimulcastLayers(
isScreenShare = isScreenShare,
width = width,
height = height,
originalEncoding = originalEncoding,
)
if (presets.isEmpty()) {
LKLog.w { "Simulcast is enabled but an empty list was set for simulcastLayers!" }
}
// if resolution is high enough, we send both h and q res.
// otherwise only send h
val size = max(width, height)
val maxFps = encoding.maxFps
val maxFps = originalEncoding.maxFps
fun calculateScaleDown(captureParam: VideoCaptureParameter): Double {
val targetSize = max(captureParam.width, captureParam.height)
return size / targetSize.toDouble()
}
if (size >= 960) {
val lowScale = calculateScaleDown(lowPreset.capture)
val midScale = calculateScaleDown(midPreset.capture)
addEncoding(lowPreset.encoding.copy(maxFps = min(lowPreset.encoding.maxFps, maxFps)), lowScale)
addEncoding(midPreset.encoding.copy(maxFps = min(midPreset.encoding.maxFps, maxFps)), midScale)
} else {
// Add encodings from smallest to largest.
val orderedPresets = presets.sortedByDescending { calculateScaleDown(it.capture) }
val lowPreset = orderedPresets.getOrNull(0)
val midPreset = orderedPresets.getOrNull(1)
if (size >= 480 && lowPreset != null) {
val lowScale = calculateScaleDown(lowPreset.capture)
addEncoding(lowPreset.encoding.copy(maxFps = min(lowPreset.encoding.maxFps, maxFps)), lowScale)
}
addEncoding(encoding, 1.0)
if (size >= 960 && midPreset != null) {
val midScale = calculateScaleDown(midPreset.capture)
addEncoding(midPreset.encoding.copy(maxFps = min(midPreset.encoding.maxFps, maxFps)), midScale)
}
addEncoding(originalEncoding, 1.0)
} else {
encodings.add(encoding.toRtpEncoding())
encodings.add(originalEncoding.toRtpEncoding())
}
// Make largest size at front. addTransceiver seems to fail if ordered from smallest to largest.
... ... @@ -838,7 +857,7 @@ internal constructor(
videoCodec = videoCodec.codecName,
videoEncoding = options.backupCodec!!.encoding,
)
val backupEncodings = computeVideoEncodings(track.dimensions, backupOptions)
val backupEncodings = computeVideoEncodings(track.options.isScreencast, track.dimensions, backupOptions)
return backupOptions to backupEncodings
}
... ... @@ -1713,8 +1732,19 @@ abstract class BaseVideoTrackPublishOptions {
* null value indicates default value (maintain framerate).
*/
abstract val degradationPreference: RtpParameters.DegradationPreference?
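/**
* Up to two additional simulcast layers to publish in addition to the original
* Track. Layers should be ordered from smallest to largest. If more than two
* layers are given, only the two smallest are used. Any layers that have larger
* resolutions than the source resolution will also be ignored.
*
* The smallest layer is only published when the longest side of the source is
* at least 480, and the second layer only when it is at least 960.
*
* When set to null, camera tracks default to the H180 and H360 presets of the
* closest aspect ratio (16:9 or 4:3), while screen share tracks default to a
* single layer at half the source resolution and 3 fps.
*/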
abstract val simulcastLayers: List<VideoPreset>?
}
// When adding a new default here, remember to also pass it through in the copy constructor of VideoTrackPublishOptions.
data class VideoTrackPublishDefaults(
override val videoEncoding: VideoEncoding? = null,
override val simulcast: Boolean = true,
... ... @@ -1722,6 +1752,7 @@ data class VideoTrackPublishDefaults(
override val scalabilityMode: String? = null,
override val backupCodec: BackupVideoCodec? = null,
override val degradationPreference: RtpParameters.DegradationPreference? = null,
override val simulcastLayers: List<VideoPreset>? = null,
) : BaseVideoTrackPublishOptions()
data class VideoTrackPublishOptions(
... ... @@ -1734,6 +1765,7 @@ data class VideoTrackPublishOptions(
override val source: Track.Source? = null,
override val stream: String? = null,
override val degradationPreference: RtpParameters.DegradationPreference? = null,
override val simulcastLayers: List<VideoPreset>? = null,
) : BaseVideoTrackPublishOptions(), TrackPublishOptions {
constructor(
name: String? = null,
... ... @@ -1750,6 +1782,7 @@ data class VideoTrackPublishOptions(
source = source,
stream = stream,
degradationPreference = base.degradationPreference,
simulcastLayers = base.simulcastLayers,
)
fun createBackupOptions(): VideoTrackPublishOptions? {
... ... @@ -1798,6 +1831,7 @@ enum class AudioPresets(
MUSIC_HIGH_QUALITY_STEREO(128_000)
}
// Remember when adding any defaults to add it in the copy constructor of VideoTrackPublishOptions.
/**
* Default options for publishing an audio track.
*/
... ...
... ... @@ -19,6 +19,9 @@ package io.livekit.android.room.track
import livekit.org.webrtc.RtpParameters
data class LocalVideoTrackOptions(
/**
* Whether this is a screenshare track.
*/
val isScreencast: Boolean = false,
/**
* Preferred deviceId to capture from. If not set or found,
... ... @@ -110,6 +113,11 @@ interface VideoPreset {
val encoding: VideoEncoding
}
/**
* A [VideoPreset] built from explicitly supplied capture parameters and encoding.
*
* @property capture the capture parameters of this preset.
* @property encoding the encoding of this preset.
*/
data class CustomVideoPreset(
override val capture: VideoCaptureParameter,
override val encoding: VideoEncoding,
) : VideoPreset
/**
* 16:9 Video presets along with suggested bitrates
*/
... ...
/*
* Copyright 2023-2024 LiveKit, Inc.
* Copyright 2023-2025 LiveKit, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
... ... @@ -16,6 +16,9 @@
package io.livekit.android.room.util
import io.livekit.android.room.track.CustomVideoPreset
import io.livekit.android.room.track.ScreenSharePresets
import io.livekit.android.room.track.VideoCaptureParameter
import io.livekit.android.room.track.VideoEncoding
import io.livekit.android.room.track.VideoPreset
import io.livekit.android.room.track.VideoPreset169
... ... @@ -35,6 +38,15 @@ internal object EncodingUtils {
val VIDEO_RIDS = arrayOf("q", "h", "f")
// Presets suggested for screen share tracks; computeSuggestedPresets returns
// this list unchanged when isScreenShare is true.
// Note: maintain order from smallest to biggest.
private val SCREENSHARE_PRESETS = listOf(
ScreenSharePresets.H360_FPS3,
ScreenSharePresets.H720_FPS5,
ScreenSharePresets.H720_FPS15,
ScreenSharePresets.H1080_FPS15,
ScreenSharePresets.H1080_FPS30,
)
// Note: maintain order from smallest to biggest.
private val PRESETS_16_9 = listOf(
VideoPreset169.H90,
VideoPreset169.H180,
... ... @@ -60,8 +72,20 @@ internal object EncodingUtils {
VideoPreset43.H1440,
)
fun determineAppropriateEncoding(width: Int, height: Int): VideoEncoding {
val presets = presetsForResolution(width, height)
// Default additional simulcast layers returned by defaultSimulcastLayers for
// non-screen-share sources whose aspect ratio is closer to 16:9 than to 4:3.
// Note: maintain order from smallest to biggest.
private val DEFAULT_SIMULCAST_LAYERS_169 = listOf(
VideoPreset169.H180,
VideoPreset169.H360,
)
// Default additional simulcast layers returned by defaultSimulcastLayers for
// all other non-screen-share sources.
// Note: maintain order from smallest to biggest.
private val DEFAULT_SIMULCAST_LAYERS_43 = listOf(
VideoPreset43.H180,
VideoPreset43.H360,
)
fun determineAppropriateEncoding(isScreenShare: Boolean, width: Int, height: Int): VideoEncoding {
val presets = computeSuggestedPresets(isScreenShare, width, height)
// presets assume width is longest size
val longestSize = max(width, height)
... ... @@ -72,7 +96,11 @@ internal object EncodingUtils {
return preset.encoding
}
fun presetsForResolution(width: Int, height: Int): List<VideoPreset> {
fun computeSuggestedPresets(isScreenShare: Boolean, width: Int, height: Int): List<VideoPreset> {
if (isScreenShare) {
return SCREENSHARE_PRESETS
}
val longestSize = max(width, height)
val shortestSize = min(width, height)
val aspectRatio = longestSize.toFloat() / shortestSize
... ... @@ -83,6 +111,41 @@ internal object EncodingUtils {
}
}
/**
 * Picks the simulcast layers to use when the publisher did not specify any.
 *
 * Screen share sources are delegated to [computeDefaultScreenshareSimulcastLayers]. For every
 * other source the longest side is divided by the shortest side, and the 16:9 defaults are
 * returned when that ratio is strictly closer to 16:9 than to 4:3; otherwise the 4:3 defaults
 * are returned.
 *
 * @param isScreenShare whether the source is a screen share.
 * @param width width of the source.
 * @param height height of the source.
 * @param originalEncoding encoding of the full-size layer; only used for screen share sources.
 * @return the default additional layers for the source.
 */
fun defaultSimulcastLayers(isScreenShare: Boolean, width: Int, height: Int, originalEncoding: VideoEncoding): List<VideoPreset> {
    if (isScreenShare) {
        return computeDefaultScreenshareSimulcastLayers(width, height, originalEncoding)
    }

    // Orientation independent: always longest side over shortest side.
    val ratio = max(width, height).toFloat() / min(width, height)
    val distanceTo169 = abs(ratio - 16f / 9f)
    val distanceTo43 = abs(ratio - 4f / 3f)

    return when {
        distanceTo169 < distanceTo43 -> DEFAULT_SIMULCAST_LAYERS_169
        else -> DEFAULT_SIMULCAST_LAYERS_43
    }
}
/**
 * Builds the default additional simulcast layers for a screen share source.
 *
 * A single layer is produced: the source dimensions divided by 2, at 3 fps. Its bitrate is the
 * original bitrate divided by the square of the scale-down factor and by the (integer) ratio of
 * the original frame rate to the layer's frame rate.
 *
 * @param width width of the source.
 * @param height height of the source.
 * @param originalEncoding encoding of the full-size layer, used as the basis for the bitrate of
 * the derived layer.
 * @return the derived layers.
 */
fun computeDefaultScreenshareSimulcastLayers(width: Int, height: Int, originalEncoding: VideoEncoding): List<VideoPreset> {
    // pairs of ScaleDownBy to FPS
    val layers = listOf(2 to 3)
    return layers.map { (scaleDownBy, fps) ->
        val areaReduction = scaleDownBy.toFloat().pow(2).roundToInt()
        // Integer division yields 0 when the original frame rate is lower than the layer's
        // frame rate; clamp to 1 so the bitrate computation below never divides by zero.
        val fpsReduction = (originalEncoding.maxFps / fps).coerceAtLeast(1)
        CustomVideoPreset(
            capture = VideoCaptureParameter(
                width = width / scaleDownBy,
                height = height / scaleDownBy,
                maxFps = fps,
                adaptOutputToDimensions = false,
            ),
            encoding = VideoEncoding(
                maxBitrate = originalEncoding.maxBitrate / (areaReduction * fpsReduction),
                maxFps = fps,
            ),
        )
    }
}
fun videoLayersFromEncodings(
trackWidth: Int,
trackHeight: Int,
... ...
... ... @@ -28,10 +28,12 @@ import io.livekit.android.room.DefaultsManager
import io.livekit.android.room.RTCEngine
import io.livekit.android.room.track.LocalVideoTrack
import io.livekit.android.room.track.LocalVideoTrackOptions
import io.livekit.android.room.track.ScreenSharePresets
import io.livekit.android.room.track.Track
import io.livekit.android.room.track.TrackException
import io.livekit.android.room.track.VideoCaptureParameter
import io.livekit.android.room.track.VideoCodec
import io.livekit.android.room.track.VideoPreset169
import io.livekit.android.test.MockE2ETest
import io.livekit.android.test.assert.assertIsClassList
import io.livekit.android.test.coroutines.toListUntilSignal
... ... @@ -288,15 +290,15 @@ class LocalParticipantMockE2ETest : MockE2ETest() {
)
}
private fun createLocalTrack() = LocalVideoTrack(
private fun createLocalTrack(width: Int = 1280, height: Int = 720, isScreencast: Boolean = false) = LocalVideoTrack(
capturer = MockVideoCapturer(),
source = mock(VideoSource::class.java),
name = "",
options = LocalVideoTrackOptions(
isScreencast = false,
isScreencast = isScreencast,
deviceId = null,
position = null,
captureParams = VideoCaptureParameter(width = 0, height = 0, maxFps = 0),
captureParams = VideoCaptureParameter(width = width, height = height, maxFps = 30),
),
rtcTrack = MockVideoStreamTrack(),
peerConnectionFactory = component.peerConnectionFactory(),
... ... @@ -307,6 +309,219 @@ class LocalParticipantMockE2ETest : MockE2ETest() {
)
/**
 * Publishing a 1280x720 track with default options must send exactly one request: a video
 * AddTrack advertising three layers (the original plus the default H360 and H180 layers).
 */
@Test
fun publishSimulcastDefaultLayers() = runTest {
    connect()
    val wsFactory = component.websocketFactory()
    wsFactory.ws.clearRequests()

    room.localParticipant.publishVideoTrack(track = createLocalTrack(width = 1280, height = 720))
    testScheduler.advanceUntilIdle()

    val requests = wsFactory.ws.sentRequests
    assertEquals(1, requests.size)

    val foundExpectedLayers = requests.any { raw ->
        val request = LivekitRtc.SignalRequest.newBuilder()
            .mergeFrom(raw.toPBByteString())
            .build()
        if (!request.hasAddTrack()) {
            return@any false
        }

        val addTrack = request.addTrack
        println(addTrack)
        if (addTrack.type != LivekitModels.TrackType.VIDEO) {
            return@any false
        }

        val layers = addTrack.layersList

        // original
        val hasOriginal = layers.any { layer ->
            layer.quality == LivekitModels.VideoQuality.HIGH &&
                layer.bitrate == VideoPreset169.H720.encoding.maxBitrate &&
                layer.height == 720 &&
                layer.width == 1280
        }
        // default H360
        val hasH360 = layers.any { layer ->
            layer.quality == LivekitModels.VideoQuality.MEDIUM &&
                layer.bitrate == VideoPreset169.H360.encoding.maxBitrate &&
                layer.height == VideoPreset169.H360.capture.height &&
                layer.width == VideoPreset169.H360.capture.width
        }
        // default H180
        val hasH180 = layers.any { layer ->
            layer.quality == LivekitModels.VideoQuality.LOW &&
                layer.bitrate == VideoPreset169.H180.encoding.maxBitrate &&
                layer.height == VideoPreset169.H180.capture.height &&
                layer.width == VideoPreset169.H180.capture.width
        }

        layers.size == 3 && hasOriginal && hasH360 && hasH180
    }
    assertTrue(foundExpectedLayers)
}
/**
 * With simulcastLayers set to H540 and H90 in the publish defaults, publishing a 1920x1080
 * track must send exactly one request: a video AddTrack advertising the original H1080 layer
 * plus the two custom layers.
 */
@Test
fun publishSimulcastCustomLayers() = runTest {
    room.videoTrackPublishDefaults = room.videoTrackPublishDefaults.copy(
        simulcastLayers = listOf(VideoPreset169.H540, VideoPreset169.H90),
    )
    connect()
    val wsFactory = component.websocketFactory()
    wsFactory.ws.clearRequests()

    room.localParticipant.publishVideoTrack(track = createLocalTrack(width = 1920, height = 1080))
    testScheduler.advanceUntilIdle()

    val requests = wsFactory.ws.sentRequests
    assertEquals(1, requests.size)

    val foundExpectedLayers = requests.any { raw ->
        val request = LivekitRtc.SignalRequest.newBuilder()
            .mergeFrom(raw.toPBByteString())
            .build()
        if (!request.hasAddTrack()) {
            return@any false
        }

        val addTrack = request.addTrack
        println(addTrack)
        if (addTrack.type != LivekitModels.TrackType.VIDEO) {
            return@any false
        }

        val layers = addTrack.layersList

        val hasOriginal = layers.any { layer ->
            layer.quality == LivekitModels.VideoQuality.HIGH &&
                layer.bitrate == VideoPreset169.H1080.encoding.maxBitrate &&
                layer.height == VideoPreset169.H1080.capture.height &&
                layer.width == VideoPreset169.H1080.capture.width
        }
        val hasH540 = layers.any { layer ->
            layer.quality == LivekitModels.VideoQuality.MEDIUM &&
                layer.bitrate == VideoPreset169.H540.encoding.maxBitrate &&
                layer.height == VideoPreset169.H540.capture.height &&
                layer.width == VideoPreset169.H540.capture.width
        }
        val hasH90 = layers.any { layer ->
            layer.quality == LivekitModels.VideoQuality.LOW &&
                layer.bitrate == VideoPreset169.H90.encoding.maxBitrate &&
                layer.height == VideoPreset169.H90.capture.height &&
                layer.width == VideoPreset169.H90.capture.width
        }

        layers.size == 3 && hasOriginal && hasH540 && hasH90
    }
    assertTrue(foundExpectedLayers)
}
/**
 * With simulcastLayers set to H1080 and H90 and a source sized like H540, the H1080 layer must
 * be dropped: the video AddTrack advertises only two layers, the H540-sized original as MEDIUM
 * and H90 as LOW, and no HIGH layer.
 */
@Test
fun publishSimulcastLargerLayersIgnored() = runTest {
    room.videoTrackPublishDefaults = room.videoTrackPublishDefaults.copy(
        simulcastLayers = listOf(VideoPreset169.H1080, VideoPreset169.H90),
    )
    connect()
    val wsFactory = component.websocketFactory()
    wsFactory.ws.clearRequests()

    room.localParticipant.publishVideoTrack(
        track = createLocalTrack(
            width = VideoPreset169.H540.capture.width,
            height = VideoPreset169.H540.capture.height,
        ),
    )
    testScheduler.advanceUntilIdle()

    val requests = wsFactory.ws.sentRequests
    assertEquals(1, requests.size)

    val foundVideoAddTrack = requests.any { raw ->
        val request = LivekitRtc.SignalRequest.newBuilder()
            .mergeFrom(raw.toPBByteString())
            .build()
        if (!request.hasAddTrack()) {
            return@any false
        }

        val addTrack = request.addTrack
        println(addTrack)
        if (addTrack.type != LivekitModels.TrackType.VIDEO) {
            return@any false
        }

        val layers = addTrack.layersList
        assertEquals(2, layers.size)
        assertTrue(layers.none { layer -> layer.quality == LivekitModels.VideoQuality.HIGH })

        val hasMedium = layers.any { layer ->
            layer.quality == LivekitModels.VideoQuality.MEDIUM &&
                layer.bitrate == VideoPreset169.H540.encoding.maxBitrate &&
                layer.height == VideoPreset169.H540.capture.height &&
                layer.width == VideoPreset169.H540.capture.width
        }
        assertTrue(hasMedium)

        val hasLow = layers.any { layer ->
            layer.quality == LivekitModels.VideoQuality.LOW &&
                layer.bitrate == VideoPreset169.H90.encoding.maxBitrate &&
                layer.height == VideoPreset169.H90.capture.height &&
                layer.width == VideoPreset169.H90.capture.width
        }
        assertTrue(hasLow)

        true
    }
    assertTrue(foundVideoAddTrack)
}
/**
 * Publishing a 1280x720 screencast track with default options must advertise two layers: the
 * original as MEDIUM with the ScreenSharePresets.ORIGINAL bitrate, and a half-resolution LOW
 * layer with 1/40th of that bitrate.
 */
@Test
fun publishScreencastDefaultLayers() = runTest {
    connect()
    val wsFactory = component.websocketFactory()
    wsFactory.ws.clearRequests()

    room.localParticipant.publishVideoTrack(
        track = createLocalTrack(width = 1280, height = 720, isScreencast = true),
    )
    testScheduler.advanceUntilIdle()

    val requests = wsFactory.ws.sentRequests
    assertEquals(1, requests.size)

    val foundVideoAddTrack = requests.any { raw ->
        val request = LivekitRtc.SignalRequest.newBuilder()
            .mergeFrom(raw.toPBByteString())
            .build()
        if (!request.hasAddTrack()) {
            return@any false
        }

        val addTrack = request.addTrack
        println(addTrack)
        if (addTrack.type != LivekitModels.TrackType.VIDEO) {
            return@any false
        }

        val layers = addTrack.layersList
        assertEquals(2, layers.size)

        // original
        val hasOriginal = layers.any { layer ->
            layer.quality == LivekitModels.VideoQuality.MEDIUM &&
                layer.bitrate == ScreenSharePresets.ORIGINAL.encoding.maxBitrate &&
                layer.height == 720 &&
                layer.width == 1280
        }
        assertTrue(hasOriginal)

        // default simulcast layer
        val hasDefaultLayer = layers.any { layer ->
            layer.quality == LivekitModels.VideoQuality.LOW &&
                layer.bitrate == ScreenSharePresets.ORIGINAL.encoding.maxBitrate / 40 &&
                layer.height == 720 / 2 &&
                layer.width == 1280 / 2
        }
        assertTrue(hasDefaultLayer)

        true
    }
    assertTrue(foundVideoAddTrack)
}
@Test
fun publishSetCodecPreferencesH264() = runTest {
room.videoTrackPublishDefaults = room.videoTrackPublishDefaults.copy(videoCodec = "h264")
connect()
... ...