davidliu
Committed by GitHub

Ensure even dimensions for simulcast layers (#55)

* Ensure video dimensions are even

* test
@@ -13,6 +13,8 @@ import io.livekit.android.room.ConnectionState @@ -13,6 +13,8 @@ import io.livekit.android.room.ConnectionState
13 import io.livekit.android.room.DefaultsManager 13 import io.livekit.android.room.DefaultsManager
14 import io.livekit.android.room.RTCEngine 14 import io.livekit.android.room.RTCEngine
15 import io.livekit.android.room.track.* 15 import io.livekit.android.room.track.*
  16 +import io.livekit.android.room.util.EncodingUtils
  17 +import io.livekit.android.room.util.EncodingUtils.findEvenScaleDownBy
16 import io.livekit.android.util.LKLog 18 import io.livekit.android.util.LKLog
17 import kotlinx.coroutines.CoroutineDispatcher 19 import kotlinx.coroutines.CoroutineDispatcher
18 import kotlinx.coroutines.cancel 20 import kotlinx.coroutines.cancel
@@ -23,10 +25,7 @@ import org.webrtc.PeerConnectionFactory @@ -23,10 +25,7 @@ import org.webrtc.PeerConnectionFactory
23 import org.webrtc.RtpParameters 25 import org.webrtc.RtpParameters
24 import org.webrtc.RtpTransceiver 26 import org.webrtc.RtpTransceiver
25 import javax.inject.Named 27 import javax.inject.Named
26 -import kotlin.math.abs  
27 import kotlin.math.max 28 import kotlin.math.max
28 -import kotlin.math.min  
29 -import kotlin.math.roundToInt  
30 29
31 class LocalParticipant 30 class LocalParticipant
32 @AssistedInject 31 @AssistedInject
@@ -219,7 +218,8 @@ internal constructor( @@ -219,7 +218,8 @@ internal constructor(
219 ) { 218 ) {
220 219
221 val encodings = computeVideoEncodings(track.dimensions, options) 220 val encodings = computeVideoEncodings(track.dimensions, options)
222 - val videoLayers = videoLayersFromEncodings(track.dimensions.width, track.dimensions.height, encodings) 221 + val videoLayers =
  222 + EncodingUtils.videoLayersFromEncodings(track.dimensions.width, track.dimensions.height, encodings)
223 223
224 val published = publishTrackImpl( 224 val published = publishTrackImpl(
225 track, 225 track,
@@ -312,35 +312,24 @@ internal constructor( @@ -312,35 +312,24 @@ internal constructor(
312 } 312 }
313 313
314 if (encoding == null) { 314 if (encoding == null) {
315 - encoding = determineAppropriateEncoding(width, height) 315 + encoding = EncodingUtils.determineAppropriateEncoding(width, height)
316 LKLog.d { "using video encoding: $encoding" } 316 LKLog.d { "using video encoding: $encoding" }
317 } 317 }
318 318
319 val encodings = mutableListOf<RtpParameters.Encoding>() 319 val encodings = mutableListOf<RtpParameters.Encoding>()
320 if (simulcast) { 320 if (simulcast) {
321 321
322 - val presets = presetsForResolution(width, height) 322 + val presets = EncodingUtils.presetsForResolution(width, height)
323 val midPreset = presets[1] 323 val midPreset = presets[1]
324 val lowPreset = presets[0] 324 val lowPreset = presets[0]
325 325
326 - fun calculateScale(parameter: VideoCaptureParameter): Double {  
327 - val longestSize = max(width, height)  
328 - return longestSize / parameter.width.toDouble()  
329 - }  
330 -  
331 - fun checkEvenDimensions(parameter: VideoCaptureParameter): Boolean {  
332 - fun isEven(value: Double) = ((value.roundToInt()) % 2 == 0)  
333 - val scale = calculateScale(parameter)  
334 -  
335 - return isEven(parameter.height * scale) && isEven(parameter.width * scale)  
336 - }  
337 326
338 fun addEncoding(videoEncoding: VideoEncoding, scale: Double) { 327 fun addEncoding(videoEncoding: VideoEncoding, scale: Double) {
339 - if (encodings.size >= VIDEO_RIDS.size) { 328 + if (encodings.size >= EncodingUtils.VIDEO_RIDS.size) {
340 throw IllegalStateException("Attempting to add more encodings than we have rids for!") 329 throw IllegalStateException("Attempting to add more encodings than we have rids for!")
341 } 330 }
342 // encodings is mutable, so this will grab next available rid 331 // encodings is mutable, so this will grab next available rid
343 - val rid = VIDEO_RIDS[encodings.size] 332 + val rid = EncodingUtils.VIDEO_RIDS[encodings.size]
344 encodings.add(videoEncoding.toRtpEncoding(rid, scale)) 333 encodings.add(videoEncoding.toRtpEncoding(rid, scale))
345 } 334 }
346 335
@@ -348,16 +337,17 @@ internal constructor( @@ -348,16 +337,17 @@ internal constructor(
348 // otherwise only send h 337 // otherwise only send h
349 val size = max(width, height) 338 val size = max(width, height)
350 if (size >= 960) { 339 if (size >= 960) {
351 - val hasEvenDimensions =  
352 - checkEvenDimensions(midPreset.capture) && checkEvenDimensions(lowPreset.capture)  
353 - val midScale = if (hasEvenDimensions) calculateScale(midPreset.capture) else 2.0  
354 - val lowScale = if (hasEvenDimensions) calculateScale(lowPreset.capture) else 4.0 340 + var lowScale = findEvenScaleDownBy(width, height, lowPreset.capture.width, lowPreset.capture.height)
  341 + var midScale = findEvenScaleDownBy(width, height, midPreset.capture.width, midPreset.capture.height)
355 342
  343 + if (midScale == null || lowScale == null) {
  344 + lowScale = 4.0
  345 + midScale = 2.0
  346 + }
356 addEncoding(lowPreset.encoding, lowScale) 347 addEncoding(lowPreset.encoding, lowScale)
357 addEncoding(midPreset.encoding, midScale) 348 addEncoding(midPreset.encoding, midScale)
358 } else { 349 } else {
359 - val hasEvenDimensions = checkEvenDimensions(lowPreset.capture)  
360 - val lowScale = if (hasEvenDimensions) calculateScale(lowPreset.capture) else 2.0 350 + val lowScale = findEvenScaleDownBy(width, height, lowPreset.capture.width, lowPreset.capture.height) ?: 2.0
361 addEncoding(lowPreset.encoding, lowScale) 351 addEncoding(lowPreset.encoding, lowScale)
362 } 352 }
363 addEncoding(encoding, 1.0) 353 addEncoding(encoding, 1.0)
@@ -370,79 +360,6 @@ internal constructor( @@ -370,79 +360,6 @@ internal constructor(
370 return encodings 360 return encodings
371 } 361 }
372 362
373 - private fun determineAppropriateEncoding(width: Int, height: Int): VideoEncoding {  
374 - val presets = presetsForResolution(width, height)  
375 -  
376 - // presets assume width is longest size  
377 - val longestSize = max(width, height)  
378 - val preset = presets  
379 - .firstOrNull { it.capture.width >= longestSize }  
380 - ?: presets.last()  
381 -  
382 - return preset.encoding  
383 - }  
384 -  
385 - private fun presetsForResolution(width: Int, height: Int): List<VideoPreset> {  
386 - val longestSize = max(width, height)  
387 - val shortestSize = min(width, height)  
388 - val aspectRatio = longestSize.toFloat() / shortestSize  
389 - return if (abs(aspectRatio - 16f / 9f) < abs(aspectRatio - 4f / 3f)) {  
390 - PRESETS_16_9  
391 - } else {  
392 - PRESETS_4_3  
393 - }  
394 - }  
395 -  
396 - private fun videoLayersFromEncodings(  
397 - trackWidth: Int,  
398 - trackHeight: Int,  
399 - encodings: List<RtpParameters.Encoding>  
400 - ): List<LivekitModels.VideoLayer> {  
401 - return if (encodings.isEmpty()) {  
402 - listOf(  
403 - LivekitModels.VideoLayer.newBuilder().apply {  
404 - width = trackWidth  
405 - height = trackHeight  
406 - quality = LivekitModels.VideoQuality.HIGH  
407 - bitrate = 0  
408 - ssrc = 0  
409 - }.build()  
410 - )  
411 - } else {  
412 - encodings.map { encoding ->  
413 - val scaleDownBy = encoding.scaleResolutionDownBy ?: 1.0  
414 - var videoQuality = videoQualityForRid(encoding.rid ?: "")  
415 - if (videoQuality == LivekitModels.VideoQuality.UNRECOGNIZED && encodings.size == 1) {  
416 - videoQuality = LivekitModels.VideoQuality.HIGH  
417 - }  
418 - LivekitModels.VideoLayer.newBuilder().apply {  
419 - width = (trackWidth / scaleDownBy).roundToInt()  
420 - height = (trackHeight / scaleDownBy).roundToInt()  
421 - quality = videoQuality  
422 - bitrate = encoding.maxBitrateBps ?: 0  
423 - ssrc = 0  
424 - }.build()  
425 - }  
426 - }  
427 - }  
428 -  
429 - private fun videoQualityForRid(rid: String): LivekitModels.VideoQuality {  
430 - return when (rid) {  
431 - "f" -> LivekitModels.VideoQuality.HIGH  
432 - "h" -> LivekitModels.VideoQuality.MEDIUM  
433 - "q" -> LivekitModels.VideoQuality.LOW  
434 - else -> LivekitModels.VideoQuality.UNRECOGNIZED  
435 - }  
436 - }  
437 -  
438 - private fun ridForVideoQuality(quality: LivekitModels.VideoQuality): String? {  
439 - return when (quality) {  
440 - LivekitModels.VideoQuality.HIGH -> "f"  
441 - LivekitModels.VideoQuality.MEDIUM -> "h"  
442 - LivekitModels.VideoQuality.LOW -> "q"  
443 - else -> null  
444 - }  
445 - }  
446 363
447 /** 364 /**
448 * Control who can subscribe to LocalParticipant's published tracks. 365 * Control who can subscribe to LocalParticipant's published tracks.
@@ -566,7 +483,7 @@ internal constructor( @@ -566,7 +483,7 @@ internal constructor(
566 483
567 var hasChanged = false 484 var hasChanged = false
568 for (quality in qualities) { 485 for (quality in qualities) {
569 - val rid = ridForVideoQuality(quality.quality) ?: continue 486 + val rid = EncodingUtils.ridForVideoQuality(quality.quality) ?: continue
570 val encoding = encodings.firstOrNull { it.rid == rid } 487 val encoding = encodings.firstOrNull { it.rid == rid }
571 // use low quality layer settings for non-simulcasted streams 488 // use low quality layer settings for non-simulcasted streams
572 ?: encodings.takeIf { it.size == 1 && quality.quality == LivekitModels.VideoQuality.LOW }?.first() 489 ?: encodings.takeIf { it.size == 1 && quality.quality == LivekitModels.VideoQuality.LOW }?.first()
@@ -630,25 +547,6 @@ internal constructor( @@ -630,25 +547,6 @@ internal constructor(
630 } 547 }
631 548
632 companion object { 549 companion object {
633 - private val VIDEO_RIDS = arrayOf("q", "h", "f")  
634 -  
635 - // Note: maintain order from smallest to biggest.  
636 - private val PRESETS_16_9 = listOf(  
637 - VideoPreset169.QVGA,  
638 - VideoPreset169.VGA,  
639 - VideoPreset169.QHD,  
640 - VideoPreset169.HD,  
641 - VideoPreset169.FHD  
642 - )  
643 -  
644 - // Note: maintain order from smallest to biggest.  
645 - private val PRESETS_4_3 = listOf(  
646 - VideoPreset43.QVGA,  
647 - VideoPreset43.VGA,  
648 - VideoPreset43.QHD,  
649 - VideoPreset43.HD,  
650 - VideoPreset43.FHD  
651 - )  
652 } 550 }
653 } 551 }
654 552
  1 +package io.livekit.android.room.util
  2 +
  3 +import io.livekit.android.room.track.VideoEncoding
  4 +import io.livekit.android.room.track.VideoPreset
  5 +import io.livekit.android.room.track.VideoPreset169
  6 +import io.livekit.android.room.track.VideoPreset43
  7 +import livekit.LivekitModels
  8 +import org.webrtc.RtpParameters
  9 +import kotlin.math.abs
  10 +import kotlin.math.max
  11 +import kotlin.math.min
  12 +
/**
 * Helpers for picking video encodings for a capture resolution and for describing
 * RTP encodings as LiveKit video layers.
 */
internal object EncodingUtils {

    /** Simulcast rids, ordered from the lowest quality layer ("q") to the highest ("f"). */
    val VIDEO_RIDS = arrayOf("q", "h", "f")

    /**
     * Largest amount added to the target's short side while [findEvenScaleDownBy]
     * searches for a scale that produces even output dimensions.
     */
    private const val MAX_TARGET_SIZE_OFFSET = 20

    // Note: maintain order from smallest to biggest.
    private val PRESETS_16_9 = listOf(
        VideoPreset169.QVGA,
        VideoPreset169.VGA,
        VideoPreset169.QHD,
        VideoPreset169.HD,
        VideoPreset169.FHD
    )

    // Note: maintain order from smallest to biggest.
    private val PRESETS_4_3 = listOf(
        VideoPreset43.QVGA,
        VideoPreset43.VGA,
        VideoPreset43.QHD,
        VideoPreset43.HD,
        VideoPreset43.FHD
    )

    /**
     * Encoders will often not be able to handle odd dimensions, so we should try to find a scale that will
     * result in even dimensions.
     *
     * The search starts from the factor that maps the source's short side onto the target's short side,
     * then grows the target short side one pixel at a time (up to [MAX_TARGET_SIZE_OFFSET] extra pixels).
     *
     * @param sourceWidth width of the frames being scaled down.
     * @param sourceHeight height of the frames being scaled down.
     * @param targetWidth width of the desired layer.
     * @param targetHeight height of the desired layer.
     * @return a scale-down factor (always >= 1.0) that results in dimensions that are both even,
     * or null if none was found, any dimension is not positive, or only an upscale would fit.
     */
    fun findEvenScaleDownBy(
        sourceWidth: Int,
        sourceHeight: Int,
        targetWidth: Int,
        targetHeight: Int,
    ): Double? {
        // Non-positive dimensions would turn the divisions below into 0.0, NaN or infinity.
        if (sourceWidth <= 0 || sourceHeight <= 0 || targetWidth <= 0 || targetHeight <= 0) {
            return null
        }

        fun Int.isEven() = this % 2 == 0

        val sourceSize = min(sourceWidth, sourceHeight)
        val targetSize = min(targetWidth, targetHeight)
        for (offset in 0..MAX_TARGET_SIZE_OFFSET) {
            val scaleDownBy = sourceSize.toDouble() / (targetSize + offset)
            if (scaleDownBy < 1.0) {
                // A factor below 1.0 would upscale, and larger offsets only shrink it further.
                break
            }

            // Internally, WebRTC casts directly to int without rounding.
            // https://github.com/webrtc-sdk/webrtc/blob/8c7139f8e6fa19ddf2c91510c177a19746e1ded3/media/engine/webrtc_video_engine.cc#L3676
            val scaledWidth = (sourceWidth / scaleDownBy).toInt()
            val scaledHeight = (sourceHeight / scaleDownBy).toInt()

            if (scaledHeight.isEven() && scaledWidth.isEven()) {
                return scaleDownBy
            }
        }

        return null
    }

    /**
     * Picks the encoding of the first preset (smallest first) whose capture width is at least the
     * longest side of the given resolution, falling back to the last preset when none is large enough.
     *
     * @param width width of the video being published.
     * @param height height of the video being published.
     * @return the encoding of the selected preset.
     */
    fun determineAppropriateEncoding(width: Int, height: Int): VideoEncoding {
        val presets = presetsForResolution(width, height)

        // presets assume width is longest size
        val longestSize = max(width, height)
        val preset = presets
            .firstOrNull { it.capture.width >= longestSize }
            ?: presets.last()

        return preset.encoding
    }

    /**
     * Returns the preset list whose aspect ratio is closest to that of the given resolution.
     *
     * The ratio is computed as longest side over shortest side, so orientation does not matter.
     * The 16:9 presets are returned only when the ratio is strictly closer to 16:9 than to 4:3.
     *
     * @param width width of the video being published.
     * @param height height of the video being published.
     * @return the 16:9 or 4:3 presets, ordered from smallest to biggest.
     */
    fun presetsForResolution(width: Int, height: Int): List<VideoPreset> {
        val longestSize = max(width, height)
        val shortestSize = min(width, height)
        val aspectRatio = longestSize.toFloat() / shortestSize
        return if (abs(aspectRatio - 16f / 9f) < abs(aspectRatio - 4f / 3f)) {
            PRESETS_16_9
        } else {
            PRESETS_4_3
        }
    }

    /**
     * Describes the given RTP encodings as video layers.
     *
     * With no encodings, a single HIGH quality layer at the track's own dimensions is returned.
     * Otherwise one layer is produced per encoding, with the track dimensions divided by the
     * encoding's scale-down factor (1.0 when unset) and the quality derived from its rid.
     *
     * @param trackWidth width of the source track.
     * @param trackHeight height of the source track.
     * @param encodings the encodings the track is published with.
     * @return one layer per encoding, or a single full-size layer when [encodings] is empty.
     */
    fun videoLayersFromEncodings(
        trackWidth: Int,
        trackHeight: Int,
        encodings: List<RtpParameters.Encoding>
    ): List<LivekitModels.VideoLayer> {
        return if (encodings.isEmpty()) {
            listOf(
                LivekitModels.VideoLayer.newBuilder().apply {
                    width = trackWidth
                    height = trackHeight
                    quality = LivekitModels.VideoQuality.HIGH
                    bitrate = 0
                    ssrc = 0
                }.build()
            )
        } else {
            encodings.map { encoding ->
                val scaleDownBy = encoding.scaleResolutionDownBy ?: 1.0
                var videoQuality = videoQualityForRid(encoding.rid ?: "")
                // A lone encoding without a recognized rid is treated as the full quality layer.
                if (videoQuality == LivekitModels.VideoQuality.UNRECOGNIZED && encodings.size == 1) {
                    videoQuality = LivekitModels.VideoQuality.HIGH
                }
                LivekitModels.VideoLayer.newBuilder().apply {
                    // Internally, WebRTC casts directly to int without rounding.
                    // https://github.com/webrtc-sdk/webrtc/blob/8c7139f8e6fa19ddf2c91510c177a19746e1ded3/media/engine/webrtc_video_engine.cc#L3676
                    width = (trackWidth / scaleDownBy).toInt()
                    height = (trackHeight / scaleDownBy).toInt()
                    quality = videoQuality
                    bitrate = encoding.maxBitrateBps ?: 0
                    ssrc = 0
                }.build()
            }
        }
    }

    /**
     * Maps a simulcast rid to its video quality.
     *
     * @param rid the rid of an encoding.
     * @return HIGH for "f", MEDIUM for "h", LOW for "q", and UNRECOGNIZED for anything else.
     */
    fun videoQualityForRid(rid: String): LivekitModels.VideoQuality {
        return when (rid) {
            "f" -> LivekitModels.VideoQuality.HIGH
            "h" -> LivekitModels.VideoQuality.MEDIUM
            "q" -> LivekitModels.VideoQuality.LOW
            else -> LivekitModels.VideoQuality.UNRECOGNIZED
        }
    }

    /**
     * Maps a video quality to its simulcast rid; the inverse of [videoQualityForRid].
     *
     * @param quality the quality to look up.
     * @return "f" for HIGH, "h" for MEDIUM, "q" for LOW, or null for any other quality.
     */
    fun ridForVideoQuality(quality: LivekitModels.VideoQuality): String? {
        return when (quality) {
            LivekitModels.VideoQuality.HIGH -> "f"
            LivekitModels.VideoQuality.MEDIUM -> "h"
            LivekitModels.VideoQuality.LOW -> "q"
            else -> null
        }
    }
}
  1 +package io.livekit.android.room.util
  2 +
  3 +import org.junit.Assert
  4 +import org.junit.Test
  5 +
/**
 * Unit tests for [EncodingUtils.findEvenScaleDownBy].
 */
class EncodingUtilsTest {

    /** A 4:3 source scaled towards a 240x180 target must end up with even dimensions. */
    @Test
    fun evenScale() {
        assertScalesToEvenDimensions(sourceWidth = 800, sourceHeight = 600)
    }

    /** A source that is not exactly 4:3 must still end up with even dimensions. */
    @Test
    fun evenScaleWeirdSource() {
        assertScalesToEvenDimensions(sourceWidth = 800, sourceHeight = 602)
    }

    /**
     * Asserts that a scale towards a 240x180 target is found for the given source, and that
     * applying it with truncation (as the tests have always done) yields even dimensions.
     */
    private fun assertScalesToEvenDimensions(sourceWidth: Int, sourceHeight: Int) {
        // Fail as an assertion (not a generic exception) so the runner reports a test failure.
        val scaleDownBy = EncodingUtils.findEvenScaleDownBy(sourceWidth, sourceHeight, 240, 180)
            ?: throw AssertionError("scale should not be null!")

        val scaledWidth = (sourceWidth / scaleDownBy).toInt()
        val scaledHeight = (sourceHeight / scaleDownBy).toInt()

        Assert.assertTrue("scaled width $scaledWidth should be even", scaledWidth % 2 == 0)
        Assert.assertTrue("scaled height $scaledHeight should be even", scaledHeight % 2 == 0)
    }
}