Skip to content
226 changes: 159 additions & 67 deletions sdk/objc/components/video_codec/RTCVideoEncoderH264.mm
Original file line number Diff line number Diff line change
Expand Up @@ -54,14 +54,42 @@ - (void)frameWasEncoded : (OSStatus)status flags : (VTEncodeInfoFlags)infoFlags
// The ratio between kVTCompressionPropertyKey_DataRateLimits and
// kVTCompressionPropertyKey_AverageBitRate. The data rate limit is set higher
// than the average bit rate to avoid undershooting the target.
const float kLimitToAverageBitRateFactor = 1.5f;
const float kLimitToAverageBitRateFactor = 10.0f;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Any idea why this value had to be set so high compared to what it was before? 10x the average bitrate seems quite high, no?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I just ran some tests with camera capture: the bit rate stays close to the target value (without exceeding the limit). Moving pictures are smooth and clear, and for static pictures the bit rate can drop to 1/5 of the target bit rate.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For game screen content, probably because of its complex textures, at least 4~6 Mbps needs to be set to achieve smooth and clear 720p/30fps, while the camera only needs 2 Mbps, so this PR looks good to me.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@davidzhao This is for computing data rate limits and not the average bitrate.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

got it, I don't really understand this so feel free to ignore :)

// These thresholds deviate from the default h264 QP thresholds, as they
// have been found to work better on devices that support VideoToolbox
const int kLowH264QpThreshold = 28;
const int kHighH264QpThreshold = 39;
const int kBitsPerByte = 8;

const OSType kNV12PixelFormat = kCVPixelFormatType_420YpCbCr8BiPlanarFullRange;

// Rate-control strategy that selects which data rate limits are built for the
// encoder (see the switch in CreateRateLimitArray).
typedef NS_ENUM(NSInteger, RTCVideoEncodeMode) {
// Two limits: a one-second peak window plus a five-second long-term window.
Variable = 0,
// A single one-second limit at the target rate.
Constant = 1,
};

// Builds the array handed to kVTCompressionPropertyKey_DataRateLimits: a flat
// list of (byte budget, window length in seconds) pairs.
//
// computedBitrateBps: target bitrate in bits per second.
// mode: Constant yields a single one-second window at the target rate.
//       Variable yields a one-second peak window (target rate scaled by
//       kLimitToAverageBitRateFactor) followed by a five-second window at the
//       target rate. Any unrecognized value is treated as Variable so that the
//       function always returns a value instead of falling off the end.
NSArray *CreateRateLimitArray(uint32_t computedBitrateBps, RTCVideoEncodeMode mode) {
  if (mode == Constant) {
    // CBR should be enforced with the granularity of a second.
    float targetInterval = 1.0;
    uint32_t targetBytes = computedBitrateBps / kBitsPerByte;
    return @[ @(targetBytes), @(targetInterval) ];
  }

  // Variable mode (also the defensive fallback for out-of-range values).
  // 5 seconds should be an okay interval for VBR to enforce the long-term
  // limit.
  float avgInterval = 5.0;
  // Byte budget for the whole avgInterval window, not a per-second figure.
  uint32_t avgWindowBytes = computedBitrateBps / kBitsPerByte * avgInterval;
  // And the peak bitrate is measured per-second in a way similar to CBR.
  float peakInterval = 1.0;
  uint32_t peakBytesPerSecond =
      computedBitrateBps * kLimitToAverageBitRateFactor / kBitsPerByte;
  return @[ @(peakBytesPerSecond), @(peakInterval), @(avgWindowBytes), @(avgInterval) ];
}

// Struct that we pass to the encoder per frame to encode. We receive it again
// in the encoder callback.
struct RTCFrameEncodeParams {
Expand Down Expand Up @@ -177,9 +205,9 @@ CFStringRef ExtractProfile(const webrtc::H264ProfileLevelId &profile_level_id, b
switch (profile_level_id.profile) {
case webrtc::H264Profile::kProfileConstrainedBaseline:
case webrtc::H264Profile::kProfileBaseline:
if(screenSharing) {
return kVTProfileLevel_H264_Baseline_AutoLevel;
}
if (screenSharing) {
return kVTProfileLevel_H264_Baseline_AutoLevel;
}
switch (profile_level_id.level) {
case webrtc::H264Level::kLevel3:
return kVTProfileLevel_H264_Baseline_3_0;
Expand Down Expand Up @@ -315,8 +343,8 @@ NSUInteger GetMaxSampleRate(const webrtc::H264ProfileLevelId &profile_level_id)

@implementation RTC_OBJC_TYPE (RTCVideoEncoderH264) {
RTC_OBJC_TYPE(RTCVideoCodecInfo) * _codecInfo;
std::unique_ptr<webrtc::BitrateAdjuster> _bitrateAdjuster;
uint32_t _targetBitrateBps;
uint32_t _targetFrameRate;
uint32_t _encoderBitrateBps;
uint32_t _encoderFrameRate;
uint32_t _maxAllowedFrameRate;
Expand All @@ -327,10 +355,16 @@ @implementation RTC_OBJC_TYPE (RTCVideoEncoderH264) {
int32_t _height;
VTCompressionSessionRef _compressionSession;
CVPixelBufferPoolRef _pixelBufferPool;
RTCVideoCodecMode _mode;
RTCVideoCodecMode _codecMode;
unsigned int _maxQP;
unsigned int _minBitrate;
unsigned int _maxBitrate;
RTCVideoEncodeMode _encodeMode;

webrtc::H264BitstreamParser _h264BitstreamParser;
std::vector<uint8_t> _frameScaleBuffer;

CMTime _previousPresentationTimeStamp;
}

// .5 is set as a minimum to prevent overcompensating for large temporary
Expand All @@ -343,12 +377,14 @@ @implementation RTC_OBJC_TYPE (RTCVideoEncoderH264) {
- (instancetype)initWithCodecInfo:(RTC_OBJC_TYPE(RTCVideoCodecInfo) *)codecInfo {
if (self = [super init]) {
_codecInfo = codecInfo;
_bitrateAdjuster.reset(new webrtc::BitrateAdjuster(.5, .95));
_packetizationMode = RTCH264PacketizationModeNonInterleaved;
_profile_level_id =
webrtc::ParseSdpForH264ProfileLevelId([codecInfo nativeSdpVideoFormat].parameters);
_previousPresentationTimeStamp = kCMTimeZero;
RTC_DCHECK(_profile_level_id);
RTC_LOG(LS_INFO) << "Using profile " << CFStringToString(ExtractProfile(*_profile_level_id, _mode == RTCVideoCodecModeScreensharing));
RTC_LOG(LS_INFO) << "Using profile "
<< CFStringToString(ExtractProfile(
*_profile_level_id, _codecMode == RTCVideoCodecModeScreensharing));
RTC_CHECK([codecInfo.name isEqualToString:kRTCVideoCodecH264Name]);
}
return self;
Expand All @@ -365,17 +401,28 @@ - (NSInteger)startEncodeWithSettings:(RTC_OBJC_TYPE(RTCVideoEncoderSettings) *)s

_width = settings.width;
_height = settings.height;
_mode = settings.mode;
_codecMode = settings.mode;
_maxQP = settings.qpMax;

_encodeMode = Variable; // Always variable mode for now
_minBitrate = settings.minBitrate * 1000; // minBitrate is in kbps.
_maxBitrate = settings.maxBitrate * 1000; // maxBitrate is in kbps.

uint32_t aligned_width = (((_width + 15) >> 4) << 4);
uint32_t aligned_height = (((_height + 15) >> 4) << 4);
_maxAllowedFrameRate = static_cast<uint32_t>(GetMaxSampleRate(*_profile_level_id) /
(aligned_width * aligned_height));

// We can only set average bitrate on the HW encoder.
_targetBitrateBps = settings.startBitrate * 1000; // startBitrate is in kbps.
_bitrateAdjuster->SetTargetBitrateBps(_targetBitrateBps);
_encoderFrameRate = MIN(settings.maxFramerate, _maxAllowedFrameRate);
if (_encodeMode == Constant) {
_targetBitrateBps = _maxBitrate;
} else {
_targetBitrateBps = settings.startBitrate * 1000; // startBitrate is in kbps.
}

_targetFrameRate = MIN(settings.maxFramerate, _maxAllowedFrameRate);
_encoderBitrateBps = 0;
_encoderFrameRate = 0;
if (settings.maxFramerate > _maxAllowedFrameRate && _maxAllowedFrameRate > 0) {
RTC_LOG(LS_WARNING) << "Initial encoder frame rate setting " << settings.maxFramerate
<< " is larger than the "
Expand All @@ -396,8 +443,15 @@ - (NSInteger)encode:(RTC_OBJC_TYPE(RTCVideoFrame) *)frame
if (!_callback || !_compressionSession) {
return WEBRTC_VIDEO_CODEC_UNINITIALIZED;
}
BOOL isKeyframeRequired = NO;

CMTime presentationTimeStamp = CMTimeMake(frame.timeStampNs / rtc::kNumNanosecsPerMillisec, 1000);
if (CMTimeCompare(presentationTimeStamp, _previousPresentationTimeStamp) == 0) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

when does this happen? Is it an optimization for screenshare?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is extra safety to avoid this issue:
https://developer.apple.com/forums/thread/702891

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks! Looks like this is part of the reason why it's unable to keep up.

// Same PTS
return WEBRTC_VIDEO_CODEC_NO_OUTPUT;
}
_previousPresentationTimeStamp = presentationTimeStamp;

BOOL isKeyframeRequired = NO;
// Get a pixel buffer from the pool and copy frame data over.
if ([self resetCompressionSessionIfNeededWithFrame:frame]) {
isKeyframeRequired = YES;
Expand All @@ -424,8 +478,8 @@ - (NSInteger)encode:(RTC_OBJC_TYPE(RTCVideoFrame) *)frame
int dstWidth = CVPixelBufferGetWidth(pixelBuffer);
int dstHeight = CVPixelBufferGetHeight(pixelBuffer);
if ([rtcPixelBuffer requiresScalingToWidth:dstWidth height:dstHeight]) {
int size =
[rtcPixelBuffer bufferSizeForCroppingAndScalingToWidth:dstWidth height:dstHeight];
int size = [rtcPixelBuffer bufferSizeForCroppingAndScalingToWidth:dstWidth
height:dstHeight];
_frameScaleBuffer.resize(size);
} else {
_frameScaleBuffer.clear();
Expand Down Expand Up @@ -462,7 +516,6 @@ - (NSInteger)encode:(RTC_OBJC_TYPE(RTCVideoFrame) *)frame
}
}

CMTime presentationTimeStamp = CMTimeMake(frame.timeStampNs / rtc::kNumNanosecsPerMillisec, 1000);
CFDictionaryRef frameProperties = nullptr;
if (isKeyframeRequired) {
CFTypeRef keys[] = {kVTEncodeFrameOptionKey_ForceKeyFrame};
Expand All @@ -480,8 +533,8 @@ - (NSInteger)encode:(RTC_OBJC_TYPE(RTCVideoFrame) *)frame
frame.rotation));
encodeParams->codecSpecificInfo.packetizationMode = _packetizationMode;

// Update the bitrate if needed.
[self setBitrateBps:_bitrateAdjuster->GetAdjustedBitrateBps() frameRate:_encoderFrameRate];
// Update encoder bitrate or frameRate if needed.
[self updateEncoderBitrateAndFrameRate];

OSStatus status = VTCompressionSessionEncodeFrame(_compressionSession,
pixelBuffer,
Expand Down Expand Up @@ -522,14 +575,19 @@ - (void)setCallback:(RTCVideoEncoderCallback)callback {
}

- (int)setBitrate:(uint32_t)bitrateKbit framerate:(uint32_t)framerate {
_targetBitrateBps = 1000 * bitrateKbit;
_bitrateAdjuster->SetTargetBitrateBps(_targetBitrateBps);
// set target bitrate bps
_targetBitrateBps = bitrateKbit * 1000;

RTC_LOG(LS_INFO) << "setBitrateKBit: " << bitrateKbit << " targetBps: " << _targetBitrateBps
<< " frameRate: " << framerate;

if (framerate > _maxAllowedFrameRate && _maxAllowedFrameRate > 0) {
RTC_LOG(LS_WARNING) << "Encoder frame rate setting " << framerate << " is larger than the "
<< "maximal allowed frame rate " << _maxAllowedFrameRate << ".";
}
framerate = MIN(framerate, _maxAllowedFrameRate);
[self setBitrateBps:_bitrateAdjuster->GetAdjustedBitrateBps() frameRate:framerate];

_targetFrameRate = MIN(framerate, _maxAllowedFrameRate);

return WEBRTC_VIDEO_CODEC_OK;
}

Expand Down Expand Up @@ -621,14 +679,19 @@ - (int)resetCompressionSessionWithPixelFormat:(OSType)framePixelFormat {
(NSString *)kCVPixelBufferPixelFormatTypeKey : @(framePixelFormat),
};

NSDictionary *encoder_specs;
NSMutableDictionary *encoder_specs;
#if defined(WEBRTC_MAC) && !defined(WEBRTC_IOS)
// Currently hw accl is supported above 360p on mac, below 360p
// the compression session will be created with hw accl disabled.
encoder_specs = @{
encoder_specs = [@{
(NSString *)kVTVideoEncoderSpecification_EnableHardwareAcceleratedVideoEncoder : @(YES),
};

} mutableCopy];
// Enable low-latency video encoding
if (@available(iOS 14.5, macOS 11.3, *)) {
[encoder_specs addEntriesFromDictionary:@{
(NSString *)kVTVideoEncoderSpecification_EnableLowLatencyRateControl : @(YES),
}];
}
#endif
OSStatus status = VTCompressionSessionCreate(
nullptr, // use default allocator
Expand Down Expand Up @@ -669,11 +732,30 @@ - (int)resetCompressionSessionWithPixelFormat:(OSType)framePixelFormat {
- (void)configureCompressionSession {
RTC_DCHECK(_compressionSession);
SetVTSessionProperty(_compressionSession, kVTCompressionPropertyKey_RealTime, true);
SetVTSessionProperty(_compressionSession,
kVTCompressionPropertyKey_ProfileLevel,
ExtractProfile(*_profile_level_id, _mode == RTCVideoCodecModeScreensharing));
// Prioritize encoding speed over quality when necessary
if (@available(iOS 14.0, macOS 11.0, *)) {
SetVTSessionProperty(
_compressionSession, kVTCompressionPropertyKey_PrioritizeEncodingSpeedOverQuality, true);
}
// Set maximum QP for screen sharing mode, range must be within 1 to 51
// https://developer.apple.com/documentation/videotoolbox/kvtcompressionpropertykey_maxallowedframeqp
if (@available(iOS 15.0, macOS 12.0, *)) {
// Only enable for screen sharing and let VideoToolbox do the optimizing as much as possible.
if (_codecMode == RTCVideoCodecModeScreensharing) {
RTC_LOG(LS_INFO) << "Configuring VideoToolbox to use maxQP: " << kHighH264QpThreshold
<< " mode: " << _codecMode;
SetVTSessionProperty(
_compressionSession, kVTCompressionPropertyKey_MaxAllowedFrameQP, kHighH264QpThreshold);
}
}
SetVTSessionProperty(
_compressionSession,
kVTCompressionPropertyKey_ProfileLevel,
ExtractProfile(*_profile_level_id, _codecMode == RTCVideoCodecModeScreensharing));
SetVTSessionProperty(_compressionSession, kVTCompressionPropertyKey_AllowFrameReordering, false);
[self setEncoderBitrateBps:_targetBitrateBps frameRate:_encoderFrameRate];

// [self updateEncoderBitrateAndFrameRate];

// TODO(tkchin): Look at entropy mode and colorspace matrices.
// TODO(tkchin): Investigate to see if there's any way to make this work.
// May need it to interop with Android. Currently this call just fails.
Expand Down Expand Up @@ -701,49 +783,59 @@ - (NSString *)implementationName {
return @"VideoToolbox";
}

- (void)setBitrateBps:(uint32_t)bitrateBps frameRate:(uint32_t)frameRate {
if (_encoderBitrateBps != bitrateBps || _encoderFrameRate != frameRate) {
[self setEncoderBitrateBps:bitrateBps frameRate:frameRate];
- (void)updateEncoderBitrateAndFrameRate {
// If no compression session simply return
if (!_compressionSession) {
return;
}
}
// Initial status
OSStatus status = noErr;

- (void)setEncoderBitrateBps:(uint32_t)bitrateBps frameRate:(uint32_t)frameRate {
if (_compressionSession) {
SetVTSessionProperty(_compressionSession, kVTCompressionPropertyKey_AverageBitRate, bitrateBps);
uint32_t computedBitrateBps = _targetBitrateBps;

// With zero `_maxAllowedFrameRate`, we fall back to automatic frame rate detection.
if (_maxAllowedFrameRate > 0) {
SetVTSessionProperty(
_compressionSession, kVTCompressionPropertyKey_ExpectedFrameRate, frameRate);
}
// With zero `_maxAllowedFrameRate`, we fall back to automatic frame rate detection.
uint32_t computedFrameRate = _maxAllowedFrameRate > 0 ? _targetFrameRate : 0;

// TODO(tkchin): Add a helper method to set array value.
int64_t dataLimitBytesPerSecondValue =
static_cast<int64_t>(bitrateBps * kLimitToAverageBitRateFactor / 8);
CFNumberRef bytesPerSecond =
CFNumberCreate(kCFAllocatorDefault, kCFNumberSInt64Type, &dataLimitBytesPerSecondValue);
int64_t oneSecondValue = 1;
CFNumberRef oneSecond =
CFNumberCreate(kCFAllocatorDefault, kCFNumberSInt64Type, &oneSecondValue);
const void *nums[2] = {bytesPerSecond, oneSecond};
CFArrayRef dataRateLimits = CFArrayCreate(nullptr, nums, 2, &kCFTypeArrayCallBacks);
OSStatus status = VTSessionSetProperty(
_compressionSession, kVTCompressionPropertyKey_DataRateLimits, dataRateLimits);
if (bytesPerSecond) {
CFRelease(bytesPerSecond);
}
if (oneSecond) {
CFRelease(oneSecond);
// Set frame rate
if (computedFrameRate != _encoderFrameRate) {
status = VTSessionSetProperty(_compressionSession,
kVTCompressionPropertyKey_ExpectedFrameRate,
(__bridge CFTypeRef) @(computedFrameRate));
// Ensure the frame rate was set successfully
if (status != noErr) {
RTC_LOG(LS_ERROR) << "Failed to set frame rate: " << computedFrameRate
<< " error: " << status;
} else {
RTC_LOG(LS_INFO) << "Did update encoder frame rate: " << computedFrameRate;
}
if (dataRateLimits) {
CFRelease(dataRateLimits);
_encoderFrameRate = computedFrameRate;
}

// Set bitrate
if (computedBitrateBps != _encoderBitrateBps) {
status = VTSessionSetProperty(_compressionSession,
kVTCompressionPropertyKey_AverageBitRate,
(__bridge CFTypeRef) @(computedBitrateBps));

// Ensure the bitrate was set successfully
if (status != noErr) {
RTC_LOG(LS_ERROR) << "Failed to update encoder bitrate: " << computedBitrateBps
<< "error: " << status;
} else {
RTC_LOG(LS_INFO) << "Did update encoder bitrate: " << computedBitrateBps;
}

status = VTSessionSetProperty(
_compressionSession,
kVTCompressionPropertyKey_DataRateLimits,
(__bridge CFArrayRef)CreateRateLimitArray(computedBitrateBps, _encodeMode));
if (status != noErr) {
RTC_LOG(LS_ERROR) << "Failed to set data rate limit with code: " << status;
RTC_LOG(LS_ERROR) << "Failed to update encoder data rate limits";
} else {
RTC_LOG(LS_INFO) << "Did update encoder data rate limits";
}

_encoderBitrateBps = bitrateBps;
_encoderFrameRate = frameRate;
_encoderBitrateBps = computedBitrateBps;
}
}

Expand Down Expand Up @@ -799,8 +891,9 @@ - (void)frameWasEncoded:(OSStatus)status
frame.captureTimeMs = renderTimeMs;
frame.timeStamp = timestamp;
frame.rotation = rotation;
frame.contentType = (_mode == RTCVideoCodecModeScreensharing) ? RTCVideoContentTypeScreenshare :
RTCVideoContentTypeUnspecified;
frame.contentType = (_codecMode == RTCVideoCodecModeScreensharing) ?
RTCVideoContentTypeScreenshare :
RTCVideoContentTypeUnspecified;
frame.flags = webrtc::VideoSendTiming::kInvalid;

_h264BitstreamParser.ParseBitstream(*buffer);
Expand All @@ -811,7 +904,6 @@ - (void)frameWasEncoded:(OSStatus)status
RTC_LOG(LS_ERROR) << "Encode callback failed";
return;
}
_bitrateAdjuster->Update(frame.buffer.length);
}

- (nullable RTC_OBJC_TYPE(RTCVideoEncoderQpThresholds) *)scalingSettings {
Expand Down
2 changes: 1 addition & 1 deletion sdk/objc/native/src/objc_video_track_source.mm
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ - (void)capturer:(RTC_OBJC_TYPE(RTCVideoCapturer) *)capturer
ObjCVideoTrackSource::ObjCVideoTrackSource() : ObjCVideoTrackSource(false) {}

ObjCVideoTrackSource::ObjCVideoTrackSource(bool is_screencast)
: AdaptedVideoTrackSource(/* required resolution alignment */ is_screencast? 16 : 2),
: AdaptedVideoTrackSource(/* required resolution alignment */ 2),
is_screencast_(is_screencast) {}

ObjCVideoTrackSource::ObjCVideoTrackSource(RTCObjCVideoSourceAdapter *adapter) : adapter_(adapter) {
Expand Down