-
Notifications
You must be signed in to change notification settings - Fork 134
Improve H264 encoder #70
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
c005df9
8ce2df8
7892c03
062d40d
8ba74bf
726fb9e
ea7db41
54aabc3
808223b
b4ad069
67fb003
bfd99df
4d21a6f
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -54,14 +54,42 @@ - (void)frameWasEncoded : (OSStatus)status flags : (VTEncodeInfoFlags)infoFlags | |
// The ratio between kVTCompressionPropertyKey_DataRateLimits and | ||
// kVTCompressionPropertyKey_AverageBitRate. The data rate limit is set higher | ||
// than the average bit rate to avoid undershooting the target. | ||
const float kLimitToAverageBitRateFactor = 1.5f; | ||
const float kLimitToAverageBitRateFactor = 10.0f; | ||
// These thresholds deviate from the default h264 QP thresholds, as they | ||
// have been found to work better on devices that support VideoToolbox | ||
const int kLowH264QpThreshold = 28; | ||
const int kHighH264QpThreshold = 39; | ||
const int kBitsPerByte = 8; | ||
|
||
const OSType kNV12PixelFormat = kCVPixelFormatType_420YpCbCr8BiPlanarFullRange; | ||
|
||
// Rate-control strategy used when building the encoder's data rate limits
// (see CreateRateLimitArray).
typedef NS_ENUM(NSInteger, RTCVideoEncodeMode) {
  // Variable bitrate: a short-term peak limit plus a long-term average limit.
  Variable,
  // Constant bitrate: a single limit at the target rate.
  Constant,
};
|
||
// Builds the array passed to kVTCompressionPropertyKey_DataRateLimits for the
// given target bitrate and rate-control mode.
//
// The array is a flat list of (byte count, window length in seconds) pairs:
//   - Constant: one pair, the target byte rate over a 1 second window.
//   - Variable: two pairs, a peak limit over a 1 second window
//     (kLimitToAverageBitRateFactor times the target) followed by the average
//     limit expressed as the byte budget for a 5 second window.
//
// `computedBitrateBps` is the target bitrate in bits per second.
// Always returns a non-nil array. A `mode` value outside the enum is treated
// as Variable rather than falling off the end of the function.
NSArray *CreateRateLimitArray(uint32_t computedBitrateBps, RTCVideoEncodeMode mode) {
  switch (mode) {
    case Constant: {
      // CBR should be enforced with a granularity of one second.
      float targetInterval = 1.0;
      int32_t targetBytesPerSecond = computedBitrateBps / kBitsPerByte;
      return @[ @(targetBytesPerSecond), @(targetInterval) ];
    }
    case Variable:
      break;
  }

  // Variable bitrate. Also reached for any out-of-range `mode`, so every path
  // through this function returns a value.

  // 5 seconds should be an okay interval for VBR to enforce the long-term
  // limit. The limit is the total number of bytes allowed in that window.
  float avgInterval = 5.0;
  uint32_t avgBytesPerInterval = computedBitrateBps / kBitsPerByte * avgInterval;
  // The peak bitrate is measured per second, in a way similar to CBR.
  float peakInterval = 1.0;
  uint32_t peakBytesPerSecond =
      computedBitrateBps * kLimitToAverageBitRateFactor / kBitsPerByte;
  return @[ @(peakBytesPerSecond), @(peakInterval), @(avgBytesPerInterval), @(avgInterval) ];
}
|
||
// Struct that we pass to the encoder per frame to encode. We receive it again | ||
// in the encoder callback. | ||
struct RTCFrameEncodeParams { | ||
|
@@ -177,9 +205,9 @@ CFStringRef ExtractProfile(const webrtc::H264ProfileLevelId &profile_level_id, b | |
switch (profile_level_id.profile) { | ||
case webrtc::H264Profile::kProfileConstrainedBaseline: | ||
case webrtc::H264Profile::kProfileBaseline: | ||
if(screenSharing) { | ||
return kVTProfileLevel_H264_Baseline_AutoLevel; | ||
} | ||
if (screenSharing) { | ||
return kVTProfileLevel_H264_Baseline_AutoLevel; | ||
} | ||
switch (profile_level_id.level) { | ||
case webrtc::H264Level::kLevel3: | ||
return kVTProfileLevel_H264_Baseline_3_0; | ||
|
@@ -315,8 +343,8 @@ NSUInteger GetMaxSampleRate(const webrtc::H264ProfileLevelId &profile_level_id) | |
|
||
@implementation RTC_OBJC_TYPE (RTCVideoEncoderH264) { | ||
RTC_OBJC_TYPE(RTCVideoCodecInfo) * _codecInfo; | ||
std::unique_ptr<webrtc::BitrateAdjuster> _bitrateAdjuster; | ||
uint32_t _targetBitrateBps; | ||
uint32_t _targetFrameRate; | ||
uint32_t _encoderBitrateBps; | ||
uint32_t _encoderFrameRate; | ||
uint32_t _maxAllowedFrameRate; | ||
|
@@ -327,10 +355,16 @@ @implementation RTC_OBJC_TYPE (RTCVideoEncoderH264) { | |
int32_t _height; | ||
VTCompressionSessionRef _compressionSession; | ||
CVPixelBufferPoolRef _pixelBufferPool; | ||
RTCVideoCodecMode _mode; | ||
RTCVideoCodecMode _codecMode; | ||
unsigned int _maxQP; | ||
unsigned int _minBitrate; | ||
unsigned int _maxBitrate; | ||
RTCVideoEncodeMode _encodeMode; | ||
|
||
webrtc::H264BitstreamParser _h264BitstreamParser; | ||
std::vector<uint8_t> _frameScaleBuffer; | ||
|
||
CMTime _previousPresentationTimeStamp; | ||
} | ||
|
||
// .5 is set as a minimum to prevent overcompensating for large temporary | ||
|
@@ -343,12 +377,14 @@ @implementation RTC_OBJC_TYPE (RTCVideoEncoderH264) { | |
- (instancetype)initWithCodecInfo:(RTC_OBJC_TYPE(RTCVideoCodecInfo) *)codecInfo { | ||
if (self = [super init]) { | ||
_codecInfo = codecInfo; | ||
_bitrateAdjuster.reset(new webrtc::BitrateAdjuster(.5, .95)); | ||
_packetizationMode = RTCH264PacketizationModeNonInterleaved; | ||
_profile_level_id = | ||
webrtc::ParseSdpForH264ProfileLevelId([codecInfo nativeSdpVideoFormat].parameters); | ||
_previousPresentationTimeStamp = kCMTimeZero; | ||
RTC_DCHECK(_profile_level_id); | ||
RTC_LOG(LS_INFO) << "Using profile " << CFStringToString(ExtractProfile(*_profile_level_id, _mode == RTCVideoCodecModeScreensharing)); | ||
RTC_LOG(LS_INFO) << "Using profile " | ||
<< CFStringToString(ExtractProfile( | ||
*_profile_level_id, _codecMode == RTCVideoCodecModeScreensharing)); | ||
RTC_CHECK([codecInfo.name isEqualToString:kRTCVideoCodecH264Name]); | ||
} | ||
return self; | ||
|
@@ -365,17 +401,28 @@ - (NSInteger)startEncodeWithSettings:(RTC_OBJC_TYPE(RTCVideoEncoderSettings) *)s | |
|
||
_width = settings.width; | ||
_height = settings.height; | ||
_mode = settings.mode; | ||
_codecMode = settings.mode; | ||
_maxQP = settings.qpMax; | ||
|
||
_encodeMode = Variable; // Always variable mode for now | ||
_minBitrate = settings.minBitrate * 1000; // minBitrate is in kbps. | ||
_maxBitrate = settings.maxBitrate * 1000; // maxBitrate is in kbps. | ||
|
||
uint32_t aligned_width = (((_width + 15) >> 4) << 4); | ||
uint32_t aligned_height = (((_height + 15) >> 4) << 4); | ||
_maxAllowedFrameRate = static_cast<uint32_t>(GetMaxSampleRate(*_profile_level_id) / | ||
(aligned_width * aligned_height)); | ||
|
||
// We can only set average bitrate on the HW encoder. | ||
_targetBitrateBps = settings.startBitrate * 1000; // startBitrate is in kbps. | ||
_bitrateAdjuster->SetTargetBitrateBps(_targetBitrateBps); | ||
_encoderFrameRate = MIN(settings.maxFramerate, _maxAllowedFrameRate); | ||
if (_encodeMode == Constant) { | ||
_targetBitrateBps = _maxBitrate; | ||
} else { | ||
_targetBitrateBps = settings.startBitrate * 1000; // startBitrate is in kbps. | ||
} | ||
|
||
_targetFrameRate = MIN(settings.maxFramerate, _maxAllowedFrameRate); | ||
_encoderBitrateBps = 0; | ||
_encoderFrameRate = 0; | ||
if (settings.maxFramerate > _maxAllowedFrameRate && _maxAllowedFrameRate > 0) { | ||
RTC_LOG(LS_WARNING) << "Initial encoder frame rate setting " << settings.maxFramerate | ||
<< " is larger than the " | ||
|
@@ -396,8 +443,15 @@ - (NSInteger)encode:(RTC_OBJC_TYPE(RTCVideoFrame) *)frame | |
if (!_callback || !_compressionSession) { | ||
return WEBRTC_VIDEO_CODEC_UNINITIALIZED; | ||
} | ||
BOOL isKeyframeRequired = NO; | ||
|
||
CMTime presentationTimeStamp = CMTimeMake(frame.timeStampNs / rtc::kNumNanosecsPerMillisec, 1000); | ||
if (CMTimeCompare(presentationTimeStamp, _previousPresentationTimeStamp) == 0) { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. When does this happen? Is it an optimization for screenshare? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This is extra safety to avoid this issue: There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Thanks! Looks like this is part of the reason why it's unable to keep up. |
||
// Same PTS | ||
return WEBRTC_VIDEO_CODEC_NO_OUTPUT; | ||
} | ||
_previousPresentationTimeStamp = presentationTimeStamp; | ||
|
||
BOOL isKeyframeRequired = NO; | ||
// Get a pixel buffer from the pool and copy frame data over. | ||
if ([self resetCompressionSessionIfNeededWithFrame:frame]) { | ||
isKeyframeRequired = YES; | ||
|
@@ -424,8 +478,8 @@ - (NSInteger)encode:(RTC_OBJC_TYPE(RTCVideoFrame) *)frame | |
int dstWidth = CVPixelBufferGetWidth(pixelBuffer); | ||
int dstHeight = CVPixelBufferGetHeight(pixelBuffer); | ||
if ([rtcPixelBuffer requiresScalingToWidth:dstWidth height:dstHeight]) { | ||
int size = | ||
[rtcPixelBuffer bufferSizeForCroppingAndScalingToWidth:dstWidth height:dstHeight]; | ||
int size = [rtcPixelBuffer bufferSizeForCroppingAndScalingToWidth:dstWidth | ||
height:dstHeight]; | ||
_frameScaleBuffer.resize(size); | ||
} else { | ||
_frameScaleBuffer.clear(); | ||
|
@@ -462,7 +516,6 @@ - (NSInteger)encode:(RTC_OBJC_TYPE(RTCVideoFrame) *)frame | |
} | ||
} | ||
|
||
CMTime presentationTimeStamp = CMTimeMake(frame.timeStampNs / rtc::kNumNanosecsPerMillisec, 1000); | ||
CFDictionaryRef frameProperties = nullptr; | ||
if (isKeyframeRequired) { | ||
CFTypeRef keys[] = {kVTEncodeFrameOptionKey_ForceKeyFrame}; | ||
|
@@ -480,8 +533,8 @@ - (NSInteger)encode:(RTC_OBJC_TYPE(RTCVideoFrame) *)frame | |
frame.rotation)); | ||
encodeParams->codecSpecificInfo.packetizationMode = _packetizationMode; | ||
|
||
// Update the bitrate if needed. | ||
[self setBitrateBps:_bitrateAdjuster->GetAdjustedBitrateBps() frameRate:_encoderFrameRate]; | ||
// Update encoder bitrate or frameRate if needed. | ||
[self updateEncoderBitrateAndFrameRate]; | ||
|
||
OSStatus status = VTCompressionSessionEncodeFrame(_compressionSession, | ||
pixelBuffer, | ||
|
@@ -522,14 +575,19 @@ - (void)setCallback:(RTCVideoEncoderCallback)callback { | |
} | ||
|
||
- (int)setBitrate:(uint32_t)bitrateKbit framerate:(uint32_t)framerate { | ||
_targetBitrateBps = 1000 * bitrateKbit; | ||
_bitrateAdjuster->SetTargetBitrateBps(_targetBitrateBps); | ||
// set target bitrate bps | ||
_targetBitrateBps = bitrateKbit * 1000; | ||
|
||
RTC_LOG(LS_INFO) << "setBitrateKBit: " << bitrateKbit << " targetBps: " << _targetBitrateBps | ||
<< " frameRate: " << framerate; | ||
|
||
if (framerate > _maxAllowedFrameRate && _maxAllowedFrameRate > 0) { | ||
RTC_LOG(LS_WARNING) << "Encoder frame rate setting " << framerate << " is larger than the " | ||
<< "maximal allowed frame rate " << _maxAllowedFrameRate << "."; | ||
} | ||
framerate = MIN(framerate, _maxAllowedFrameRate); | ||
[self setBitrateBps:_bitrateAdjuster->GetAdjustedBitrateBps() frameRate:framerate]; | ||
|
||
_targetFrameRate = MIN(framerate, _maxAllowedFrameRate); | ||
|
||
return WEBRTC_VIDEO_CODEC_OK; | ||
} | ||
|
||
|
@@ -621,14 +679,19 @@ - (int)resetCompressionSessionWithPixelFormat:(OSType)framePixelFormat { | |
(NSString *)kCVPixelBufferPixelFormatTypeKey : @(framePixelFormat), | ||
}; | ||
|
||
NSDictionary *encoder_specs; | ||
NSMutableDictionary *encoder_specs; | ||
#if defined(WEBRTC_MAC) && !defined(WEBRTC_IOS) | ||
// Currently hw accl is supported above 360p on mac, below 360p | ||
// the compression session will be created with hw accl disabled. | ||
encoder_specs = @{ | ||
encoder_specs = [@{ | ||
(NSString *)kVTVideoEncoderSpecification_EnableHardwareAcceleratedVideoEncoder : @(YES), | ||
}; | ||
|
||
} mutableCopy]; | ||
// Enable low-latency video encoding | ||
if (@available(iOS 14.5, macOS 11.3, *)) { | ||
[encoder_specs addEntriesFromDictionary:@{ | ||
(NSString *)kVTVideoEncoderSpecification_EnableLowLatencyRateControl : @(YES), | ||
}]; | ||
} | ||
#endif | ||
OSStatus status = VTCompressionSessionCreate( | ||
nullptr, // use default allocator | ||
|
@@ -669,11 +732,30 @@ - (int)resetCompressionSessionWithPixelFormat:(OSType)framePixelFormat { | |
- (void)configureCompressionSession { | ||
RTC_DCHECK(_compressionSession); | ||
SetVTSessionProperty(_compressionSession, kVTCompressionPropertyKey_RealTime, true); | ||
SetVTSessionProperty(_compressionSession, | ||
kVTCompressionPropertyKey_ProfileLevel, | ||
ExtractProfile(*_profile_level_id, _mode == RTCVideoCodecModeScreensharing)); | ||
// Prioritize encoding speed over quality when necessary | ||
if (@available(iOS 14.0, macOS 11.0, *)) { | ||
SetVTSessionProperty( | ||
_compressionSession, kVTCompressionPropertyKey_PrioritizeEncodingSpeedOverQuality, true); | ||
} | ||
// Set maximum QP for screen sharing mode, range must be within 1 to 51 | ||
// https://developer.apple.com/documentation/videotoolbox/kvtcompressionpropertykey_maxallowedframeqp | ||
if (@available(iOS 15.0, macOS 12.0, *)) { | ||
// Only enable for screen sharing and let VideoToolbox do the optimizing as much as possible. | ||
if (_codecMode == RTCVideoCodecModeScreensharing) { | ||
RTC_LOG(LS_INFO) << "Configuring VideoToolbox to use maxQP: " << kHighH264QpThreshold | ||
<< " mode: " << _codecMode; | ||
SetVTSessionProperty( | ||
_compressionSession, kVTCompressionPropertyKey_MaxAllowedFrameQP, kHighH264QpThreshold); | ||
} | ||
} | ||
SetVTSessionProperty( | ||
_compressionSession, | ||
kVTCompressionPropertyKey_ProfileLevel, | ||
ExtractProfile(*_profile_level_id, _codecMode == RTCVideoCodecModeScreensharing)); | ||
SetVTSessionProperty(_compressionSession, kVTCompressionPropertyKey_AllowFrameReordering, false); | ||
[self setEncoderBitrateBps:_targetBitrateBps frameRate:_encoderFrameRate]; | ||
|
||
// [self updateEncoderBitrateAndFrameRate]; | ||
|
||
// TODO(tkchin): Look at entropy mode and colorspace matrices. | ||
// TODO(tkchin): Investigate to see if there's any way to make this work. | ||
// May need it to interop with Android. Currently this call just fails. | ||
|
@@ -701,49 +783,59 @@ - (NSString *)implementationName { | |
return @"VideoToolbox"; | ||
} | ||
|
||
- (void)setBitrateBps:(uint32_t)bitrateBps frameRate:(uint32_t)frameRate { | ||
if (_encoderBitrateBps != bitrateBps || _encoderFrameRate != frameRate) { | ||
[self setEncoderBitrateBps:bitrateBps frameRate:frameRate]; | ||
- (void)updateEncoderBitrateAndFrameRate { | ||
// If no compression session simply return | ||
if (!_compressionSession) { | ||
return; | ||
} | ||
} | ||
// Initial status | ||
OSStatus status = noErr; | ||
|
||
- (void)setEncoderBitrateBps:(uint32_t)bitrateBps frameRate:(uint32_t)frameRate { | ||
if (_compressionSession) { | ||
SetVTSessionProperty(_compressionSession, kVTCompressionPropertyKey_AverageBitRate, bitrateBps); | ||
uint32_t computedBitrateBps = _targetBitrateBps; | ||
|
||
// With zero `_maxAllowedFrameRate`, we fall back to automatic frame rate detection. | ||
if (_maxAllowedFrameRate > 0) { | ||
SetVTSessionProperty( | ||
_compressionSession, kVTCompressionPropertyKey_ExpectedFrameRate, frameRate); | ||
} | ||
// With zero `_maxAllowedFrameRate`, we fall back to automatic frame rate detection. | ||
uint32_t computedFrameRate = _maxAllowedFrameRate > 0 ? _targetFrameRate : 0; | ||
|
||
// TODO(tkchin): Add a helper method to set array value. | ||
int64_t dataLimitBytesPerSecondValue = | ||
static_cast<int64_t>(bitrateBps * kLimitToAverageBitRateFactor / 8); | ||
CFNumberRef bytesPerSecond = | ||
CFNumberCreate(kCFAllocatorDefault, kCFNumberSInt64Type, &dataLimitBytesPerSecondValue); | ||
int64_t oneSecondValue = 1; | ||
CFNumberRef oneSecond = | ||
CFNumberCreate(kCFAllocatorDefault, kCFNumberSInt64Type, &oneSecondValue); | ||
const void *nums[2] = {bytesPerSecond, oneSecond}; | ||
CFArrayRef dataRateLimits = CFArrayCreate(nullptr, nums, 2, &kCFTypeArrayCallBacks); | ||
OSStatus status = VTSessionSetProperty( | ||
_compressionSession, kVTCompressionPropertyKey_DataRateLimits, dataRateLimits); | ||
if (bytesPerSecond) { | ||
CFRelease(bytesPerSecond); | ||
} | ||
if (oneSecond) { | ||
CFRelease(oneSecond); | ||
// Set frame rate | ||
if (computedFrameRate != _encoderFrameRate) { | ||
status = VTSessionSetProperty(_compressionSession, | ||
kVTCompressionPropertyKey_ExpectedFrameRate, | ||
(__bridge CFTypeRef) @(computedFrameRate)); | ||
// Ensure the frame rate was set successfully | ||
if (status != noErr) { | ||
RTC_LOG(LS_ERROR) << "Failed to set frame rate: " << computedFrameRate | ||
<< " error: " << status; | ||
} else { | ||
RTC_LOG(LS_INFO) << "Did update encoder frame rate: " << computedFrameRate; | ||
} | ||
if (dataRateLimits) { | ||
CFRelease(dataRateLimits); | ||
_encoderFrameRate = computedFrameRate; | ||
} | ||
|
||
// Set bitrate | ||
if (computedBitrateBps != _encoderBitrateBps) { | ||
status = VTSessionSetProperty(_compressionSession, | ||
kVTCompressionPropertyKey_AverageBitRate, | ||
(__bridge CFTypeRef) @(computedBitrateBps)); | ||
|
||
// Ensure the bitrate was set successfully | ||
if (status != noErr) { | ||
RTC_LOG(LS_ERROR) << "Failed to update encoder bitrate: " << computedBitrateBps | ||
<< "error: " << status; | ||
} else { | ||
RTC_LOG(LS_INFO) << "Did update encoder bitrate: " << computedBitrateBps; | ||
} | ||
|
||
status = VTSessionSetProperty( | ||
_compressionSession, | ||
kVTCompressionPropertyKey_DataRateLimits, | ||
(__bridge CFArrayRef)CreateRateLimitArray(computedBitrateBps, _encodeMode)); | ||
if (status != noErr) { | ||
RTC_LOG(LS_ERROR) << "Failed to set data rate limit with code: " << status; | ||
RTC_LOG(LS_ERROR) << "Failed to update encoder data rate limits"; | ||
} else { | ||
RTC_LOG(LS_INFO) << "Did update encoder data rate limits"; | ||
} | ||
|
||
_encoderBitrateBps = bitrateBps; | ||
_encoderFrameRate = frameRate; | ||
_encoderBitrateBps = computedBitrateBps; | ||
} | ||
} | ||
|
||
|
@@ -799,8 +891,9 @@ - (void)frameWasEncoded:(OSStatus)status | |
frame.captureTimeMs = renderTimeMs; | ||
frame.timeStamp = timestamp; | ||
frame.rotation = rotation; | ||
frame.contentType = (_mode == RTCVideoCodecModeScreensharing) ? RTCVideoContentTypeScreenshare : | ||
RTCVideoContentTypeUnspecified; | ||
frame.contentType = (_codecMode == RTCVideoCodecModeScreensharing) ? | ||
RTCVideoContentTypeScreenshare : | ||
RTCVideoContentTypeUnspecified; | ||
frame.flags = webrtc::VideoSendTiming::kInvalid; | ||
|
||
_h264BitstreamParser.ParseBitstream(*buffer); | ||
|
@@ -811,7 +904,6 @@ - (void)frameWasEncoded:(OSStatus)status | |
RTC_LOG(LS_ERROR) << "Encode callback failed"; | ||
return; | ||
} | ||
_bitrateAdjuster->Update(frame.buffer.length); | ||
} | ||
|
||
- (nullable RTC_OBJC_TYPE(RTCVideoEncoderQpThresholds) *)scalingSettings { | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Any idea why this value had to be set so high compared to what it was before? 10x the average bitrate seems quite high, no?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I just did some tests with camera capture: the bit rate stays close to the target value (without exceeding the limit). Moving pictures are smooth and clear, and for a static picture the bit rate can drop to 1/5 of the target bit rate.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
For game screen content, probably because of its complex textures, at least 4–6 Mbps needs to be set to achieve smooth and clear 720p/30fps, whereas the camera only needs 2 Mbps, so this PR works well for me.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@davidzhao This is for computing data rate limits and not the average bitrate.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
got it, I don't really understand this so feel free to ignore :)