In today's virtual conferencing landscape, being able to seamlessly swap your real surroundings for a personalized image or video is no longer just a nice-to-have feature; it is a necessity for a professional, engaging remote presence.
👉 By the end of this wiki, you can expect the virtual background feature to look like this:
Warning
This document uses VisionKit to separate the person from the background, but VNGeneratePersonInstanceMaskRequest is only supported on iOS 17 and later.
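If your deployment target is below iOS 17, the segmentation path has to be guarded at runtime. A minimal sketch (the function name is mine, not part of the wiki code):

```swift
import Vision

// Sketch: VNGeneratePersonInstanceMaskRequest exists only on iOS 17+, so probe
// availability once and fall back to emitting unprocessed camera frames otherwise.
func personInstanceMaskingIsAvailable() -> Bool {
    if #available(iOS 17.0, *) {
        return true
    }
    return false
}
```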
- VideoFrame: contains the buffer of a frame captured by the camera device, in I420 format.
- VideoSink: used to send frames back to the native WebRTC source.
- VideoSource: reads from the camera device, produces VideoFrames, and delivers them to VideoSinks.
- VideoProcessor: an interface provided by WebRTC for modifying the VideoFrames produced by a VideoSource.
- MediaStream: a WebRTC API that provides support for streaming audio and video data. It consists of zero or more MediaStreamTrack objects, each representing an audio or video track. (A sketch of how these pieces fit together follows this list.)
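To make these terms concrete, here is a minimal sketch of how they are typically wired together with the WebRTC iOS SDK. The track id "video0", the 30 fps value, and the function name are placeholders of mine, not part of the wiki code:

```swift
import AVFoundation
import WebRTC

// Sketch only: how VideoSource, capturer, and track are typically wired together.
func makeLocalVideoTrack(factory: RTCPeerConnectionFactory) -> RTCVideoTrack {
    let videoSource = factory.videoSource()                                   // produces VideoFrames for its sinks
    let videoTrack = factory.videoTrack(with: videoSource, trackId: "video0") // carried by a MediaStream / peer connection

    // The capturer hands frames to its RTCVideoCapturerDelegate. Passing the source
    // directly gives plain camera video; this wiki inserts RTCVideoPipe here instead,
    // so frames can be processed before they reach the source.
    // NOTE: keep a strong reference to `capturer` in real code; it is local here only for brevity.
    let capturer = RTCCameraVideoCapturer(delegate: videoSource)
    if let device = RTCCameraVideoCapturer.captureDevices().first,
       let format = RTCCameraVideoCapturer.supportedFormats(for: device).first {
        capturer.startCapture(with: device, format: format, fps: 30)
    }
    return videoTrack
}
```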
RTCVideoPipe sits between the camera capturer and the WebRTC video source. It throttles incoming frames to 15 fps so the Vision request can keep up, passes frames straight through when no background image is set, and otherwise hands each frame to RTCVirtualBackground before emitting the processed result:

```swift
import Foundation
import WebRTC

@objc public class RTCVideoPipe: NSObject, RTCVideoCapturerDelegate {
    var virtualBackground: RTCVirtualBackground?
    var videoSource: RTCVideoSource?
    var latestTimestampNs: Int64 = 0
    var frameCount: Int = 0
    var lastProcessedTimestamp: Int64 = 0
    var fpsInterval: Int64 = 1000000000 / 15 // 15 fps, so VNGeneratePersonInstanceMaskRequest can keep up
    var backgroundImage: UIImage?

    @objc public init(videoSource: RTCVideoSource) {
        self.videoSource = videoSource
        self.virtualBackground = RTCVirtualBackground()
        super.init()
    }

    @objc public func setBackgroundImage(image: UIImage?) {
        backgroundImage = image
    }

    @objc public func capturer(_ capturer: RTCVideoCapturer, didCapture frame: RTCVideoFrame) {
        let currentTimestamp = frame.timeStampNs

        // Calculate the time since the last processed frame
        let elapsedTimeSinceLastProcessedFrame = currentTimestamp - lastProcessedTimestamp

        if elapsedTimeSinceLastProcessedFrame < fpsInterval {
            // Skip processing the frame if it's too soon
            return
        }

        if backgroundImage == nil {
            self.videoSource?.emitFrame(frame)
            return
        }

        virtualBackground?.processForegroundMask(from: frame, backgroundImage: backgroundImage!) { processedFrame, error in
            if let error = error {
                // Handle error
                print("Error processing foreground mask: \(error.localizedDescription)")
            } else if let processedFrame = processedFrame {
                self.lastProcessedTimestamp = currentTimestamp

                if processedFrame.timeStampNs <= self.latestTimestampNs {
                    // Skip emitting the frame if its timestamp is not newer than the latest one
                    return
                }

                self.latestTimestampNs = processedFrame.timeStampNs
                self.videoSource?.emitFrame(processedFrame)
            }
        }
    }
}
```

Create the pipe and register it as the video source's capturer delegate so every captured frame flows through it (Objective-C):

```objc
videoPipe = [[RTCVideoPipe alloc] initWithVideoSource:videoSource];
[videoSource setDelegate:videoPipe];
```

RTCVirtualBackground does the actual work: it runs the Vision person-segmentation request on each frame and composites the result over the chosen background image.

```swift
import Foundation
import AVFoundation
import Vision
import VisionKit
import OpenGLES
@available(iOS 17.0, *)
var maskRequest: VNGeneratePersonInstanceMaskRequest?
@objc public class RTCVirtualBackground: NSObject {
    public typealias ForegroundMaskCompletion = (RTCVideoFrame?, Error?) -> Void

    public override init() {
        if #available(iOS 17.0, *) {
            DispatchQueue.main.async {
                maskRequest = VNGeneratePersonInstanceMaskRequest()
            }
        }
    }

    public func processForegroundMask(from videoFrame: RTCVideoFrame, backgroundImage: UIImage, completion: @escaping ForegroundMaskCompletion) {
        guard let pixelBuffer = convertRTCVideoFrameToPixelBuffer(videoFrame) else {
            print("Failed to convert RTCVideoFrame to CVPixelBuffer")
            return
        }

        DispatchQueue.main.async(execute: {
            if #available(iOS 17.0, *) {
                let inputFrameImage = CIImage(cvPixelBuffer: pixelBuffer).resize()
                let handler = VNImageRequestHandler(ciImage: inputFrameImage!, options: [:])
                do {
                    try handler.perform([maskRequest!])
                    if let observation = maskRequest!.results?.first {
                        let allInstances = observation.allInstances
                        do {
                            let maskedImage = try observation.generateMaskedImage(ofInstances: allInstances, from: handler, croppedToInstancesExtent: false)
                            self.applyForegroundMask(to: maskedImage, backgroundImage: backgroundImage) { maskedPixelBuffer, error in
                                if let maskedPixelBuffer = maskedPixelBuffer {
                                    let frameProcessed = self.convertPixelBufferToRTCVideoFrame(maskedPixelBuffer, rotation: videoFrame.rotation, timeStampNs: videoFrame.timeStampNs)
                                    completion(frameProcessed, nil)
                                } else {
                                    completion(nil, error)
                                }
                            }
                        } catch {
                            print("Error: \(error.localizedDescription)")
                            completion(nil, error)
                        }
                    }
                } catch {
                    print("Failed to perform Vision request: \(error)")
                    completion(nil, error)
                }
            }
        })
    }
}
```

applyForegroundMask, another method on RTCVirtualBackground, draws the background image first and the masked person image on top of it, then hands the composited result back as a CVPixelBuffer:

```swift
func applyForegroundMask(to pixelBuffer: CVPixelBuffer, backgroundImage: UIImage, completion: @escaping (CVPixelBuffer?, Error?) -> Void) {
    DispatchQueue.global(qos: .userInitiated).async {
        let maskedUIImage = UIImage(ciImage: CIImage(cvPixelBuffer: pixelBuffer))
        let size = CGSize(width: CGFloat(CVPixelBufferGetWidth(pixelBuffer)), height: CGFloat(CVPixelBufferGetHeight(pixelBuffer)))
        let rotatedBackgroundImage = backgroundImage.rotateImage(orientation: UIImage.Orientation.up)

        UIGraphicsBeginImageContextWithOptions(size, false, 0.0)
        rotatedBackgroundImage.draw(in: CGRect(x: 0, y: 0, width: size.width, height: size.height))
        maskedUIImage.draw(in: CGRect(x: 0, y: 0, width: size.width, height: size.height))
        let composedImage = UIGraphicsGetImageFromCurrentImageContext()
        UIGraphicsEndImageContext()

        DispatchQueue.main.async {
            if let composedImage = composedImage {
                guard let composedPixelBuffer = self.pixelBufferFromImage(image: composedImage) else {
                    completion(nil, nil)
                    return
                }
                completion(composedPixelBuffer, nil)
            }
        }
    }
}
```
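Note that several helpers used above are custom extensions the wiki does not show: resize() on CIImage, rotateImage(orientation:) on UIImage, pixelBufferFromImage(image:), and the two RTCVideoFrame/CVPixelBuffer conversions. As a rough sketch (mine, not the wiki author's implementation), the two conversions could look like the following, assuming the capturer delivers RTCCVPixelBuffer-backed frames, which is what RTCCameraVideoCapturer produces:

```swift
import WebRTC

// Sketch only, not the original implementation: conversions between RTCVideoFrame
// and CVPixelBuffer, assuming the frame buffer is an RTCCVPixelBuffer.
func convertRTCVideoFrameToPixelBuffer(_ frame: RTCVideoFrame) -> CVPixelBuffer? {
    // Only the CVPixelBuffer-backed case is handled; an I420 buffer would need
    // an explicit conversion first.
    return (frame.buffer as? RTCCVPixelBuffer)?.pixelBuffer
}

func convertPixelBufferToRTCVideoFrame(_ pixelBuffer: CVPixelBuffer,
                                       rotation: RTCVideoRotation,
                                       timeStampNs: Int64) -> RTCVideoFrame {
    // Wrap the buffer in WebRTC's RTCCVPixelBuffer and keep the original rotation
    // and timestamp so RTCVideoPipe's timestamp checks still work downstream.
    let rtcBuffer = RTCCVPixelBuffer(pixelBuffer: pixelBuffer)
    return RTCVideoFrame(buffer: rtcBuffer, rotation: rotation, timeStampNs: timeStampNs)
}
```

Keeping the original rotation and timeStampNs matters here, because RTCVideoPipe drops any processed frame whose timestamp is not newer than the last one it emitted.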
Thanks @lambiengcode for the quick reply, I really appreciate your help.
I'm using React Native for my app and trying to update the npm library react-native-webrtc (a Jitsi-based WebRTC framework written in Objective-C).
I'm using the method below to convert the UIImage I got from the RTCVideoFrame after applying filters, but I'm not able to convert the filtered image back to an RTCVideoFrame.
With this method, I get a blank video after sending the frame back to the video source.
Can you please help me correct the method below, or suggest the changes needed to make it work and produce a non-blank video frame?
I'd really appreciate the help; I've been stuck on this for a few days and I'm new to Objective-C as well, so it would be a big help for me!
```objc
- (RTCVideoFrame *)videoFrameFromImage:(UIImage *)image {
    CGSize frameSize = CGSizeMake(image.size.width, image.size.height);

    // Create a CVPixelBuffer from the UIImage
    NSDictionary *options = @{(NSString *)kCVPixelBufferCGImageCompatibilityKey: @(YES),
                              (NSString *)kCVPixelBufferCGBitmapContextCompatibilityKey: @(YES)};
    CVPixelBufferRef pixelBuffer = NULL;
    CVReturn status = CVPixelBufferCreate(kCFAllocatorDefault,
                                          frameSize.width,
                                          frameSize.height,
                                          kCVPixelFormatType_32BGRA,
                                          (__bridge CFDictionaryRef)options,
                                          &pixelBuffer);
    if (status != kCVReturnSuccess) {
        NSLog(@"Error creating pixel buffer");
        return nil;
    }

    CVPixelBufferLockBaseAddress(pixelBuffer, 0);
    void *pixelData = CVPixelBufferGetBaseAddress(pixelBuffer);
    CGColorSpaceRef rgbColorSpace = CGColorSpaceCreateDeviceRGB();
    CGContextRef context = CGBitmapContextCreate(pixelData,
                                                 frameSize.width,
                                                 frameSize.height,
                                                 8,
                                                 CVPixelBufferGetBytesPerRow(pixelBuffer),
                                                 rgbColorSpace,
                                                 kCGImageAlphaNoneSkipFirst);
    CGContextDrawImage(context, CGRectMake(0, 0, frameSize.width, frameSize.height), image.CGImage);
    CGContextRelease(context);
    CGColorSpaceRelease(rgbColorSpace);
    CVPixelBufferUnlockBaseAddress(pixelBuffer, 0);

    RCTLog(@"[VideoCaptureController] videoFrameFromImage pixelBuffer created...");

    // Create RTCVideoFrame
    RTCVideoFrame *videoFrame = [[RTCVideoFrame alloc] initWithPixelBuffer:pixelBuffer
                                                                  rotation:RTCVideoRotation_0
                                                               timeStampNs:0];
    CVPixelBufferRelease(pixelBuffer);

    return videoFrame;
}
```