AudioUnit + Opus codec = crackle issue


Question


I am creating a VoIP app for iOS in Objective-C. Currently I am working on the audio part: recording audio data from the microphone, encoding it with Opus, decoding it, and then playing it back. For recording and playback I use AudioUnit. I also made a buffer implementation which allocates spaces of memory, each with an initially set size. It has three main methods:

- setBufferSize - sets the size of the buffer's sub-allocated spaces.
- writeDataToBuffer - creates a new space (if needed) and fills data into the current writing space.
- readDataFromBuffer - reads data from the current reading space.
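
For reference, the buffer interface looks roughly like this (a simplified sketch only; the real implementation also manages the list of allocated spaces internally):

#include <cstddef>

class VoipBuffer
{
public:
   // Sets the size of each sub-allocated space.
   void setBufferSize(size_t bufferSize);

   // Creates a new space if needed and copies dataSize bytes into the current writing space.
   void writeDataToBuffer(const void* data, size_t dataSize);

   // Returns a pointer to the current reading space, or nullptr if no data is ready.
   void* readDataFromBuffer();
};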

I use this buffer for storing the audio data. It works well; I have tested it. If I just read the audio data, store it in the buffer, read it back from the buffer and then play it, without Opus, everything works great. But the problem comes when I include Opus. It does encode and decode the audio data, but the quality is not so good and there is some crackle as well. I was wondering: what am I doing wrong? Here are the relevant pieces of my code:

AudioUnit:

OSStatus status;


m_sAudioDescription.componentType = kAudioUnitType_Output;
m_sAudioDescription.componentSubType = kAudioUnitSubType_VoiceProcessingIO/*kAudioUnitSubType_RemoteIO*/;
m_sAudioDescription.componentFlags = 0;
m_sAudioDescription.componentFlagsMask = 0;
m_sAudioDescription.componentManufacturer = kAudioUnitManufacturer_Apple;

AudioComponent inputComponent = AudioComponentFindNext(NULL, &m_sAudioDescription);

status = AudioComponentInstanceNew(inputComponent, &m_audioUnit);


// Enable IO for recording
UInt32 flag = 1;
status = AudioUnitSetProperty(m_audioUnit,
                              kAudioOutputUnitProperty_EnableIO,
                              kAudioUnitScope_Input,
                              VOIP_AUDIO_INPUT_ELEMENT,
                              &flag,
                              sizeof(flag));

// Enable IO for playback
status = AudioUnitSetProperty(m_audioUnit,
                              kAudioOutputUnitProperty_EnableIO,
                              kAudioUnitScope_Output,
                              VOIP_AUDIO_OUTPUT_ELEMENT,
                              &flag,
                              sizeof(flag));

// Describe format
m_sAudioFormat.mSampleRate          = 48000.00;//48000.00;/*44100.00*/;
m_sAudioFormat.mFormatID            = kAudioFormatLinearPCM;
m_sAudioFormat.mFormatFlags         = kAudioFormatFlagIsSignedInteger | kAudioFormatFlagIsPacked/* | kAudioFormatFlagsCanonical*/;
m_sAudioFormat.mFramesPerPacket     = 1;
m_sAudioFormat.mChannelsPerFrame    = 1;
m_sAudioFormat.mBitsPerChannel      = 16; //8 * bytesPerSample
m_sAudioFormat.mBytesPerFrame       = /*(UInt32)bytesPerSample;*/2; //bitsPerChannel / 8 * channelsPerFrame
m_sAudioFormat.mBytesPerPacket      = 2; //bytesPerFrame * framesPerPacket


// Apply format
status = AudioUnitSetProperty(m_audioUnit,
                              kAudioUnitProperty_StreamFormat,
                              kAudioUnitScope_Output,
                              VOIP_AUDIO_INPUT_ELEMENT,
                              &m_sAudioFormat,
                              sizeof(m_sAudioFormat));

status = AudioUnitSetProperty(m_audioUnit,
                              kAudioUnitProperty_StreamFormat,
                              kAudioUnitScope_Input,
                              VOIP_AUDIO_OUTPUT_ELEMENT,
                              &m_sAudioFormat,
                              sizeof(m_sAudioFormat));


// Set input callback
AURenderCallbackStruct callbackStruct;
callbackStruct.inputProc = inputRenderCallback;
callbackStruct.inputProcRefCon = this;
status = AudioUnitSetProperty(m_audioUnit,
                              kAudioOutputUnitProperty_SetInputCallback,
                              kAudioUnitScope_Global,
                              VOIP_AUDIO_INPUT_ELEMENT,
                              &callbackStruct,
                              sizeof(callbackStruct));

// Set output callback
callbackStruct.inputProc = outputRenderCallback;
callbackStruct.inputProcRefCon = this;
status = AudioUnitSetProperty(m_audioUnit,
                              kAudioUnitProperty_SetRenderCallback,
                              kAudioUnitScope_Global,
                              VOIP_AUDIO_OUTPUT_ELEMENT,
                              &callbackStruct,
                              sizeof(callbackStruct));

//Enable Echo cancelation:
this->_setEchoCancelation(true);

//Enable Automatic Gain control:
this->_setAGC(false);

// Initialise
status = AudioUnitInitialize(m_audioUnit);

return noErr;

Input buffer allocation and setting the sizes of the storage buffers:

void VoipAudio::_allocBuffer()
{
   UInt32 numFramesPerBuffer;
   UInt32 size = sizeof(/*VoipUInt32*/VoipInt16);
   AudioUnitGetProperty(m_audioUnit,
                        kAudioUnitProperty_MaximumFramesPerSlice,
                        kAudioUnitScope_Global,
                        VOIP_AUDIO_OUTPUT_ELEMENT,
                        &numFramesPerBuffer,
                        &size);

   UInt32 inputBufferListSize = offsetof(AudioBufferList, mBuffers[0]) + (sizeof(AudioBuffer) * m_sAudioFormat.mChannelsPerFrame);
   inputBuffer = (AudioBufferList *)malloc(inputBufferListSize);
   inputBuffer->mNumberBuffers = m_sAudioFormat.mChannelsPerFrame;

   //pre-malloc buffers for AudioBufferLists
   for(VoipUInt32 tmp_int1 = 0; tmp_int1 < inputBuffer->mNumberBuffers; tmp_int1++)
   {
      inputBuffer->mBuffers[tmp_int1].mNumberChannels = 1;
      inputBuffer->mBuffers[tmp_int1].mDataByteSize = 2048;
      inputBuffer->mBuffers[tmp_int1].mData = malloc(2048);
      memset(inputBuffer->mBuffers[tmp_int1].mData, 0, 2048);
   }

   this->m_oAudioBuffer = new VoipBuffer();
   this->m_oAudioBuffer->setBufferSize(2048);

   this->m_oAudioReadBuffer = new VoipBuffer();
   this->m_oAudioReadBuffer->setBufferSize(2880);
 }

Record callback:

this->m_oAudioReadBuffer->writeDataToBuffer(samples, samplesSize);
void* tmp_buffer = this->m_oAudioReadBuffer->readDataFromBuffer();
if (tmp_buffer != nullptr)
{
   sVoipAudioCodecOpusEncodedResult* encodedSamples = VoipAudioCodecs::Opus_Encode((VoipInt16*)tmp_buffer, 2880);

   sVoipAudioCodecOpusDecodedResult* decodedSamples = VoipAudioCodecs::Opus_Decode(encodedSamples->m_data, encodedSamples->m_dataSize);


   this->m_oAudioBuffer->writeDataToBuffer(decodedSamples->m_data, decodedSamples->m_dataSize);

   free(encodedSamples->m_data);
   free(encodedSamples);
   free(decodedSamples->m_data);
   free(decodedSamples);
}

Playing callback:

void* tmp_buffer = this->m_oAudioBuffer->readDataFromBuffer();

if (tmp_buffer != nullptr)
{
   memset(buffer->mBuffers[0].mData, 0, 2048);
   memcpy(buffer->mBuffers[0].mData, tmp_buffer, 2048);
   buffer->mBuffers[0].mDataByteSize = 2048;
} else {
   memset(buffer->mBuffers[0].mData, 0, 2048);
   buffer->mBuffers[0].mDataByteSize = 2048;
}

Opus Init Code:

int _error = 0;

VoipAudioCodecs::m_oEncoder = opus_encoder_create(SAMPLE_RATE, CHANNELS, APPLICATION, &_error);
if (_error < 0)
{
    fprintf(stderr, "VoipAudioCodecs error: failed to create an encoder: %s\n", opus_strerror(_error));

    return;
}

_error = opus_encoder_ctl(VoipAudioCodecs::m_oEncoder, OPUS_SET_BITRATE(BITRATE/*OPUS_BITRATE_MAX*/));
if (_error < 0)
{
    fprintf(stderr, "VoipAudioCodecs error: failed to set bitrate: %s\n", opus_strerror(_error));

    return;
}

VoipAudioCodecs::m_oDecoder = opus_decoder_create(SAMPLE_RATE, CHANNELS, &_error);
if (_error < 0)
{
    fprintf(stderr, "VoipAudioCodecs error: failed to create decoder: %s\n", opus_strerror(_error));

    return;
}

Opus encode/decode:

sVoipAudioCodecOpusEncodedResult* VoipAudioCodecs::Opus_Encode(VoipInt16* number, int samplesCount)
{
   unsigned char cbits[MAX_PACKET_SIZE];
   VoipInt32 nbBytes;

   nbBytes = opus_encode(VoipAudioCodecs::m_oEncoder, number, FRAME_SIZE, cbits, MAX_PACKET_SIZE);
   if (nbBytes < 0)
   {
      fprintf(stderr, "VoipAudioCodecs error: encode failed: %s\n", opus_strerror(nbBytes));

      return nullptr;
   }    

   sVoipAudioCodecOpusEncodedResult* result = (sVoipAudioCodecOpusEncodedResult* )malloc(sizeof(sVoipAudioCodecOpusEncodedResult));

   result->m_data = (unsigned char*)malloc(nbBytes);
   memcpy(result->m_data, cbits, nbBytes);
   result->m_dataSize = nbBytes;

   return result;
}

sVoipAudioCodecOpusDecodedResult* VoipAudioCodecs::Opus_Decode(void* encoded, VoipInt32 nbBytes)
{
    VoipInt16 decodedPacket[MAX_FRAME_SIZE];


    int frame_size = opus_decode(VoipAudioCodecs::m_oDecoder, (const unsigned char*)encoded, nbBytes, decodedPacket, MAX_FRAME_SIZE, 0);

    if (frame_size < 0)
    {
       fprintf(stderr, "VoipAudioCodecs error: decoder failed: %s\n", opus_strerror(frame_size));

       return nullptr;
    }

    sVoipAudioCodecOpusDecodedResult* result = (sVoipAudioCodecOpusDecodedResult* )malloc(sizeof(sVoipAudioCodecOpusDecodedResult));

    result->m_data = (VoipInt16*)malloc(frame_size / sizeof(VoipInt16));
    memcpy(result->m_data, decodedPacket, (frame_size / sizeof(VoipInt16)));
    result->m_dataSize = frame_size / sizeof(VoipInt16);

    return result;
 }

Here are some constants I use:

#define FRAME_SIZE 2880 //120, 240, 480, 960, 1920, 2880 
#define SAMPLE_RATE 48000
#define CHANNELS 1
#define APPLICATION OPUS_APPLICATION_VOIP//OPUS_APPLICATION_AUDIO
#define BITRATE 64000
#define MAX_FRAME_SIZE 4096
#define MAX_PACKET_SIZE (3*1276)

Can you help me please?


Answer 1:


Your audio callback time may need to be increased. Try increasing your session's setPreferredIOBufferDuration time. I have used Opus on iOS and have measured the decoding time: it takes 2 to 3 ms to decode about 240 frames of data. There is a good chance you are missing your subsequent callbacks because it is taking too long to decode the audio.
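
For example, something along these lines (just a sketch; the 0.02 second value is only a starting point, so pick a duration that comfortably covers your encode/decode time):

#import <AVFoundation/AVFoundation.h>

// Ask the audio session for a larger IO buffer duration so each render callback
// has more time available before the next one fires.
NSError *error = nil;
BOOL ok = [[AVAudioSession sharedInstance] setPreferredIOBufferDuration:0.02
                                                                  error:&error];
if (!ok)
{
    NSLog(@"Failed to set preferred IO buffer duration: %@", error);
}

You can check what the system actually granted afterwards via [AVAudioSession sharedInstance].IOBufferDuration.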




Answer 2:


I had the same problem in my project. The problem was that iOS gives me an unstable frame size; I used Audio Queue Services and Audio Unit, and they gave me the same result (crackled voice). All you have to do is save some samples into a ring buffer in the audio callback, then, in a separate thread, do the audio processing to build a fixed-size frame on each round. For example: the AudioUnit gives you frames or samples like this: [2048 .. 2048 .. 2048], but the Opus codec needs 2880 frames for each packet, so you need to take 2048 from the first buffer and the remaining 832 frames from the next buffer to get a fixed frame size to send to the Opus encoder.

This is the function I used in my project:

    func audioProcessing(){
        DispatchQueue.global(qos: .default).async {
             
             // this is used to save the remaining data from the ring buffer
             var remainData:NSMutableData = NSMutableData()
             var remainDataSize = 0
             
             while self.room_oppened{
                
                 // here we define the fixed frame we want to use in our opus encoder
                 
                 var packetOffset = 0
                 let fixedFrameSize:Int     = 5760
                 var dataToGetFullFrame:Int = 5760
                 let packetData:NSMutableData = NSMutableData(length: fixedFrameSize)!// this needs to be filled with data
                 

                 if remainDataSize > 0 {
                     if remainDataSize < fixedFrameSize{
                         memcpy(packetData.mutableBytes.advanced(by: packetOffset), remainData.mutableBytes.advanced(by: 0), remainDataSize)// add the remaining data
                         dataToGetFullFrame = dataToGetFullFrame - remainDataSize
                         packetOffset = packetOffset + remainDataSize// - 1
                     }else{
                         memcpy(packetData.mutableBytes.advanced(by: packetOffset), remainData.mutableBytes.advanced(by: 0), fixedFrameSize)// add the remaining data
                         dataToGetFullFrame = 0
                     }
                     remainDataSize = 0
                 }
                                  
                 
                 // if the packet is not full yet, we need to get more data from the ring buffer
                 if dataToGetFullFrame > 0 {
                     
                     while dataToGetFullFrame > 0 {
                         
                         let bufferData = self.ringBufferEncodedAudio.read()// read a chunk of data from the buffer
                         
                         if bufferData != nil{
                                      
                             
                             var chunkOffset = 0
                             
                             if dataToGetFullFrame > bufferData!.length{
                                 memcpy(packetData.mutableBytes.advanced(by: packetOffset) , bufferData!.mutableBytes , bufferData!.length)
                                 chunkOffset = bufferData!.length// this is how much data we read
                                 dataToGetFullFrame = dataToGetFullFrame - bufferData!.length // how much data we still need to fill the packet
                                 packetOffset = packetOffset + bufferData!.length// + 1
                             }else{
                                 memcpy(packetData.mutableBytes.advanced(by: packetOffset) , bufferData!.mutableBytes , dataToGetFullFrame)
                                 chunkOffset = dataToGetFullFrame// this is how much data we read
                                 packetOffset = packetOffset + dataToGetFullFrame// + 1
                                 dataToGetFullFrame = dataToGetFullFrame - dataToGetFullFrame // how much data we still need to fill the packet
                             }
                             
                             
                             if dataToGetFullFrame <= 0 {
                                 var size       = bufferData!.length - chunkOffset
                                 remainData     = NSMutableData(bytes: bufferData?.mutableBytes.advanced(by: chunkOffset), length: size)
                                 remainDataSize = size
                             }

        
                         }
                     
                         usleep(useconds_t(8 * 1000))
                         
                     }
                                                           
                 }
                 
                 // send packet to encoder
                if self.enable_streaming {
                    let dataToEncode:Data = packetData as Data
                    let packet = OpusSwiftPort.shared.encodeData(dataToEncode)
                                    
                    if packet != nil{
                        self.sendAudioPacket(packet: packet!)// <--- send this over the network
                    }
                }
                
              
             }
         }
     }

After I did this audio processing I got very clear audio. I hope this was helpful for you.



Source: https://stackoverflow.com/questions/33589354/audiounit-opus-codec-crackle-issue
