/*
 *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "webrtc/modules/audio_coding/neteq/audio_decoder_impl.h"

#include <assert.h>
#include <string.h>  // memmove

#ifdef WEBRTC_CODEC_CELT
#include "webrtc/modules/audio_coding/codecs/celt/include/celt_interface.h"
#endif
#include "webrtc/modules/audio_coding/codecs/cng/include/webrtc_cng.h"
#include "webrtc/modules/audio_coding/codecs/g711/include/g711_interface.h"
#ifdef WEBRTC_CODEC_G722
#include "webrtc/modules/audio_coding/codecs/g722/include/g722_interface.h"
#endif
#ifdef WEBRTC_CODEC_ILBC
#include "webrtc/modules/audio_coding/codecs/ilbc/interface/ilbc.h"
#endif
#ifdef WEBRTC_CODEC_ISACFX
#include "webrtc/modules/audio_coding/codecs/isac/fix/interface/isacfix.h"
#endif
#ifdef WEBRTC_CODEC_ISAC
#include "webrtc/modules/audio_coding/codecs/isac/main/interface/isac.h"
#endif
#ifdef WEBRTC_CODEC_OPUS
#include "webrtc/modules/audio_coding/codecs/opus/interface/opus_interface.h"
#endif
#ifdef WEBRTC_CODEC_PCM16
#include "webrtc/modules/audio_coding/codecs/pcm16b/include/pcm16b.h"
#endif

// NOTE(review): in the text under review the <...> arguments of the two system
// includes above and of every cast in this part of the file were missing. They
// are restored here from how the values are used and from the codec functions
// being called -- confirm each cast target against the codec headers.

namespace webrtc {

// PCMu
// Decodes a PCMu payload by forwarding |encoded| / |encoded_len| to
// WebRtcG711_DecodeU(), writing the output to |decoded|. The speech type
// reported by the codec is converted and stored in |speech_type|. Returns the
// codec's return value unchanged.
// NOTE(review): |decodedSize| is not used, and no other Decode() in this file
// takes such a parameter -- confirm this signature against the header.
int AudioDecoderPcmU::Decode(const uint8_t* encoded, size_t encoded_len,
                             int16_t* decoded, size_t decodedSize,
                             SpeechType* speech_type) {
  int16_t temp_type = 1;  // Default is speech.
  int16_t ret = WebRtcG711_DecodeU(
      state_, reinterpret_cast<int16_t*>(const_cast<uint8_t*>(encoded)),
      static_cast<int16_t>(encoded_len), decoded, &temp_type);
  *speech_type = ConvertSpeechType(temp_type);
  return ret;
}

// Returns |encoded_len| divided by the channel count; |encoded| is not
// inspected.
int AudioDecoderPcmU::PacketDuration(const uint8_t* encoded,
                                     size_t encoded_len) {
  // One encoded byte per sample per channel.
  return static_cast<int>(encoded_len / channels_);
}

// PCMa
// Same flow as the PCMu decoder, but forwards to WebRtcG711_DecodeA().
int AudioDecoderPcmA::Decode(const uint8_t* encoded, size_t encoded_len,
                             int16_t* decoded, SpeechType* speech_type) {
  int16_t temp_type = 1;  // Default is speech.
  int16_t ret = WebRtcG711_DecodeA(
      state_, reinterpret_cast<int16_t*>(const_cast<uint8_t*>(encoded)),
      static_cast<int16_t>(encoded_len), decoded, &temp_type);
  *speech_type = ConvertSpeechType(temp_type);
  return ret;
}

// Returns |encoded_len| divided by the channel count; |encoded| is not
// inspected.
int AudioDecoderPcmA::PacketDuration(const uint8_t* encoded,
                                     size_t encoded_len) {
  // One encoded byte per sample per channel.
  return static_cast<int>(encoded_len / channels_);
}

// PCM16B
#ifdef WEBRTC_CODEC_PCM16
// Accepts only the four single-channel PCM16B decoder types; any other |type|
// trips the assert in builds where asserts are enabled.
AudioDecoderPcm16B::AudioDecoderPcm16B(enum NetEqDecoder type)
    : AudioDecoder(type) {
  assert(type == kDecoderPCM16B ||
         type == kDecoderPCM16Bwb ||
         type == kDecoderPCM16Bswb32kHz ||
         type == kDecoderPCM16Bswb48kHz);
}

// Decodes by forwarding to WebRtcPcm16b_DecodeW16(); stores the converted
// speech type in |speech_type| and returns the codec's return value.
int AudioDecoderPcm16B::Decode(const uint8_t* encoded, size_t encoded_len,
                               int16_t* decoded, SpeechType* speech_type) {
  int16_t temp_type = 1;  // Default is speech.
  int16_t ret = WebRtcPcm16b_DecodeW16(
      state_, reinterpret_cast<int16_t*>(const_cast<uint8_t*>(encoded)),
      static_cast<int16_t>(encoded_len), decoded, &temp_type);
  *speech_type = ConvertSpeechType(temp_type);
  return ret;
}

// Returns |encoded_len| divided by twice the channel count; |encoded| is not
// inspected.
int AudioDecoderPcm16B::PacketDuration(const uint8_t* encoded,
                                       size_t encoded_len) {
  // Two encoded byte per sample per channel.
  return static_cast<int>(encoded_len / (2 * channels_));
}

// Multi-channel PCM16B. The base class is constructed with a type its own
// assert accepts; |codec_type_| is then overwritten with the real |type| and
// |channels_| is derived from it (2 for the *_2ch types, 5 for
// kDecoderPCM16B_5ch). Any other |type| trips assert(false).
AudioDecoderPcm16BMultiCh::AudioDecoderPcm16BMultiCh(
    enum NetEqDecoder type)
    : AudioDecoderPcm16B(kDecoderPCM16B) {  // This will be changed below.
  codec_type_ = type;  // Changing to actual type here.
  switch (codec_type_) {
    case kDecoderPCM16B_2ch:
    case kDecoderPCM16Bwb_2ch:
    case kDecoderPCM16Bswb32kHz_2ch:
    case kDecoderPCM16Bswb48kHz_2ch:
      channels_ = 2;
      break;
    case kDecoderPCM16B_5ch:
      channels_ = 5;
      break;
    default:
      assert(false);
  }
}
#endif

// iLBC
#ifdef WEBRTC_CODEC_ILBC
// Creates the iLBC decoder instance and stores it in |state_|. The return
// value of the create call is not checked.
AudioDecoderIlbc::AudioDecoderIlbc() : AudioDecoder(kDecoderILBC) {
  WebRtcIlbcfix_DecoderCreate(reinterpret_cast<iLBC_decinst_t**>(&state_));
}

// Frees the decoder instance held in |state_|.
AudioDecoderIlbc::~AudioDecoderIlbc() {
  WebRtcIlbcfix_DecoderFree(static_cast<iLBC_decinst_t*>(state_));
}

// Decodes by forwarding to WebRtcIlbcfix_Decode(); stores the converted speech
// type in |speech_type| and returns the codec's return value.
int AudioDecoderIlbc::Decode(const uint8_t* encoded, size_t encoded_len,
                             int16_t* decoded, SpeechType* speech_type) {
  int16_t temp_type = 1;  // Default is speech.
  int16_t ret = WebRtcIlbcfix_Decode(static_cast<iLBC_decinst_t*>(state_),
                                     reinterpret_cast<const int16_t*>(encoded),
                                     static_cast<int16_t>(encoded_len), decoded,
                                     &temp_type);
  *speech_type = ConvertSpeechType(temp_type);
  return ret;
}

// Forwards |num_frames| and |decoded| to WebRtcIlbcfix_NetEqPlc() and returns
// its result.
int AudioDecoderIlbc::DecodePlc(int num_frames, int16_t* decoded) {
  return WebRtcIlbcfix_NetEqPlc(static_cast<iLBC_decinst_t*>(state_),
                                decoded, num_frames);
}

// Initializes the decoder through WebRtcIlbcfix_Decoderinit30Ms() and returns
// its result.
int AudioDecoderIlbc::Init() {
  return WebRtcIlbcfix_Decoderinit30Ms(static_cast<iLBC_decinst_t*>(state_));
}
#endif

// iSAC float
#ifdef WEBRTC_CODEC_ISAC
// Creates the iSAC instance in |state_| and sets its decoder sample rate to
// 16000. Neither return value is checked.
AudioDecoderIsac::AudioDecoderIsac() : AudioDecoder(kDecoderISAC) {
  WebRtcIsac_Create(reinterpret_cast<ISACStruct**>(&state_));
  WebRtcIsac_SetDecSampRate(static_cast<ISACStruct*>(state_), 16000);
}

// Frees the iSAC instance held in |state_|.
AudioDecoderIsac::~AudioDecoderIsac() {
  WebRtcIsac_Free(static_cast<ISACStruct*>(state_));
}

// Decodes by forwarding to WebRtcIsac_Decode(); stores the converted speech
// type in |speech_type| and returns the codec's return value.
int AudioDecoderIsac::Decode(const uint8_t* encoded, size_t encoded_len,
                             int16_t* decoded, SpeechType* speech_type) {
  int16_t temp_type = 1;  // Default is speech.
  int16_t ret = WebRtcIsac_Decode(static_cast<ISACStruct*>(state_),
                                  reinterpret_cast<const uint16_t*>(encoded),
                                  static_cast<int16_t>(encoded_len), decoded,
                                  &temp_type);
  *speech_type = ConvertSpeechType(temp_type);
  return ret;
}

// Same flow as Decode(), but forwards to WebRtcIsac_DecodeRcu().
int AudioDecoderIsac::DecodeRedundant(const uint8_t* encoded,
                                      size_t encoded_len, int16_t* decoded,
                                      SpeechType* speech_type) {
  int16_t temp_type = 1;  // Default is speech.
  int16_t ret = WebRtcIsac_DecodeRcu(static_cast<ISACStruct*>(state_),
                                     reinterpret_cast<const uint16_t*>(encoded),
                                     static_cast<int16_t>(encoded_len), decoded,
                                     &temp_type);
  *speech_type = ConvertSpeechType(temp_type);
  return ret;
}

// Forwards |num_frames| and |decoded| to WebRtcIsac_DecodePlc() and returns
// its result.
int AudioDecoderIsac::DecodePlc(int num_frames, int16_t* decoded) {
  return WebRtcIsac_DecodePlc(static_cast<ISACStruct*>(state_),
                              decoded, num_frames);
}

// Initializes the decoder through WebRtcIsac_DecoderInit() and returns its
// result.
int AudioDecoderIsac::Init() {
  return WebRtcIsac_DecoderInit(static_cast<ISACStruct*>(state_));
}

// Hands the payload together with its RTP sequence number, RTP timestamp and
// arrival timestamp to WebRtcIsac_UpdateBwEstimate() and returns its result.
int AudioDecoderIsac::IncomingPacket(const uint8_t* payload,
                                     size_t payload_len,
                                     uint16_t rtp_sequence_number,
                                     uint32_t rtp_timestamp,
                                     uint32_t arrival_timestamp) {
  return WebRtcIsac_UpdateBwEstimate(static_cast<ISACStruct*>(state_),
                                     reinterpret_cast<const uint16_t*>(payload),
                                     static_cast<int32_t>(payload_len),
                                     rtp_sequence_number,
                                     rtp_timestamp,
                                     arrival_timestamp);
}

// Returns the value of WebRtcIsac_GetErrorCode() for this instance.
int AudioDecoderIsac::ErrorCode() {
  return WebRtcIsac_GetErrorCode(static_cast<ISACStruct*>(state_));
}

// iSAC SWB
// Reuses the iSAC float setup, then switches |codec_type_| to kDecoderISACswb
// and the decoder sample rate to 32000.
AudioDecoderIsacSwb::AudioDecoderIsacSwb() : AudioDecoderIsac() {
  codec_type_ = kDecoderISACswb;
  WebRtcIsac_SetDecSampRate(static_cast<ISACStruct*>(state_), 32000);
}

// iSAC FB
// Reuses the SWB setup and only changes |codec_type_| to kDecoderISACfb.
AudioDecoderIsacFb::AudioDecoderIsacFb() : AudioDecoderIsacSwb() {
  codec_type_ = kDecoderISACfb;
}
#endif

// iSAC fix
#ifdef WEBRTC_CODEC_ISACFX
// Creates the fixed-point iSAC instance in |state_|. The return value of the
// create call is not checked.
AudioDecoderIsacFix::AudioDecoderIsacFix() : AudioDecoder(kDecoderISAC) {
  WebRtcIsacfix_Create(reinterpret_cast<ISACFIX_MainStruct**>(&state_));
}

// Frees the instance held in |state_|.
AudioDecoderIsacFix::~AudioDecoderIsacFix() {
  WebRtcIsacfix_Free(static_cast<ISACFIX_MainStruct*>(state_));
}

// Decodes by forwarding to WebRtcIsacfix_Decode(); stores the converted speech
// type in |speech_type| and returns the codec's return value.
int AudioDecoderIsacFix::Decode(const uint8_t* encoded, size_t encoded_len,
                                int16_t* decoded, SpeechType* speech_type) {
  int16_t temp_type = 1;  // Default is speech.
  int16_t ret = WebRtcIsacfix_Decode(static_cast<ISACFIX_MainStruct*>(state_),
                                     reinterpret_cast<const uint16_t*>(encoded),
                                     static_cast<int16_t>(encoded_len), decoded,
                                     &temp_type);
  *speech_type = ConvertSpeechType(temp_type);
  return ret;
}

// Initializes the decoder through WebRtcIsacfix_DecoderInit() and returns its
// result.
int AudioDecoderIsacFix::Init() {
  return WebRtcIsacfix_DecoderInit(static_cast<ISACFIX_MainStruct*>(state_));
}

// Hands the payload together with its RTP sequence number, RTP timestamp and
// arrival timestamp to WebRtcIsacfix_UpdateBwEstimate() and returns its
// result.
int AudioDecoderIsacFix::IncomingPacket(const uint8_t* payload,
                                        size_t payload_len,
                                        uint16_t rtp_sequence_number,
                                        uint32_t rtp_timestamp,
                                        uint32_t arrival_timestamp) {
  return WebRtcIsacfix_UpdateBwEstimate(
      static_cast<ISACFIX_MainStruct*>(state_),
      reinterpret_cast<const uint16_t*>(payload),
      static_cast<int32_t>(payload_len),
      rtp_sequence_number,
      rtp_timestamp,
      arrival_timestamp);
}

// Returns the value of WebRtcIsacfix_GetErrorCode() for this instance.
int AudioDecoderIsacFix::ErrorCode() {
  return WebRtcIsacfix_GetErrorCode(static_cast<ISACFIX_MainStruct*>(state_));
}
#endif

// G.722
#ifdef WEBRTC_CODEC_G722
// Creates the G.722 decoder instance in |state_|. The return value of the
// create call is not checked.
AudioDecoderG722::AudioDecoderG722() : AudioDecoder(kDecoderG722) {
  WebRtcG722_CreateDecoder(reinterpret_cast<G722DecInst**>(&state_));
}

// Frees the decoder instance held in |state_|.
AudioDecoderG722::~AudioDecoderG722() {
  WebRtcG722_FreeDecoder(static_cast<G722DecInst*>(state_));
}

// Decodes by forwarding to WebRtcG722_Decode(); stores the converted speech
// type in |speech_type| and returns the codec's return value.
int AudioDecoderG722::Decode(const uint8_t* encoded, size_t encoded_len,
                             int16_t* decoded, SpeechType* speech_type) {
  int16_t temp_type = 1;  // Default is speech.
  int16_t ret = WebRtcG722_Decode(
      static_cast<G722DecInst*>(state_),
      const_cast<int16_t*>(reinterpret_cast<const int16_t*>(encoded)),
      static_cast<int16_t>(encoded_len), decoded, &temp_type);
  *speech_type = ConvertSpeechType(temp_type);
  return ret;
}

// Initializes the decoder through WebRtcG722_DecoderInit() and returns its
// result.
int AudioDecoderG722::Init() {
  return WebRtcG722_DecoderInit(static_cast<G722DecInst*>(state_));
}

// Returns twice |encoded_len| divided by the channel count; |encoded| is not
// inspected.
int AudioDecoderG722::PacketDuration(const uint8_t* encoded,
                                     size_t encoded_len) {
  // 1/2 encoded byte per sample per channel.
  return static_cast<int>(2 * encoded_len / channels_);
}

// Stereo G.722: the base-class instance decodes the left channel and a second
// instance, created here, decodes the right channel.
AudioDecoderG722Stereo::AudioDecoderG722Stereo()
    : AudioDecoderG722(),
      state_left_(state_),  // Base member |state_| is used for left channel.
      state_right_(NULL) {
  channels_ = 2;
  // |state_left_| already created by the base class AudioDecoderG722.
  WebRtcG722_CreateDecoder(reinterpret_cast<G722DecInst**>(&state_right_));
}

AudioDecoderG722Stereo::~AudioDecoderG722Stereo() {
  // |state_left_| will be freed by the base class AudioDecoderG722.
  WebRtcG722_FreeDecoder(static_cast<G722DecInst*>(state_right_));
}

// Decodes a stereo payload. The payload is first regrouped by
// SplitStereoPacket() into a temporary buffer whose first |encoded_len| / 2
// bytes are handed to the left decoder and whose following |encoded_len| / 2
// bytes are handed to the right decoder. If the right decoder returns the same
// value as the left one, the two outputs are interleaved in |decoded| and the
// sum of both return values is returned. Otherwise the return value of the
// last decode call that ran is returned and |decoded| is left un-interleaved.
int AudioDecoderG722Stereo::Decode(const uint8_t* encoded, size_t encoded_len,
                                   int16_t* decoded, SpeechType* speech_type) {
  int16_t temp_type = 1;  // Default is speech.
  // De-interleave the bit-stream into two separate payloads.
  uint8_t* encoded_deinterleaved = new uint8_t[encoded_len];
  SplitStereoPacket(encoded, encoded_len, encoded_deinterleaved);
  // Decode left and right.
  int16_t ret = WebRtcG722_Decode(
      static_cast<G722DecInst*>(state_left_),
      reinterpret_cast<int16_t*>(encoded_deinterleaved),
      static_cast<int16_t>(encoded_len / 2), decoded, &temp_type);
  if (ret >= 0) {
    int decoded_len = ret;
    // The right channel is written directly after the left channel output.
    ret = WebRtcG722_Decode(
        static_cast<G722DecInst*>(state_right_),
        reinterpret_cast<int16_t*>(&encoded_deinterleaved[encoded_len / 2]),
        static_cast<int16_t>(encoded_len / 2), &decoded[decoded_len],
        &temp_type);
    if (ret == decoded_len) {
      decoded_len += ret;
      // Interleave output. Each pass takes the next right-channel sample from
      // the second half, shifts the not-yet-interleaved left samples one
      // position up, and drops the right sample into the gap that opens
      // directly after its left counterpart.
      for (int k = decoded_len / 2; k < decoded_len; k++) {
        int16_t temp = decoded[k];
        memmove(&decoded[2 * k - decoded_len + 2],
                &decoded[2 * k - decoded_len + 1],
                (decoded_len - k - 1) * sizeof(int16_t));
        decoded[2 * k - decoded_len + 1] = temp;
      }
      ret = decoded_len;  // Return total number of samples.
    }
  }
  *speech_type = ConvertSpeechType(temp_type);
  delete [] encoded_deinterleaved;
  return ret;
}

// Initializes the right-channel decoder first; if that fails its error is
// returned, otherwise the result of the base-class Init() (left channel) is
// returned.
int AudioDecoderG722Stereo::Init() {
  int ret = WebRtcG722_DecoderInit(static_cast<G722DecInst*>(state_right_));
  if (ret != 0) {
    return ret;
  }
  return AudioDecoderG722::Init();
}

// Split the stereo packet and place left and right channel after each other
// in the output array.
void AudioDecoderG722Stereo::SplitStereoPacket(const uint8_t* encoded, size_t encoded_len, uint8_t* encoded_deinterleaved) { assert(encoded); // Regroup the 4 bits/sample so |l1 l2| |r1 r2| |l3 l4| |r3 r4| ..., // where "lx" is 4 bits representing left sample number x, and "rx" right // sample. Two samples fit in one byte, represented with |...|. for (size_t i = 0; i + 1 < encoded_len; i += 2) { uint8_t right_byte = ((encoded[i] & 0x0F) << 4) + (encoded[i + 1] & 0x0F); encoded_deinterleaved[i] = (encoded[i] & 0xF0) + (encoded[i + 1] >> 4); encoded_deinterleaved[i + 1] = right_byte; } // Move one byte representing right channel each loop, and place it at the // end of the bytestream vector. After looping the data is reordered to: // |l1 l2| |l3 l4| ... |l(N-1) lN| |r1 r2| |r3 r4| ... |r(N-1) r(N)|, // where N is the total number of samples. for (size_t i = 0; i < encoded_len / 2; i++) { uint8_t right_byte = encoded_deinterleaved[i + 1]; memmove(&encoded_deinterleaved[i + 1], &encoded_deinterleaved[i + 2], encoded_len - i - 2); encoded_deinterleaved[encoded_len - 1] = right_byte; } } #endif // CELT #ifdef WEBRTC_CODEC_CELT AudioDecoderCelt::AudioDecoderCelt(enum NetEqDecoder type) : AudioDecoder(type) { assert(type == kDecoderCELT_32 || type == kDecoderCELT_32_2ch); if (type == kDecoderCELT_32) { channels_ = 1; } else { channels_ = 2; } WebRtcCelt_CreateDec(reinterpret_cast(&state_), static_cast(channels_)); } AudioDecoderCelt::~AudioDecoderCelt() { WebRtcCelt_FreeDec(static_cast(state_)); } int AudioDecoderCelt::Decode(const uint8_t* encoded, size_t encoded_len, int16_t* decoded, SpeechType* speech_type) { int16_t temp_type = 1; // Default to speech. int ret = WebRtcCelt_DecodeUniversal(static_cast(state_), encoded, static_cast(encoded_len), decoded, &temp_type); *speech_type = ConvertSpeechType(temp_type); if (ret < 0) { return -1; } // Return the total number of samples. 
return ret * static_cast(channels_); } int AudioDecoderCelt::Init() { return WebRtcCelt_DecoderInit(static_cast(state_)); } bool AudioDecoderCelt::HasDecodePlc() const { return true; } int AudioDecoderCelt::DecodePlc(int num_frames, int16_t* decoded) { int ret = WebRtcCelt_DecodePlc(static_cast(state_), decoded, num_frames); if (ret < 0) { return -1; } // Return the total number of samples. return ret * static_cast(channels_); } #endif // Opus #ifdef WEBRTC_CODEC_OPUS AudioDecoderOpus::AudioDecoderOpus(enum NetEqDecoder type) : AudioDecoder(type) { if (type == kDecoderOpus_2ch) { channels_ = 2; } else { channels_ = 1; } WebRtcOpus_DecoderCreate(reinterpret_cast(&state_), static_cast(channels_)); } AudioDecoderOpus::~AudioDecoderOpus() { WebRtcOpus_DecoderFree(static_cast(state_)); } int AudioDecoderOpus::Decode(const uint8_t* encoded, size_t encoded_len, int16_t* decoded, SpeechType* speech_type) { int16_t temp_type = 1; // Default is speech. int16_t ret = WebRtcOpus_DecodeNew(static_cast(state_), encoded, static_cast(encoded_len), decoded, &temp_type); if (ret > 0) ret *= static_cast(channels_); // Return total number of samples. *speech_type = ConvertSpeechType(temp_type); return ret; } int AudioDecoderOpus::DecodeRedundant(const uint8_t* encoded, size_t encoded_len, int16_t* decoded, SpeechType* speech_type) { int16_t temp_type = 1; // Default is speech. int16_t ret = WebRtcOpus_DecodeFec(static_cast(state_), encoded, static_cast(encoded_len), decoded, &temp_type); if (ret > 0) ret *= static_cast(channels_); // Return total number of samples. 
*speech_type = ConvertSpeechType(temp_type); return ret; } int AudioDecoderOpus::Init() { return WebRtcOpus_DecoderInitNew(static_cast(state_)); } int AudioDecoderOpus::PacketDuration(const uint8_t* encoded, size_t encoded_len) { return WebRtcOpus_DurationEst(static_cast(state_), encoded, static_cast(encoded_len)); } int AudioDecoderOpus::PacketDurationRedundant(const uint8_t* encoded, size_t encoded_len) const { return WebRtcOpus_FecDurationEst(encoded, static_cast(encoded_len)); } bool AudioDecoderOpus::PacketHasFec(const uint8_t* encoded, size_t encoded_len) const { int fec; fec = WebRtcOpus_PacketHasFec(encoded, static_cast(encoded_len)); return (fec == 1); } #endif AudioDecoderCng::AudioDecoderCng(enum NetEqDecoder type) : AudioDecoder(type) { assert(type == kDecoderCNGnb || type == kDecoderCNGwb || kDecoderCNGswb32kHz || type == kDecoderCNGswb48kHz); WebRtcCng_CreateDec(reinterpret_cast(&state_)); assert(state_); } AudioDecoderCng::~AudioDecoderCng() { if (state_) { WebRtcCng_FreeDec(static_cast(state_)); } } int AudioDecoderCng::Init() { assert(state_); return WebRtcCng_InitDec(static_cast(state_)); } } // namespace webrtc