/***************************************************************************
 *
 * Copyright 2015-2019 BES.
 * All rights reserved. All unpublished rights reserved.
 *
 * No part of this work may be used or reproduced in any form or by any
 * means, or stored in a database or retrieval system, without prior written
 * permission of BES.
 *
 * Use of this work is governed by a license granted by BES.
 * This work contains confidential and proprietary information of
 * BES. which is protected by copyright, trade secret,
 * trademark and other intellectual property rights.
 *
 ****************************************************************************/
#include "speech_process.h"
#include "cmsis.h"
#include "hal_sysfreq.h"
#include "hal_timer.h"
#include "hal_trace.h"

#if defined(SPEECH_TX_AEC2FLOAT) && !defined(KEEP_SAME_LATENCY)
#error "capture/playback size should be equal when enable AEC"
#endif

#include "bt_sco_chain.h"
#include "iir_resample.h"
#include "speech_memory.h"
#include "speech_utils.h"

#define MED_MEM_POOL_SIZE (1024 * 160)
static uint8_t g_medMemPool[MED_MEM_POOL_SIZE];

// Sample rate handed to speech_init(); capture data at any other rate is
// resampled to/from this rate around speech_tx_process().
#define FFSE_SAMPLE_RATE (16000)

// Number of per-channel downsample states available; speech_process_init()
// rejects capture channel counts outside [1, SPEECH_MAX_CAPTURE_CHANNELS].
#define SPEECH_MAX_CAPTURE_CHANNELS (4)

int capture_channel_num = 1;
int capture_sample_rate = 48000;
int capture_sample_bit = 16;

int playback_channel_num = 2;
int playback_sample_rate = 48000;
int playback_sample_bit = 24;

int send_channel_num = 2;
int recv_channel_num = 2;

// resample related
static bool resample_needed_flag = false;
static IirResampleState *upsample_st;
static IirResampleState *downsample_st[SPEECH_MAX_CAPTURE_CHANNELS];
int16_t *capture_buffer_deinter = NULL;
int16_t *process_buffer = NULL;
int16_t *process_buffer_inter = NULL;

static short *aec_echo_buf = NULL;
static IirResampleState *rx_downsample_st;

static volatile bool is_speech_init = false;

/*
 * Split an interleaved buffer into per-channel (planar) runs.
 *
 * in     : interleaved samples, `len` int16 values in total
 * out    : destination; channel c occupies out[c * (len / ch_num)] onwards
 * len    : total number of samples across all channels
 * ch_num : number of interleaved channels (must be non-zero)
 */
static void speech_deinterleave(int16_t *in, int16_t *out, int len,
                                int ch_num) {
  int len_per_channel = len / ch_num;

  for (int i = 0, j = 0; i < len; i += ch_num, j++) {
    int16_t *pout = &out[j];
    int16_t *pin = &in[i];
    for (int c = 0; c < ch_num; c++) {
      *pout = *pin;
      pout += len_per_channel;
      pin += 1;
    }
  }
}

/*
 * Merge per-channel (planar) runs back into an interleaved buffer.
 *
 * in     : planar samples; channel c starts at in[c * (len / ch_num)]
 * out    : destination, `len` interleaved int16 values
 * len    : total number of samples across all channels
 * ch_num : number of channels (must be non-zero)
 */
static void speech_interleave(int16_t *in, int16_t *out, int len, int ch_num) {
  int len_per_channel = len / ch_num;

  for (int i = 0, j = 0; j < len; i++, j += ch_num) {
    int16_t *pout = &out[j];
    int16_t *pin = &in[i];
    for (int c = 0; c < ch_num; c++) {
      *pout = *pin;
      pout += 1;
      pin += len_per_channel;
    }
  }
}

/*
 * Duplicate every input sample `ch_num` times, producing len * ch_num output
 * samples.
 *
 * The output is filled from the last sample backwards, so each input sample
 * is read before its slot can be overwritten; the capture path relies on this
 * by calling it with in == out.
 */
static void speech_extend(int16_t *in, int16_t *out, int len, int ch_num) {
  int16_t *pout = out + len * ch_num - 1;

  for (int i = len - 1; i >= 0; i--) {
    for (int c = 0; c < ch_num; c++) {
      *pout-- = in[i];
    }
  }
}

// This function output remains the same sample rate as input,
// output channel number should be CHAN_NUM_SEND.
// TODO: add multi-channel support in iir resampler
/*
 * Process one captured frame in place.
 *
 * buf : int16 capture samples on entry; processed samples on return
 * len : byte length of buf on entry; updated on return to the byte length for
 *       send_channel_num channels
 *
 * Returns immediately, leaving buf and *len untouched, when the module has
 * not been initialised.
 */
void speech_process_capture_run(uint8_t *buf, uint32_t *len) {
  // TRACE(2,"[%s], pcm_len: %d", __FUNCTION__, *len / 2);

  if (is_speech_init == false)
    return;

  int16_t *pcm_buf = (int16_t *)buf;
  int pcm_len = *len / 2;
  int process_len = pcm_len * FFSE_SAMPLE_RATE / capture_sample_rate;

  if (resample_needed_flag == true) {
    // The resampler handles one channel per state, so work on planar data.
    if (capture_channel_num > 1)
      speech_deinterleave(pcm_buf, capture_buffer_deinter, pcm_len,
                          capture_channel_num);
    else
      speech_copy_int16(capture_buffer_deinter, pcm_buf, pcm_len);

    int in_len_per_channel = pcm_len / capture_channel_num;
    int out_len_per_channel = process_len / capture_channel_num;

    for (int i = 0; i < capture_channel_num; i++) {
      iir_resample_process(
          downsample_st[i], &capture_buffer_deinter[i * in_len_per_channel],
          &process_buffer[i * out_len_per_channel], in_len_per_channel);
    }

    if (capture_channel_num > 1)
      speech_interleave(process_buffer, process_buffer_inter, process_len,
                        capture_channel_num);
    else
      speech_copy_int16(process_buffer_inter, process_buffer, process_len);

    // process_len is passed by pointer and may be updated by the callee.
    speech_tx_process(process_buffer_inter, aec_echo_buf, &process_len);

    // Back to the capture rate, written straight into the caller's buffer.
    iir_resample_process(upsample_st, process_buffer_inter, pcm_buf,
                         process_len);

    if (send_channel_num > 1)
      speech_extend(pcm_buf, pcm_buf, in_len_per_channel, send_channel_num);
  } else {
    speech_tx_process(pcm_buf, aec_echo_buf, &process_len);

    if (send_channel_num > 1)
      speech_extend(pcm_buf, pcm_buf, process_len, send_channel_num);
  }

  pcm_len = pcm_len / capture_channel_num * send_channel_num;
  *len = pcm_len * sizeof(int16_t);
}

/*
 * Feed one playback frame to the echo reference buffer.
 *
 * Only does work when SPEECH_TX_AEC2FLOAT is defined. buf is converted in
 * place (16-bit, left channel, downsampled) when resampling is enabled, then
 * copied into aec_echo_buf. *len is read but never modified.
 */
void speech_process_playback_run(uint8_t *buf, uint32_t *len) {
  // TRACE(2,"[%s] pcm_len: %d", __FUNCTION__, *len / 2);

  if (is_speech_init == false)
    return;

#if defined(SPEECH_TX_AEC2FLOAT)
  int16_t *pcm_buf = (int16_t *)buf;
  int pcm_len = *len / 2;

  if (resample_needed_flag == true) {
    // Convert to 16bit if necessary
    if (playback_sample_bit == 24) {
      int32_t *pcm32 = (int32_t *)buf;
      // pcm_len counts int16 units, so there are pcm_len / 2 32-bit words;
      // each keeps its upper bits by dropping the low 8.
      for (int i = 0; i < pcm_len / 2; i++) {
        pcm_buf[i] = (pcm32[i] >> 8);
      }
      pcm_len >>= 1;
    }

    // Convert to mono if necessary, choose left channel
    if (playback_channel_num == 2) {
      for (int i = 0, j = 0; i < pcm_len; i += 2, j++)
        pcm_buf[j] = pcm_buf[i];
      pcm_len >>= 1;
    }

    iir_resample_process(rx_downsample_st, pcm_buf, pcm_buf, pcm_len);
  }

  // aec_echo_buf is only allocated when resampling is enabled (see
  // speech_process_init); without this check the copy would write through a
  // NULL pointer when the capture rate already equals FFSE_SAMPLE_RATE.
  // NOTE(review): the length is scaled by capture_sample_rate although
  // rx_downsample_st is configured from playback_sample_rate - confirm the
  // two rates are always equal in this configuration.
  if (aec_echo_buf != NULL) {
    speech_copy_int16(aec_echo_buf, pcm_buf,
                      pcm_len * FFSE_SAMPLE_RATE / capture_sample_rate);
  }
#endif
}

/*
 * Configure the speech chain and allocate the resampling resources.
 *
 * tx_*                : capture sample rate / channel count / bit depth
 * rx_*                : playback sample rate / channel count / bit depth
 * tx_frame_ms         : capture frame duration, must be 16
 * rx_frame_ms         : playback frame duration, sizes the echo buffer
 * tx_send_channel_num : channel count produced by the capture path
 * rx_recv_channel_num : stored in recv_channel_num
 *
 * Asserts on an unsupported capture rate, frame duration or capture channel
 * count.
 */
void speech_process_init(int tx_sample_rate, int tx_channel_num,
                         int tx_sample_bit, int rx_sample_rate,
                         int rx_channel_num, int rx_sample_bit,
                         int tx_frame_ms, int rx_frame_ms,
                         int tx_send_channel_num, int rx_recv_channel_num) {
  ASSERT(tx_sample_rate == 16000 || tx_sample_rate == 48000,
         "[%s] sample rate(%d) not supported", __FUNCTION__, tx_sample_rate);
  ASSERT(tx_frame_ms == 16, "[%s] just support 16ms frame", __func__);
  // downsample_st[] has a fixed number of slots and the capture path divides
  // by the channel count, so reject anything outside the supported range.
  ASSERT(tx_channel_num >= 1 && tx_channel_num <= SPEECH_MAX_CAPTURE_CHANNELS,
         "[%s] channel num(%d) not supported", __FUNCTION__, tx_channel_num);

  capture_sample_rate = tx_sample_rate;
  capture_channel_num = tx_channel_num;
  capture_sample_bit = tx_sample_bit;
  playback_sample_rate = rx_sample_rate;
  playback_channel_num = rx_channel_num;
  playback_sample_bit = rx_sample_bit;

  send_channel_num = tx_send_channel_num;
  recv_channel_num = rx_recv_channel_num;

  resample_needed_flag = (capture_sample_rate != FFSE_SAMPLE_RATE);

  TRACE(5,
        "[%s] sample_rate: %d, frame_ms: %d, channel_num: %d, "
        "resample_needed_flag: %d",
        __FUNCTION__, tx_sample_rate, tx_frame_ms, tx_channel_num,
        resample_needed_flag);

  speech_init(FFSE_SAMPLE_RATE, FFSE_SAMPLE_RATE, tx_frame_ms, tx_frame_ms,
              tx_frame_ms, &g_medMemPool[0], MED_MEM_POOL_SIZE);

  if (resample_needed_flag == true) {
    // Resample state must be created after speech init, as it uses speech heap
    capture_buffer_deinter =
        speech_calloc(SPEECH_FRAME_MS_TO_LEN(capture_sample_rate, tx_frame_ms) *
                          capture_channel_num,
                      sizeof(int16_t));
    process_buffer =
        speech_calloc(SPEECH_FRAME_MS_TO_LEN(FFSE_SAMPLE_RATE, tx_frame_ms) *
                          capture_channel_num,
                      sizeof(int16_t));
    process_buffer_inter =
        speech_calloc(SPEECH_FRAME_MS_TO_LEN(FFSE_SAMPLE_RATE, tx_frame_ms) *
                          capture_channel_num,
                      sizeof(int16_t));

    upsample_st = iir_resample_init(
        SPEECH_FRAME_MS_TO_LEN(FFSE_SAMPLE_RATE, tx_frame_ms),
        iir_resample_choose_mode(FFSE_SAMPLE_RATE, capture_sample_rate));

    // as iir resample can only deal with mono signal, we should init
    // channel_num states
    for (int i = 0; i < capture_channel_num; i++) {
      downsample_st[i] = iir_resample_init(
          SPEECH_FRAME_MS_TO_LEN(capture_sample_rate, tx_frame_ms),
          iir_resample_choose_mode(capture_sample_rate, FFSE_SAMPLE_RATE));
    }

    // Echo reference buffer and the playback-side downsampler
    aec_echo_buf = speech_calloc(
        SPEECH_FRAME_MS_TO_LEN(FFSE_SAMPLE_RATE, rx_frame_ms), sizeof(int16_t));
    rx_downsample_st = iir_resample_init(
        SPEECH_FRAME_MS_TO_LEN(playback_sample_rate, rx_frame_ms),
        iir_resample_choose_mode(playback_sample_rate, FFSE_SAMPLE_RATE));
  }

  is_speech_init = true;
}

/*
 * Release everything speech_process_init() set up. Does nothing when the
 * module is not initialised.
 */
void speech_process_deinit(void) {
  if (is_speech_init == false)
    return;

  // Clear the flag before releasing anything so that a run call entered after
  // this point returns early instead of using freed buffers. This does not
  // protect a run call that is already in progress.
  is_speech_init = false;

  if (resample_needed_flag == true) {
    speech_free(capture_buffer_deinter);
    capture_buffer_deinter = NULL;
    speech_free(process_buffer);
    process_buffer = NULL;
    speech_free(process_buffer_inter);
    process_buffer_inter = NULL;

    iir_resample_destroy(upsample_st);
    upsample_st = NULL;

    for (int i = 0; i < capture_channel_num; i++) {
      iir_resample_destroy(downsample_st[i]);
      downsample_st[i] = NULL;
    }

    speech_free(aec_echo_buf);
    aec_echo_buf = NULL;
    iir_resample_destroy(rx_downsample_st);
    rx_downsample_st = NULL;

    resample_needed_flag = false;
  }

  speech_deinit();
}

/* Report the system clock frequency this module asks for. */
enum HAL_CMU_FREQ_T speech_process_need_freq(void) {
  return HAL_CMU_FREQ_208M;
}