pinebuds/tests/anc_usb/speech_process.c

280 lines
8.8 KiB
C

/***************************************************************************
*
* Copyright 2015-2019 BES.
* All rights reserved. All unpublished rights reserved.
*
* No part of this work may be used or reproduced in any form or by any
* means, or stored in a database or retrieval system, without prior written
* permission of BES.
*
* Use of this work is governed by a license granted by BES.
* This work contains confidential and proprietary information of
* BES. which is protected by copyright, trade secret,
* trademark and other intellectual property rights.
*
****************************************************************************/
#include "speech_process.h"
#include "cmsis.h"
#include "hal_sysfreq.h"
#include "hal_timer.h"
#include "hal_trace.h"
#if defined(SPEECH_TX_AEC2FLOAT) && !defined(KEEP_SAME_LATENCY)
#error "capture/playback size should be equal when enable AEC"
#endif
#include "bt_sco_chain.h"
#include "iir_resample.h"
#include "speech_memory.h"
#include "speech_utils.h"
// Size in bytes (160 KiB) of the static pool handed to speech_init().
#define MED_MEM_POOL_SIZE (1024 * 160)
// Backing storage passed to speech_init() in speech_process_init().
static uint8_t g_medMemPool[MED_MEM_POOL_SIZE];
// Sample rate passed to speech_init(); capture/playback audio at any other
// rate is resampled to and from this rate.
#define FFSE_SAMPLE_RATE (16000)
// Stream parameters. The values below are only defaults: every one of them is
// overwritten by speech_process_init().
int capture_channel_num = 1;
int capture_sample_rate = 48000;
int capture_sample_bit = 16;
int playback_channel_num = 2;
int playback_sample_rate = 48000;
int playback_sample_bit = 24;
// Number of channels the processed capture signal is expanded to on output.
int send_channel_num = 2;
// Stored by speech_process_init(); not read anywhere else in this section.
int recv_channel_num = 2;
// resample related
// True when capture_sample_rate differs from FFSE_SAMPLE_RATE.
static bool resample_needed_flag = false;
// FFSE_SAMPLE_RATE -> capture_sample_rate state for the processed signal.
static IirResampleState *upsample_st;
// capture_sample_rate -> FFSE_SAMPLE_RATE states, one per capture channel
// (the resampler is used on one channel at a time), so at most 4 channels.
static IirResampleState *downsample_st[4];
// Capture frame in planar (channel after channel) layout.
int16_t *capture_buffer_deinter = NULL;
// Downsampled capture frame, planar layout.
int16_t *process_buffer = NULL;
// Downsampled capture frame, interleaved layout; input of speech_tx_process().
int16_t *process_buffer_inter = NULL;
// Echo reference passed to speech_tx_process(); filled by
// speech_process_playback_run() when SPEECH_TX_AEC2FLOAT is defined.
static short *aec_echo_buf = NULL;
// playback_sample_rate -> FFSE_SAMPLE_RATE state for the echo reference.
static IirResampleState *rx_downsample_st;
// Gate checked at the top of the run/deinit functions; set at the end of
// speech_process_init() and cleared by speech_process_deinit().
static volatile bool is_speech_init = false;
/**
 * Convert an interleaved multi-channel buffer to planar layout.
 *
 * Input order is frame by frame (ch0, ch1, ..., ch0, ch1, ...); output order
 * is channel by channel (all of ch0, then all of ch1, ...), each channel
 * occupying len / ch_num consecutive samples.
 *
 * @param in      Interleaved input samples.
 * @param out     Planar output; needs room for (len / ch_num) * ch_num
 *                samples. The conversion is not done in place, so it must be
 *                a different buffer from `in`.
 * @param len     Total number of samples in `in`, counting every channel.
 * @param ch_num  Number of interleaved channels.
 *
 * Only complete frames are converted. Trailing samples that do not form a
 * whole frame are ignored, and nothing is done when `len` or `ch_num` is not
 * positive, so the function never reads past in[len - 1].
 */
static void speech_deinterleave(const int16_t *in, int16_t *out, int len,
                                int ch_num) {
  if (len <= 0 || ch_num <= 0) {
    return;
  }

  const int frames = len / ch_num;

  for (int f = 0; f < frames; f++) {
    const int16_t *frame = in + f * ch_num;
    for (int c = 0; c < ch_num; c++) {
      // Channel c owns the slice [c * frames, (c + 1) * frames) of `out`.
      out[c * frames + f] = frame[c];
    }
  }
}
/**
 * Convert a planar multi-channel buffer to interleaved layout.
 *
 * Input order is channel by channel (all of ch0, then all of ch1, ...), each
 * channel occupying len / ch_num consecutive samples; output order is frame
 * by frame (ch0, ch1, ..., ch0, ch1, ...).
 *
 * @param in      Planar input samples.
 * @param out     Interleaved output; needs room for (len / ch_num) * ch_num
 *                samples. The conversion is not done in place, so it must be
 *                a different buffer from `in`.
 * @param len     Total number of samples, counting every channel.
 * @param ch_num  Number of channels.
 *
 * Only complete frames are produced. Nothing is written past
 * out[(len / ch_num) * ch_num - 1], and nothing is done when `len` or
 * `ch_num` is not positive.
 */
static void speech_interleave(const int16_t *in, int16_t *out, int len,
                              int ch_num) {
  if (len <= 0 || ch_num <= 0) {
    return;
  }

  const int frames = len / ch_num;

  for (int f = 0; f < frames; f++) {
    int16_t *frame = out + f * ch_num;
    for (int c = 0; c < ch_num; c++) {
      // Channel c is stored in the slice [c * frames, (c + 1) * frames).
      frame[c] = in[c * frames + f];
    }
  }
}
// Expand a mono buffer of `len` samples into `len` interleaved frames of
// `ch_num` channels by repeating each sample in every channel.
// The output is filled from its last element towards its first, which is
// what lets callers pass the same buffer as `in` and `out`.
static void speech_extend(int16_t *in, int16_t *out, int len, int ch_num) {
  int dst = len * ch_num;
  int src = len;

  while (src-- > 0) {
    for (int rep = ch_num; rep > 0; rep--) {
      out[--dst] = in[src];
    }
  }
}
// This function output remains the same sample rate as input,
// output channel number shoule be CHAN_NUM_SEND.
// TODO: add multi-channel support in iir resampler
void speech_process_capture_run(uint8_t *buf, uint32_t *len) {
// TRACE(2,"[%s], pcm_len: %d", __FUNCTION__, *len / 2);
if (is_speech_init == false)
return;
int16_t *pcm_buf = (int16_t *)buf;
int pcm_len = *len / 2;
int process_len = pcm_len * FFSE_SAMPLE_RATE / capture_sample_rate;
if (resample_needed_flag == true) {
if (capture_channel_num > 1)
speech_deinterleave(pcm_buf, capture_buffer_deinter, pcm_len,
capture_channel_num);
else
speech_copy_int16(capture_buffer_deinter, pcm_buf, pcm_len);
int in_len_per_channel = pcm_len / capture_channel_num;
int out_len_per_channel = process_len / capture_channel_num;
for (int i = 0; i < capture_channel_num; i++) {
iir_resample_process(
downsample_st[i], &capture_buffer_deinter[i * in_len_per_channel],
&process_buffer[i * out_len_per_channel], in_len_per_channel);
}
if (capture_channel_num > 1)
speech_interleave(process_buffer, process_buffer_inter, process_len,
capture_channel_num);
else
speech_copy_int16(process_buffer_inter, process_buffer, process_len);
speech_tx_process(process_buffer_inter, aec_echo_buf, &process_len);
iir_resample_process(upsample_st, process_buffer_inter, pcm_buf,
process_len);
if (send_channel_num > 1)
speech_extend(pcm_buf, pcm_buf, in_len_per_channel, send_channel_num);
} else {
speech_tx_process(pcm_buf, aec_echo_buf, &process_len);
if (send_channel_num > 1)
speech_extend(pcm_buf, pcm_buf, process_len, send_channel_num);
}
pcm_len = pcm_len / capture_channel_num * send_channel_num;
*len = pcm_len * sizeof(int16_t);
}
// Stores the current playback frame as the echo reference (aec_echo_buf).
// Does nothing before speech_process_init(), and has no effect at all unless
// the file is built with SPEECH_TX_AEC2FLOAT. On the resample path `buf` is
// overwritten in place; `*len` is read but never updated.
void speech_process_playback_run(uint8_t *buf, uint32_t *len) {
  // TRACE(2,"[%s] pcm_len: %d", __FUNCTION__, *len / 2);
  if (!is_speech_init) {
    return;
  }

#if defined(SPEECH_TX_AEC2FLOAT)
  int16_t *ref = (int16_t *)buf;
  // Bytes -> number of 16-bit slots in the buffer.
  int ref_len = *len / 2;

  if (resample_needed_flag) {
    // Convert to 16bit if necessary: the input is read as 32-bit words and
    // shifted right by 8. Writing forward in place is safe because the
    // 16-bit write position never gets ahead of the 32-bit read position.
    if (playback_sample_bit == 24) {
      int32_t *wide = (int32_t *)buf;
      int words = ref_len / 2;
      for (int k = 0; k < words; k++) {
        ref[k] = (wide[k] >> 8);
      }
      ref_len >>= 1;
    }

    // Convert to mono if necessary, choose left channel
    if (playback_channel_num == 2) {
      for (int frame = 0; 2 * frame < ref_len; frame++) {
        ref[frame] = ref[2 * frame];
      }
      ref_len >>= 1;
    }

    iir_resample_process(rx_downsample_st, ref, ref, ref_len);
  }

  // NOTE(review): the length below is scaled by capture_sample_rate, not
  // playback_sample_rate, and speech_process_init() allocates aec_echo_buf
  // only when resampling is needed - confirm both are intended.
  speech_copy_int16(aec_echo_buf, ref,
                    ref_len * FFSE_SAMPLE_RATE / capture_sample_rate);
#endif
}
/**
 * Configure the capture/playback parameters and start the speech chain.
 *
 * Stores the stream parameters in the file-scope configuration variables,
 * calls speech_init() with the static memory pool, and, when the capture rate
 * differs from FFSE_SAMPLE_RATE, allocates the work buffers and resampler
 * states used by the run functions. The run functions are enabled last.
 *
 * @param tx_sample_rate       Capture sample rate; must be 16000 or 48000.
 * @param tx_channel_num       Capture channel count; must be between 1 and
 *                             the number of downsampler slots (4).
 * @param tx_sample_bit        Capture sample width; only stored here.
 * @param rx_sample_rate       Playback sample rate.
 * @param rx_channel_num       Playback channel count.
 * @param rx_sample_bit        Playback sample width.
 * @param tx_frame_ms          Capture frame length in ms; must be 16.
 * @param rx_frame_ms          Playback frame length in ms; sizes the echo
 *                             reference buffer and its downsampler.
 * @param tx_send_channel_num  Channel count of the processed capture output.
 * @param rx_recv_channel_num  Only stored here.
 */
void speech_process_init(int tx_sample_rate, int tx_channel_num,
                         int tx_sample_bit, int rx_sample_rate,
                         int rx_channel_num, int rx_sample_bit, int tx_frame_ms,
                         int rx_frame_ms, int tx_send_channel_num,
                         int rx_recv_channel_num) {
  ASSERT(tx_sample_rate == 16000 || tx_sample_rate == 48000,
         "[%s] sample rate(%d) not supported", __FUNCTION__, tx_sample_rate);
  ASSERT(tx_frame_ms == 16, "[%s] just support 16ms frame", __func__);
  // The channel count indexes downsample_st[] below and is used as a divisor
  // in the capture path, so reject values that would overrun the array or
  // divide by zero.
  ASSERT(tx_channel_num >= 1 &&
             tx_channel_num <=
                 (int)(sizeof(downsample_st) / sizeof(downsample_st[0])),
         "[%s] channel num(%d) not supported", __FUNCTION__, tx_channel_num);

  capture_sample_rate = tx_sample_rate;
  capture_channel_num = tx_channel_num;
  capture_sample_bit = tx_sample_bit;
  playback_sample_rate = rx_sample_rate;
  playback_channel_num = rx_channel_num;
  playback_sample_bit = rx_sample_bit;
  send_channel_num = tx_send_channel_num;
  recv_channel_num = rx_recv_channel_num;
  resample_needed_flag = (capture_sample_rate != FFSE_SAMPLE_RATE);

  TRACE(5,
        "[%s] sample_rate: %d, frame_ms: %d, channel_num: %d, "
        "resample_needed_flag: %d",
        __FUNCTION__, tx_sample_rate, tx_frame_ms, tx_channel_num,
        resample_needed_flag);

  speech_init(FFSE_SAMPLE_RATE, FFSE_SAMPLE_RATE, tx_frame_ms, tx_frame_ms,
              tx_frame_ms, &g_medMemPool[0], MED_MEM_POOL_SIZE);

  // Resample state must be created after speech init, as it uses speech heap
  if (resample_needed_flag == true) {
    // One capture frame at the capture rate, all channels, planar layout.
    capture_buffer_deinter =
        speech_calloc(SPEECH_FRAME_MS_TO_LEN(capture_sample_rate, tx_frame_ms) *
                          capture_channel_num,
                      sizeof(int16_t));
    // One capture frame at FFSE_SAMPLE_RATE, all channels (planar, then
    // interleaved).
    process_buffer =
        speech_calloc(SPEECH_FRAME_MS_TO_LEN(FFSE_SAMPLE_RATE, tx_frame_ms) *
                          capture_channel_num,
                      sizeof(int16_t));
    process_buffer_inter =
        speech_calloc(SPEECH_FRAME_MS_TO_LEN(FFSE_SAMPLE_RATE, tx_frame_ms) *
                          capture_channel_num,
                      sizeof(int16_t));

    upsample_st = iir_resample_init(
        SPEECH_FRAME_MS_TO_LEN(FFSE_SAMPLE_RATE, tx_frame_ms),
        iir_resample_choose_mode(FFSE_SAMPLE_RATE, capture_sample_rate));

    // as iir resample can only deal with mono signal, we should init
    // channel_num states
    for (int i = 0; i < capture_channel_num; i++) {
      downsample_st[i] = iir_resample_init(
          SPEECH_FRAME_MS_TO_LEN(capture_sample_rate, tx_frame_ms),
          iir_resample_choose_mode(capture_sample_rate, FFSE_SAMPLE_RATE));
    }

    // Echo reference: one playback frame at FFSE_SAMPLE_RATE, plus the
    // downsampler that produces it from the playback stream.
    aec_echo_buf = speech_calloc(
        SPEECH_FRAME_MS_TO_LEN(FFSE_SAMPLE_RATE, rx_frame_ms), sizeof(int16_t));
    rx_downsample_st = iir_resample_init(
        SPEECH_FRAME_MS_TO_LEN(playback_sample_rate, rx_frame_ms),
        iir_resample_choose_mode(playback_sample_rate, FFSE_SAMPLE_RATE));
  }

  // Open the gate only once everything the run functions use is in place.
  is_speech_init = true;
}
/**
 * Stop the speech chain and release everything speech_process_init() set up.
 *
 * Does nothing when the chain is not initialised. The run functions are
 * disabled before any resource is released, and every released pointer is
 * reset to NULL so that no stale pointer survives until the next init.
 */
void speech_process_deinit(void) {
  if (is_speech_init == false)
    return;

  // Close the gate first: the run functions check this flag on entry, so they
  // must not be let in while the buffers below are being freed.
  // NOTE(review): the flag is volatile, which suggests the run functions are
  // called from another context - a call already in progress is not waited
  // for here; confirm the caller stops the streams before deinit.
  is_speech_init = false;

  if (resample_needed_flag == true) {
    speech_free(capture_buffer_deinter);
    capture_buffer_deinter = NULL;
    speech_free(process_buffer);
    process_buffer = NULL;
    speech_free(process_buffer_inter);
    process_buffer_inter = NULL;

    iir_resample_destroy(upsample_st);
    upsample_st = NULL;
    for (int i = 0; i < capture_channel_num; i++) {
      iir_resample_destroy(downsample_st[i]);
      downsample_st[i] = NULL;
    }

    speech_free(aec_echo_buf);
    aec_echo_buf = NULL;
    iir_resample_destroy(rx_downsample_st);
    rx_downsample_st = NULL;

    resample_needed_flag = false;
  }

  speech_deinit();
}
// Returns the fixed value HAL_CMU_FREQ_208M (presumably the clock setting
// callers should request while speech processing runs - confirm with caller).
enum HAL_CMU_FREQ_T speech_process_need_freq(void) {
  return HAL_CMU_FREQ_208M;
}