Skip to content

Commit e96fcf4

Browse files
author
LittleMouse
committed
[update] llm_asr supported sensevoice, update llm_audio supported alsa cap & play, update static lib version
1 parent f12314e commit e96fcf4

File tree

13 files changed

+635
-207
lines changed

13 files changed

+635
-207
lines changed

projects/llm_framework/SConstruct

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ import shutil
55
os.environ['SDK_PATH'] = os.path.normpath(str(Path(os.getcwd())/'..'/'..'/'SDK'))
66
os.environ['EXT_COMPONENTS_PATH'] = os.path.normpath(str(Path(os.getcwd())/'..'/'..'/'ext_components'))
77

8-
version = 'v0.1.2'
8+
version = 'v0.1.3'
99
static_lib = 'static_lib'
1010
update = False
1111

projects/llm_framework/main_asr/SConstruct

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ with open(env['PROJECT_TOOL_S']) as f:
77
SRCS = Glob('src/*.c*')
88
INCLUDE = [ADir('include'), ADir('.')]
99
PRIVATE_INCLUDE = []
10-
REQUIREMENTS = ['pthread', 'utilities', 'eventpp', 'StackFlow', 'single_header_libs']
10+
REQUIREMENTS = ['pthread', 'utilities', 'ax_msp', 'eventpp', 'StackFlow', 'single_header_libs']
1111
STATIC_LIB = []
1212
DYNAMIC_LIB = []
1313
DEFINITIONS = []
@@ -17,12 +17,22 @@ LINK_SEARCH_PATH = []
1717
STATIC_FILES = []
1818

1919
LDFLAGS+=['-Wl,-rpath=/opt/m5stack/lib', '-Wl,-rpath=/usr/local/m5stack/lib', '-Wl,-rpath=/usr/local/m5stack/lib/gcc-10.3', '-Wl,-rpath=/opt/lib', '-Wl,-rpath=/opt/usr/lib', '-Wl,-rpath=./']
20-
DEFINITIONS += ['-std=c++17']
20+
DEFINITIONS += ['-std=c++17', '-fopenmp']
2121
LINK_SEARCH_PATH += [ADir('../static_lib')]
22+
REQUIREMENTS += ['ax_engine', 'ax_interpreter', 'ax_sys']
2223

2324
INCLUDE += [ADir('../static_lib/include/sherpa'), ADir('../static_lib/include/sherpa/sherpa-ncnn')]
2425
LINK_SEARCH_PATH += [ADir('../static_lib/sherpa/ncnn')]
25-
REQUIREMENTS += ['ncnn', 'sherpa-ncnn-core']
26+
LINK_SEARCH_PATH += [ADir('../static_lib/sherpa/onnx')]
27+
REQUIREMENTS += ['ncnn', '', 'onnxruntime']
28+
LDFLAGS += [
29+
'-l:libsherpa-onnx-core.a', '-l:libkaldi-native-fbank-core.a','-l:libkissfft-float.a',
30+
'-l:libkaldi-decoder-core.a', '-l:libssentencepiece_core.a', '-l:libsherpa-onnx-fst.a',
31+
'-l:libsherpa-onnx-kaldifst-core.a', '-l:libsherpa-onnx-fstfar.a',
32+
33+
'-l:libsherpa-ncnn-core.a', '-l:libsherpa-ncnn-fst.a', '-l:libsherpa-ncnn-kaldifst-core.a',
34+
'-lgomp',
35+
]
2636

2737
STATIC_FILES += Glob('mode_*.json')
2838

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
{
2+
"mode": "sense-voice-small-10s-ax650",
3+
"type": "asr",
4+
"homepage": "https://huggingface.co/yunyu1258/qwen2.5-0.5b-ha",
5+
"compile_flage": "pulsar2 build --input model-10-seconds.onnx --config config_sensevoice_main_u16.json --output_dir sensevoice-axmodel --output_name model-10-seconds.axmodel --target_hardware AX650 --compiler.check 0",
6+
"pulsar_version": "5.0-patch1-fd447d0d",
7+
"capabilities": [
8+
"Chinese",
9+
"English",
10+
"Cantonese",
11+
"Japanese",
12+
"Korean"
13+
],
14+
"input_type": [
15+
"sys.pcm",
16+
"sys.cap.0_0"
17+
],
18+
"output_type": [
19+
"asr.utf-8"
20+
],
21+
"mode_param": {
22+
"model_config.sense_voice.model": "model.axmodel",
23+
"model_config.tokens": "tokens.txt",
24+
"silero_vad.model": "silero_vad.onnx",
25+
"model_config.provider": "axera",
26+
"silence_timeout": 2000,
27+
"awake_delay": 50
28+
},
29+
"mode_param_bak": {}
30+
}

projects/llm_framework/main_asr/src/main.cpp

Lines changed: 355 additions & 114 deletions
Large diffs are not rendered by default.

projects/llm_framework/main_audio/SConstruct

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,11 @@ with open(env['PROJECT_TOOL_S']) as f:
55
exec(f.read())
66

77
# SRCS = append_srcs_dir(ADir('src'))
8-
SRCS = Glob('src/*.c*')
8+
if 'CONFIG_AX_620E_MSP_ENABLED' in os.environ:
9+
SRCS = [AFile('src/sample_audio.c'), AFile('src/main.cpp')]
10+
else:
11+
SRCS = [AFile('src/alsa_audio.c'), AFile('src/main.cpp')]
12+
913
INCLUDE = [ADir('include'), ADir('.')]
1014
PRIVATE_INCLUDE = []
1115
REQUIREMENTS = ['pthread', 'utilities', 'ax_msp', 'eventpp', 'StackFlow', 'single_header_libs']

projects/llm_framework/main_audio/src/alsa_audio.c

Lines changed: 93 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,11 @@
66
#include <errno.h>
77
#include <string.h>
88

9-
static int gcapLoopExit = 0;
9+
static struct pcm *g_play_pcm = NULL;
10+
static int gplayLoopExit = 1;
11+
static int gcapLoopExit = 1;
12+
AlsaConfig cap_config;
13+
AlsaConfig play_config;
1014

1115
void alsa_cap_start(unsigned int card, unsigned int device, float Volume, int channel, int rate, int bit,
1216
AUDIOCallback callback)
@@ -23,7 +27,7 @@ void alsa_cap_start(unsigned int card, unsigned int device, float Volume, int ch
2327

2428
memset(&config, 0, sizeof(config));
2529
config.channels = channel;
26-
config.rate = 48000; // TODO: 部分USB MIC仅支持48k,暂时固定采集为48k
30+
config.rate = 48000;
2731
config.period_size = 120;
2832
config.period_count = 4;
2933
config.format = PCM_FORMAT_S16_LE;
@@ -57,14 +61,16 @@ void alsa_cap_start(unsigned int card, unsigned int device, float Volume, int ch
5761

5862
SRC_STATE *src_state = NULL;
5963
float *in_float = NULL, *out_float = NULL;
64+
6065
int in_frames = pcm_get_buffer_size(pcm);
6166
int out_frames = (int)((float)in_frames * ((float)rate / 48000.0f) + 1);
62-
int out_bytes = out_frames * channel * sizeof(short);
67+
68+
int src_channels = 1;
6369

6470
if (rate != 48000) {
65-
src_state = src_new(SRC_SINC_FASTEST, channel, NULL);
66-
in_float = malloc(in_frames * channel * sizeof(float));
67-
out_float = malloc(out_frames * channel * sizeof(float));
71+
src_state = src_new(SRC_SINC_FASTEST, src_channels, NULL);
72+
in_float = malloc(in_frames * src_channels * sizeof(float));
73+
out_float = malloc(out_frames * src_channels * sizeof(float));
6874
if (!src_state || !in_float || !out_float) {
6975
fprintf(stderr, "Unable to allocate resample buffers\n");
7076
free(buffer);
@@ -82,37 +88,62 @@ void alsa_cap_start(unsigned int card, unsigned int device, float Volume, int ch
8288
fprintf(stderr, "Error capturing samples - %d (%s)\n", errno, strerror(errno));
8389
break;
8490
}
91+
8592
frames_read = ret;
8693
total_frames_read += frames_read;
8794

8895
if (rate == 48000) {
89-
callback(buffer, frames_read * bytes_per_frame);
96+
int in_channels = channel; // e.g. 4
97+
int16_t *in = (int16_t *)buffer;
98+
99+
int16_t *ch0 = malloc(frames_read * sizeof(int16_t));
100+
if (!ch0) {
101+
fprintf(stderr, "Unable to allocate ch0 buffer\n");
102+
break;
103+
}
104+
105+
for (unsigned int i = 0; i < frames_read; ++i) {
106+
ch0[i] = in[i * in_channels + 0];
107+
}
108+
109+
callback((const char *)ch0, frames_read * sizeof(int16_t));
110+
free(ch0);
111+
90112
} else {
113+
int in_channels = channel; // e.g. 4
91114
short *in_short = (short *)buffer;
92-
for (int i = 0; i < frames_read * channel; ++i) {
93-
in_float[i] = in_short[i] / 32768.0f;
115+
116+
for (int i = 0; i < frames_read; ++i) {
117+
short s = in_short[i * in_channels + 0];
118+
in_float[i] = s / 32768.0f;
94119
}
120+
95121
SRC_DATA src_data;
96122
src_data.data_in = in_float;
97123
src_data.input_frames = frames_read;
98124
src_data.data_out = out_float;
99125
src_data.output_frames = out_frames;
100126
src_data.src_ratio = (double)rate / 48000.0;
101127
src_data.end_of_input = 0;
102-
int error = src_process(src_state, &src_data);
128+
129+
int error = src_process(src_state, &src_data);
103130
if (error) {
104131
fprintf(stderr, "SRC error: %s\n", src_strerror(error));
105132
break;
106133
}
107-
// float转short
108-
short *out_short = malloc(src_data.output_frames_gen * channel * sizeof(short));
109-
for (int i = 0; i < src_data.output_frames_gen * channel; ++i) {
134+
int out_samples = src_data.output_frames_gen;
135+
short *out_short = malloc(out_samples * sizeof(short));
136+
if (!out_short) {
137+
fprintf(stderr, "Unable to allocate out_short buffer\n");
138+
break;
139+
}
140+
for (int i = 0; i < out_samples; ++i) {
110141
float sample = out_float[i];
111142
if (sample > 1.0f) sample = 1.0f;
112143
if (sample < -1.0f) sample = -1.0f;
113144
out_short[i] = (short)(sample * 32767.0f);
114145
}
115-
callback((const char *)out_short, src_data.output_frames_gen * channel * sizeof(short));
146+
callback((const char *)out_short, out_samples * sizeof(short));
116147
free(out_short);
117148
}
118149
}
@@ -124,7 +155,6 @@ void alsa_cap_start(unsigned int card, unsigned int device, float Volume, int ch
124155
}
125156
free(buffer);
126157
pcm_close(pcm);
127-
printf("Total frames captured: %u\n", total_frames_read);
128158
}
129159

130160
void alsa_close_cap()
@@ -135,4 +165,52 @@ void alsa_close_cap()
135165
int alsa_cap_status()
136166
{
137167
return gcapLoopExit;
168+
}
169+
170+
void alsa_play(unsigned int card, unsigned int device, float Volume, int channel, int rate, int bit, const void *data,
171+
int size)
172+
{
173+
gplayLoopExit = 0;
174+
175+
struct pcm_config config;
176+
memset(&config, 0, sizeof(config));
177+
config.channels = channel;
178+
config.rate = rate;
179+
config.period_size = 1024;
180+
config.period_count = 2;
181+
config.format = PCM_FORMAT_S16_LE;
182+
config.silence_threshold = config.period_size * config.period_count;
183+
config.stop_threshold = config.period_size * config.period_count;
184+
config.start_threshold = config.period_size;
185+
186+
unsigned int pcm_open_flags = PCM_OUT;
187+
188+
struct pcm *pcm = pcm_open(card, device, pcm_open_flags, &config);
189+
if (!pcm || !pcm_is_ready(pcm)) {
190+
fprintf(stderr, "Unable to open PCM playback device (%s)\n", pcm ? pcm_get_error(pcm) : "invalid pcm");
191+
if (pcm) {
192+
pcm_close(pcm);
193+
}
194+
gplayLoopExit = 2;
195+
return;
196+
}
197+
198+
int frames = pcm_bytes_to_frames(pcm, size);
199+
int written_frames = pcm_writei(pcm, data, frames);
200+
if (written_frames < 0) {
201+
fprintf(stderr, "PCM playback error %s\n", pcm_get_error(pcm));
202+
}
203+
printf("Played %d frames\n", written_frames);
204+
pcm_close(pcm);
205+
gplayLoopExit = 2;
206+
}
207+
208+
void alsa_close_play()
209+
{
210+
gplayLoopExit = 1;
211+
}
212+
213+
int alsa_play_status()
214+
{
215+
return gplayLoopExit;
138216
}

projects/llm_framework/main_audio/src/alsa_audio.h

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,18 @@
11
#pragma once
22

3+
typedef struct AlsaConfig
4+
{
5+
unsigned int card;
6+
unsigned int device;
7+
float volume;
8+
int channel;
9+
int rate;
10+
int bit;
11+
} AlsaConfig;
12+
13+
extern AlsaConfig cap_config;
14+
extern AlsaConfig play_config;
15+
316
#ifdef __cplusplus
417
extern "C" {
518
#endif
@@ -10,6 +23,13 @@ void alsa_cap_start(unsigned int card, unsigned int device, float Volume, int ch
1023
AUDIOCallback callback);
1124
void alsa_close_cap();
1225

26+
void alsa_play(unsigned int card, unsigned int device, float Volume, int channel, int rate, int bit, const void *data,
27+
int size);
28+
29+
void alsa_close_play();
30+
int alsa_cap_status();
31+
int alsa_play_status();
32+
1333
#ifdef __cplusplus
1434
}
1535
#endif

0 commit comments

Comments
 (0)