静音检测算法:
下面的静音检测算法对噪音有很好的过滤作用:噪音能够被准确识别,即使整个文件全部是噪音,也会被判定为静音。
如果整个文件都是静音,就返回1,否则返回0,出错返回-1。
#include <math.h>
#include <stdio.h>
#include <string.h>
#include "pesq.h"
#include "dsp.h"
/* Program entry point; the WAV file to analyse is taken from argv[1]. */
int main (int argc, const char *argv []);
/* Prints the command-line help text to stdout. */
void usage (void);
/*
 * Loads the file described by ref_info, filters it, runs calc_VAD() and
 * returns a verdict derived from the VAD output (-1 when loading fails).
 */
int pesq_measure(SIGNAL_INFO * ref_info, ERROR_INFO * err_info, long * Error_Flag, char ** Error_Type);
/*
 * Print the command-line help text to stdout, one line per table entry.
 */
void usage (void) {
    static const char *const help_lines [] = {
        "Silence detector, (C) Kingron, 2011:\n",
        "Return 1 for noise/silence file, otherwise return 0, -1 for error.\n",
        "Only support PCM, mono, 16K sample rate, 16 bit wav file.\n",
        "Usage:\n",
        " Siledet [WavFile]\n"
    };
    const int line_count = (int) (sizeof help_lines / sizeof help_lines [0]);
    int idx;

    /* None of the lines contains a conversion specifier, so fputs emits
       exactly what printf would. */
    for (idx = 0; idx < line_count; idx++) {
        fputs (help_lines [idx], stdout);
    }
}
/*
 * Entry point of the silence detector.
 *
 * Expects the path of a WAV file in argv[1], runs pesq_measure() on it and
 * maps the outcome to the process exit status:
 *   - the value returned by pesq_measure() when no error was flagged
 *     (a "silence file" message is printed when that value is 1);
 *   - -1 when no argument is given, the file does not exist, the path does
 *     not fit the SIGNAL_INFO buffers, or processing flagged an error.
 */
int main (int argc, const char *argv []) {
    SIGNAL_INFO ref_info;
    ERROR_INFO err_info;
    long Error_Flag = 0;
    int ret = -1;
    char * Error_Type = "Unknown error type.";
    const char *base_name;
    const char *sep;
    size_t name_len;

    if (argc <= 1) {
        usage();
        return -1;
    }
    if (!file_exist(argv[1])) {
        printf("File not found: %s\n", argv[1]);
        return -1;
    }

    /* Give every field a defined value before the structs are handed on;
       pesq_measure() and the loader only fill in some of them. */
    memset (&ref_info, 0, sizeof ref_info);
    memset (&err_info, 0, sizeof err_info);
    ref_info.apply_swap = 0;
    err_info.subj_mos = 0;
    err_info.cond_nr = 0;

    /* NOTE(review): the sizeof checks assume path_name and file_name are
       char arrays embedded in SIGNAL_INFO (copying into them from an
       otherwise uninitialised struct only works in that case) - confirm
       against pesq.h. */
    name_len = strlen (argv[1]);
    if (name_len >= sizeof ref_info.path_name) {
        printf ("File path too long: %s\n", argv[1]);
        return -1;
    }
    memcpy (ref_info.path_name, argv[1], name_len + 1);

    /* Derive the bare file name from path_name rather than copying
       file_name onto itself: strcpy with overlapping source and
       destination is undefined behaviour. Both '\\' and '/' are accepted
       as directory separators. */
    base_name = ref_info.path_name;
    sep = strrchr (base_name, '\\');
    if (sep != NULL) {
        base_name = sep + 1;
    }
    sep = strrchr (base_name, '/');
    if (sep != NULL) {
        base_name = sep + 1;
    }
    name_len = strlen (base_name);
    if (name_len >= sizeof ref_info.file_name) {
        printf ("File name too long: %s\n", base_name);
        return -1;
    }
    memcpy (ref_info.file_name, base_name, name_len + 1);

    select_rate(16000L, &Error_Flag, &Error_Type);
    ret = pesq_measure(&ref_info, &err_info, &Error_Flag, &Error_Type);

    if (Error_Flag != 0) {
        /* Error_Flag is a long, so it needs %ld rather than %d. */
        printf ("An error of type %ld ", Error_Flag);
        if (Error_Type != NULL) {
            printf (" (%s) occurred during processing.\n", Error_Type);
        } else {
            printf ("occurred during processing.\n");
        }
        return -1;
    }

    if (ret == 1) {
        printf("%s is a silence file\n", argv[1]);
    }
    return ret;
}
/*
 * Filter table used by fix_power_level(): 26 rows of
 * { frequency (presumably Hz), gain in dB }.
 * Rows from 350 to 3250 carry 0 dB; every other row carries -500 dB.
 */
double align_filter_dB [26] [2] = {
    {    0.0, -500.0 },
    {   50.0, -500.0 },
    {  100.0, -500.0 },
    {  125.0, -500.0 },
    {  160.0, -500.0 },
    {  200.0, -500.0 },
    {  250.0, -500.0 },
    {  300.0, -500.0 },
    {  350.0,    0.0 },
    {  400.0,    0.0 },
    {  500.0,    0.0 },
    {  600.0,    0.0 },
    {  630.0,    0.0 },
    {  800.0,    0.0 },
    { 1000.0,    0.0 },
    { 1250.0,    0.0 },
    { 1600.0,    0.0 },
    { 2000.0,    0.0 },
    { 2500.0,    0.0 },
    { 3000.0,    0.0 },
    { 3250.0,    0.0 },
    { 3500.0, -500.0 },
    { 4000.0, -500.0 },
    { 5000.0, -500.0 },
    { 6300.0, -500.0 },
    { 8000.0, -500.0 }
};
/*
 * Filter table applied to the loaded signal by pesq_measure(): 26 rows of
 * { frequency (presumably Hz), gain in dB }.
 */
double standard_IRS_filter_dB [26] [2] = {
    {    0.0, -200.0 },
    {   50.0,  -40.0 },
    {  100.0,  -20.0 },
    {  125.0,  -12.0 },
    {  160.0,   -6.0 },
    {  200.0,    0.0 },
    {  250.0,    4.0 },
    {  300.0,    6.0 },
    {  350.0,    8.0 },
    {  400.0,   10.0 },
    {  500.0,   11.0 },
    {  600.0,   12.0 },
    {  700.0,   12.0 },
    {  800.0,   12.0 },
    { 1000.0,   12.0 },
    { 1300.0,   12.0 },
    { 1600.0,   12.0 },
    { 2000.0,   12.0 },
    { 2500.0,   12.0 },
    { 3000.0,   12.0 },
    { 3250.0,   12.0 },
    { 3500.0,    4.0 },
    { 4000.0, -200.0 },
    { 5000.0, -200.0 },
    { 6300.0, -200.0 },
    { 8000.0, -200.0 }
};
#define TARGET_AVG_POWER 1E7
/*
 * Rescale info->data so that the power of the signal, measured on a copy
 * filtered with align_filter_dB, matches TARGET_AVG_POWER.
 *
 * Parameters:
 *   info         - signal whose first info->Nsamples samples are scaled in
 *                  place. The data buffer must hold at least
 *                  Nsamples + DATAPADDING_MSECS * (Fs / 1000) samples,
 *                  because that many are copied into the working buffer.
 *   name         - not used; kept so existing callers keep compiling.
 *   maxNsamples  - sample count used to derive the last argument passed to
 *                  pow_of().
 *
 * The signal is left untouched when the working buffer cannot be allocated
 * or when the measured power is not positive (for example an all-zero
 * input): dividing by a zero power would produce an infinite scale factor
 * and turn the samples into NaN.
 */
void fix_power_level (SIGNAL_INFO *info, char *name, long maxNsamples)
{
    long n = info-> Nsamples;
    long padded_len = n + DATAPADDING_MSECS * (Fs / 1000);
    long i;
    float *align_filtered = (float *) safe_malloc (padded_len * sizeof (float));
    float global_scale;
    float power_above_300Hz;

    (void) name;

    /* NOTE(review): assumes safe_malloc() can hand back NULL on failure -
       confirm against its definition. */
    if (align_filtered == NULL) {
        return;
    }

    /* Filter a copy so the signal itself is only touched by the final
       scaling step. */
    for (i = 0; i < padded_len; i++) {
        align_filtered [i] = info-> data [i];
    }
    apply_filter (align_filtered, info-> Nsamples, 26, align_filter_dB);

    power_above_300Hz = (float) pow_of (align_filtered,
                                        SEARCHBUFFER * Downsample,
                                        n - SEARCHBUFFER * Downsample + DATAPADDING_MSECS * (Fs / 1000),
                                        maxNsamples - 2 * SEARCHBUFFER * Downsample + DATAPADDING_MSECS * (Fs / 1000));

    /* Written as a negated comparison so that a NaN power is rejected too. */
    if (!(power_above_300Hz > 0.0f)) {
        safe_free (align_filtered);
        return;
    }

    global_scale = (float) sqrt (TARGET_AVG_POWER / power_above_300Hz);
    for (i = 0; i < n; i++) {
        info-> data [i] *= global_scale;
    }
    safe_free (align_filtered);
}
/*
 * Load the file described by ref_info, run the standard IRS filter over it,
 * compute the VAD track with calc_VAD() and derive the verdict from it.
 *
 * Parameters:
 *   ref_info   - signal descriptor. Its data, VAD and logVAD pointers are
 *                reset to NULL on entry, are expected to be filled in by
 *                load_src() / calc_VAD(), and are released and reset to
 *                NULL again before a successful return.
 *   err_info   - not used by this function.
 *   Error_Flag - handed to load_src(); a non-zero value after loading
 *                aborts the measurement.
 *   Error_Type - handed to load_src() together with Error_Flag.
 *
 * Returns 1 when VAD[1] is non-zero, 0 when it is zero, and -1 when
 * load_src() flagged an error.
 */
int pesq_measure(SIGNAL_INFO * ref_info, ERROR_INFO * err_info, long * Error_Flag, char ** Error_Type)
{
    int ret = -1;

    (void) err_info;

    ref_info->data = NULL;
    ref_info->VAD = NULL;
    ref_info->logVAD = NULL;

    load_src(Error_Flag, Error_Type, ref_info);
    if ((*Error_Flag) != 0)
        return ret;

    /* Power normalisation (fix_power_level), DC blocking and the extra
       apply_filters pass were disabled in the original code; only the
       standard IRS filter is applied before the VAD computation. */
    apply_filter(ref_info->data, ref_info->Nsamples, 26, standard_IRS_filter_dB);
    calc_VAD(ref_info);

    /* NOTE(review): the verdict looks only at VAD entry 1 - confirm that a
       single entry is the intended decision rule. */
    ret = (ref_info->VAD[1] != 0) ? 1 : 0;

    /* The struct belongs to the caller, so the pointers are cleared after
       being released. The original also released a model_ref buffer that
       was never allocated (freeing an uninitialised pointer); that call is
       gone. */
    safe_free (ref_info-> data);
    ref_info->data = NULL;
    safe_free (ref_info-> VAD);
    ref_info->VAD = NULL;
    safe_free (ref_info-> logVAD);
    ref_info->logVAD = NULL;

    return ret;
}
/* END OF FILE */
如果要计算静音比例,把 pesq_measure 中从 calc_VAD 调用开始到函数末尾的部分改成下面这样即可:
calc_VAD(ref_info);
// 在计算VAD之后,计算VAD中比例计数
Nwin = ref_info->Nsamples / Downsample;
for (i = 0; i < Nwin; i++) {
if (ref_info->VAD[i] > 2) ret++; // 2 表示对一些底噪进行过滤,因为有的简单的底噪其VAD算出来之后有1.x的值。
}
return (100 * ret / Nwin); // 返回静音比例