Speaker identification with MFCC and GMM.
// Compile-time constant that is passed as an argument, next to
// FFT::HAMMING_WINDOW, in the pipeline setup call further down.
// NOTE(review): presumably the input dimensionality of that stage - confirm
// against the callee's signature.
constexpr uint32_t DIM{1};
DIM, FFT::HAMMING_WINDOW,
true,
false));
// Build the MFCC configuration and append an MFCC feature-extraction module
// to the pipeline. The order of registration matters: noise_updater later
// fetches feature-extraction module index 1 and casts it to MFCC.
MFCC::Options options;
// Frequency band handed to MFCC.
// NOTE(review): presumably expressed in Hz - confirm against MFCC::Options.
options.start_freq = 300;
options.end_freq = 3700;
// NOTE(review): presumably the number of triangular filters in the filter
// bank and the number of cepstral coefficients kept - confirm.
options.num_tri_filter = 26;
options.num_cepstral_coeff = 12;
options.lifter_param = 22;
// NOTE(review): use_vad presumably enables voice-activity detection inside
// MFCC (see the "Noise Level" tuneable text below) - confirm.
options.use_vad = true;
pipeline.addFeatureExtractionModule(MFCC(options));
ClassLabelFilter(num_predictions *
post_ratio, num_predictions));
// Callback that applies a new ratio to the class-label filter.
//
// Looks up post-processing module 0 of the global pipeline, and sets its
// minimum count to `new_ratio * num_predictions`.
//
// @param new_ratio  Multiplier applied to num_predictions to obtain the
//                   filter's minimum count.
auto ratio_updater = [](double new_ratio) {
    auto* filter = dynamic_cast<ClassLabelFilter*>(
        pipeline.getPostProcessingModule(0));
    if (filter == nullptr) {
        // Module 0 is missing or is not a ClassLabelFilter; dereferencing the
        // failed cast would be undefined behaviour, so leave things as-is.
        return;
    }
    filter->setMinimumCount(new_ratio * num_predictions);
};
// Callback that resizes the class-label filter's buffer.
//
// Looks up post-processing module 0 of the global pipeline and sets its
// buffer size to the current value of num_predictions.
//
// @param new_duration  New duration value supplied by the caller.
//
// NOTE(review): new_duration is not used by this callback - the buffer size is
// taken from num_predictions, which is not recomputed here. If num_predictions
// is meant to be derived from the duration, that recomputation is missing (or
// lives outside the visible code) - confirm.
auto duration_updater = [](int new_duration) {
    auto* filter = dynamic_cast<ClassLabelFilter*>(
        pipeline.getPostProcessingModule(0));
    if (filter == nullptr) {
        // Module 0 is missing or is not a ClassLabelFilter; dereferencing the
        // failed cast would be undefined behaviour, so leave things as-is.
        return;
    }
    filter->setBufferSize(num_predictions);
};
// Callback that forwards a new noise level to the MFCC module.
//
// Looks up feature-extraction module 1 of the global pipeline and calls
// setNoiseLevel on it.
//
// @param new_noise_level  Value passed unchanged to MFCC::setNoiseLevel.
auto noise_updater = [](int new_noise_level) {
    auto* mfcc = dynamic_cast<MFCC*>(
        pipeline.getFeatureExtractionModule(1));
    if (mfcc == nullptr) {
        // Module 1 is missing or is not an MFCC instance; dereferencing the
        // failed cast would be undefined behaviour, so leave things as-is.
        return;
    }
    mfcc->setNoiseLevel(new_noise_level);
};
"Noise Level",
"The threshold for the system to distinguish between "
"ambient noise and speech/sound",
noise_updater);
"Duration",
"Time (in ms) that is considered as a whole "
"for smoothing the prediction",
duration_updater);
"Ratio",
"The portion of time in duration that "
"should be from the same class",
ratio_updater);
}