ESP  0.1
The Example-based Sensor Predictions (ESP) system tries to bring machine learning to the maker community.
MFCC.h
Go to the documentation of this file.
1 #ifndef ESP_MFCC_H_
2 #define ESP_MFCC_H_
3 
4 #include "GRT/CoreModules/FeatureExtraction.h"
5 
6 #include <math.h>
7 #include <stdint.h>
8 #include <vector>
9 
10 namespace GRT {
11 
12 using std::vector;
13 
14 // TriFilterBanks contains the matrix that would perform the filter operation.
15 // Specifically, the multiplication will take the following form:
16 //
17 // [ filter bank 1 ] |----|
18 // [ filter bank 2 ]
19 // [ ........... ] fft
20 // [ ........... ]
21 // [ filter bank N ] |____|
23  public:
26 
27  void initialize(uint32_t num_filter, uint32_t filter_size);
28  void setFilter(uint32_t idx, double left, double middle, double right,
29  uint32_t fs);
30 
31  static inline double toMelScale(double freq) {  // mel = 1127 * ln(1 + freq / 700); freq presumably in Hz
32  return 1127.0f * log(1.0f + freq / 700.0f);
33  }
34 
35  static inline double fromMelScale(double mel_freq) {  // algebraic inverse of toMelScale: 700 * (exp(mel / 1127) - 1)
36  return 700.0f * (exp(mel_freq / 1127.0f) - 1.0f);
37  }
38 
39  inline uint32_t getNumFilters() const {  // returns the stored num_filter_ value
40  return num_filter_;
41  }
42 
43  void filter(const vector<double>& input, vector<double>& output);
44 
45  private:
46  bool initialized_;
47  double* filter_;
48  uint32_t num_filter_;
49  uint32_t filter_size_;
50 };
51 
52 /* @brief MFCC class implements a variant of the Mel Frequency Cepstral
53  * Coefficient algorithm. Typically MFCC would include pre-emphasis and FFT on
54  * its own; in GRT these two steps can be achieved with a filter pre-processing
55  * module and an FFT feature extraction module. Therefore, this MFCC
56  * implementation assumes the input data is FFT output (one-sided, magnitude-only
57  * data). A typical parameter setting with GRT::FFT is the following:
58  *
59  * GRT::FFT fft(512, 128, 1, GRT::FFT::HAMMING_WINDOW, true, false);
60  *
61  * To use this class, create an MFCC::Options struct and fill in the desired
62  * parameters. Below is an example that works for 16 kHz audio with the FFT
63  * parameters above.
64  *
65  * GRT::MFCC::Options options;
66  * options.sample_rate = 16000;
67  * options.fft_size = 512 / 2;
68  * options.start_freq = 300;
69  * options.end_freq = 8000;
70  * options.num_tri_filter = 26;
71  * options.num_cepstral_coeff = 12;
72  * options.lifter_param = 22;
73  * options.use_vad = true;
74  * GRT::MFCC mfcc(options);
75  *
76  * For more information about MFCC, please refer to the HTK Book [1]. This
77  * implementation closely follows the one presented in the book and has been
78  * cross-verified against the Matlab implementation.
79  *
80  * Note: This class has been optimized to use BLAS for matrix/vector
81  * multiplication.
82  *
83  * [1] Young, S., Evermann, G., Gales, M., Hain, T., Kershaw, D., Liu, X.,
84  * Moore, G., Odell, J., Ollason, D., Povey, D., Valtchev, V., Woodland, P.,
85  * 2006. The HTK Book (for HTK Version 3.4.1). Engineering Department,
86  * Cambridge University. (see also: http://htk.eng.cam.ac.uk)
87 */
88 
89 class MFCC : public FeatureExtraction {
90  public:
91  struct Options {
92  uint32_t sample_rate; // The sampling frequency (Hz)
93  uint32_t fft_size; // The window size of FFT
94  double start_freq; // Lower frequency (Hz)
95  double end_freq; // Upper frequency (Hz)
96  uint32_t num_tri_filter; // Number of filter banks
97  uint32_t num_cepstral_coeff; // Number of coefficient produced
98  uint32_t lifter_param; // Sinusoidal Lifter parameter
99  bool use_vad; // Voice Activity Detector
100  double noise_level; // Simple threshold for VAD
102  : sample_rate(0), fft_size(0), start_freq(-1), end_freq(-1),
103  num_tri_filter(0), num_cepstral_coeff(0), lifter_param(0),
104  use_vad(false), noise_level(0) {
105  }
106 
107  bool operator==(const Options& rhs) const {  // field-wise equality; const so a const Options can be the left operand
108  return this->sample_rate == rhs.sample_rate &&
109  this->fft_size == rhs.fft_size &&
110  this->start_freq == rhs.start_freq &&  // doubles compared exactly: equal means identical settings
111  this->end_freq == rhs.end_freq &&
112  this->num_tri_filter == rhs.num_tri_filter &&
113  this->num_cepstral_coeff == rhs.num_cepstral_coeff &&
114  this->lifter_param == rhs.lifter_param &&
115  this->use_vad == rhs.use_vad &&
116  this->noise_level == rhs.noise_level;
117  }
118  };
119 
120  MFCC(struct Options options = Options());
121 
122  MFCC(const MFCC& rhs);
123  MFCC& operator=(const MFCC& rhs);
124  bool deepCopyFrom(const FeatureExtraction* featureExtraction) override;
125  ~MFCC() override {  // frees the array held in dct_matrix_
126  delete[] dct_matrix_;
127  }
128 
129  void initialize();
130 
131  bool computeFeatures(const VectorDouble& inputVector) override;
132  bool reset() override;
133 
134  // Configurable Parameters
135  bool setNoiseLevel(double noise_level) {  // stores the value in options_.noise_level (the VAD threshold field)
136  options_.noise_level = noise_level;
137  return true;  // always reports success; no validation is performed here
138  }
139 
140  // Save and Load from file
141  bool saveModelToFile(string filename) const override;
142  bool loadModelFromFile(string filename) override;
143  bool saveModelToFile(fstream &file) const override;
144  bool loadModelFromFile(fstream &file) override;
145 
146  struct Options getOptions() const {  // returns a copy of the current options_
147  return options_;
148  }
150  return filters_;
151  }
152 
153  public:
154  void computeLFBE(const vector<double>& fft, vector<double>& lfbe);
155  void computeCC(const vector<double>& lfbe, vector<double>& cc);
156  vector<double> getCC(const vector<double>& lfbe);
157  vector<double> lifterCC(const vector<double>& cc);
158 
159  protected:
162 
163  // The information below can be generated with options_. We fill them during
164  // the initialize() function.
165  double* dct_matrix_;
167 
168  vector<double> tmp_lfbe_;
169  vector<double> tmp_cc_;
170 
171  static RegisterFeatureExtractionModule<MFCC> registerModule;
172 };
173 
174 } // namespace GRT
175 
176 #endif // ESP_MFCC_H_
Definition: MFCC.h:22
TriFilterBanks filters_
Definition: MFCC.h:166
vector< double > tmp_cc_
Definition: MFCC.h:169
~MFCC() override
Definition: MFCC.h:125
Options()
Definition: MFCC.h:101
uint32_t fft_size
Definition: MFCC.h:93
bool use_vad
Definition: MFCC.h:99
~TriFilterBanks()
Definition: MFCC.cpp:53
Definition: Filter.cpp:23
vector< double > tmp_lfbe_
Definition: MFCC.h:168
double noise_level
Definition: user_speaker.cpp:20
Definition: MFCC.h:89
Definition: MFCC.h:91
void setFilter(uint32_t idx, double left, double middle, double right, uint32_t fs)
Definition: MFCC.cpp:31
TriFilterBanks getFilters() const
Definition: MFCC.h:149
TriFilterBanks()
Definition: MFCC.cpp:21
double start_freq
Definition: MFCC.h:94
Options options_
Definition: MFCC.h:161
uint32_t num_tri_filter
Definition: MFCC.h:96
uint32_t lifter_param
Definition: MFCC.h:98
bool setNoiseLevel(double noise_level)
Definition: MFCC.h:135
bool operator==(const Options &rhs)
Definition: MFCC.h:107
void filter(const vector< double > &input, vector< double > &output)
Definition: MFCC.cpp:59
double noise_level
Definition: MFCC.h:100
uint32_t getNumFilters() const
Definition: MFCC.h:39
static double toMelScale(double freq)
Definition: MFCC.h:31
void initialize(uint32_t num_filter, uint32_t filter_size)
Definition: MFCC.cpp:24
static double fromMelScale(double mel_freq)
Definition: MFCC.h:35
uint32_t sample_rate
Definition: MFCC.h:92
static RegisterFeatureExtractionModule< MFCC > registerModule
Definition: MFCC.h:171
uint32_t num_cepstral_coeff
Definition: MFCC.h:97
bool initialized_
Definition: MFCC.h:160
double end_freq
Definition: MFCC.h:95
double * dct_matrix_
Definition: MFCC.h:165