22 #include "SoxWindows.H" 30 #define DEFAULT_FBANK_CNT 50 35 cerr<<
"Usage: "<<str<<
" -h or --help"<<endl;
36 cerr<<
"Usage: "<<str<<
" fileName.wav windowSize"<<endl;
37 cerr<<
"\t the fileName can be any readable audio file format."<<endl;
38 cerr<<
"\t the windowSize specifies how many samples to use as a window."<<endl;
39 cerr<<
"\n Outputs are in three files (where applicable, interleaved row per channel) :"<<endl;
// Usage text for the three output files. The names follow how main builds them:
// the input file name, a '.', the window size argument, then the suffix.
40 cerr<<
"\t The audio windows are in the file fileName.wav.windowSize.audio.dat"<<endl;
41 cerr<<
"\t The masking threshold is in the file fileName.wav.windowSize.mask.dat"<<endl;
42 cerr<<
"\t The frequency indexes for the masking thresholds are in the file fileName.wav.windowSize.f.dat"<<endl;
43 cerr<<
"\n\t In the audio and mask, the channels are interleaved as rows, M rows per window, where M is the channel count."<<endl;
44 cerr<<
"\n Author : Matt Flax <flatmax@flatmax.org>"<<endl;
48 int main(
int argc,
char *argv[]){
55 if (op.
getArg<
string>(
"h", argc, argv, help, i=0)!=0)
57 if (op.
getArg<
string>(
"help", argc, argv, help, i=0)!=0)
62 cout<<
"using windowSize = "<<windowSize<<
" s"<<endl;
64 string fileName(argv[argc-2]);
65 cout<<
"input file = "<<fileName<<endl;
74 Matrix<FP_TYPE, Dynamic, Dynamic> audioData;
75 ret=sox.
read(audioData, windowSize);
76 if (audioData.rows()!=windowSize){
77 cerr<<
"couldn't read audio, wanted "<<windowSize<<
" samples got "<<audioData.rows()<<endl;
80 if (audioData.cols()!=chCnt){
// The enclosing check compares audioData.cols() against chCnt, so the wanted
// value reported here is the channel count, not the window size.
81 cerr<<
"couldn't read audio, wanted "<<chCnt<<
" channels got "<<audioData.cols()<<endl;
89 cout<<
"output file = "<<(fileName+
'.'+argv[argc-1]+
".mask.dat")<<endl;
90 ofstream maskOut((fileName+
'.'+argv[argc-1]+
".mask.dat").c_str());
92 cerr<<
"Couldn't open the output file "<<(fileName+
'.'+argv[argc-1]+
".mask.dat")<<endl;
97 ofstream audioOut((fileName+
'.'+argv[argc-1]+
".audio.dat").c_str());
99 cerr<<
"Couldn't open the output file "<<(fileName+
'.'+argv[argc-1]+
".audio.dat")<<endl;
102 audioOut<<scientific;
104 ofstream fOut((fileName+
'.'+argv[argc-1]+
".f.dat").c_str());
106 cerr<<
"Couldn't open the output file "<<(fileName+
'.'+argv[argc-1]+
".f.dat")<<endl;
111 int halfSampleCount=audioData.rows()/2+1;
112 double fact=fs/((double)windowSize-1.0);
114 fOut<<masker.
pfb->
cf[i]<<
'\t';
123 for (
int i=0; i<chCnt; i++){
125 int ret=masker.
excite(audioData.block(0, i, audioData.rows(), 1));
131 maskOut<<masker.
mask[j]<<
'\t';
133 for (
int j=0; j<windowSize; j++)
134 audioOut<<audioData(j,i)<<
'\t';
139 ret=sox.
read(audioData, windowSize);
142 if (audioData.rows()!=windowSize){
143 cout<<
"Couldn't read audio, wanted "<<windowSize<<
" samples got "<<audioData.rows()<<endl;
144 cout<<
"This must be the end of the file"<<endl;
153 cout<<
"Please run the following .m file to see the output "<<endl;
154 cout<<
"function view"<<endl;
155 cout<<
"fs="<<fs<<
";"<<endl;
// Emit the body of the viewing script. The data files are loaded by the same
// names main used when writing them (fileName + '.' + argv[argc-1] + suffix),
// into fixed variable names, so the script works for any input file rather
// than one hard-coded path.
// NOTE(review): assumes fileName is still in scope here and contains no single quote - confirm.
156 cout<<
"audio=load('"<<fileName<<
'.'<<argv[argc-1]<<
".audio.dat');"<<endl;
157 cout<<
"fMask=load('"<<fileName<<
'.'<<argv[argc-1]<<
".f.dat');"<<endl;
158 cout<<
"mask=load('"<<fileName<<
'.'<<argv[argc-1]<<
".mask.dat');"<<endl;
159 cout<<
"[M,Nm]=size(mask);"<<endl;
160 cout<<
"[M,N]=size(audio);"<<endl;
161 cout<<
"f=linspace(0,fs,N);"<<endl;
162 cout<<
"for i=1:M"<<endl;
163 cout<<
" loglog(f, abs(fft(audio(i,:)))); hold on"<<endl;
164 cout<<
" loglog(fMask, mask(i,:), 'r'); hold off"<<endl;
165 cout<<
" legend('audio','mask'); xlabel('f (Hz)'); ylabel('amplitude');"<<endl;
166 cout<<
" pause"<<endl;
#define SOX_READ_MAXSCALE_ERROR
Sox couldn't open the filename.max to read the rescale value for the audio file.
int getArg(string key, int argc, char *argv[], TYPE &ret, int i)
int main(int argc, char *argv[])
int openRead(string fileName)
double * cf
The filter centre frequencies.
virtual int evaluateError(int errorNum)
#define NO_ERROR
There is no error.
void convertArg(const char *arg, TYPE &ret)
double * mask
The audio mask.
void setMaxVal(double newMax)
DepUKFB * pfb
roex filters
void excite(short int *Input, int sCount)
#define DEFAULT_FBANK_CNT
The default number of auditory filters.
void printUsage(const char *str)
int read(Eigen::DenseBase< Derived > &audioData, int count=0)