gtkIOStream  1.7.0
GTK+ << C++ IOStream operators for GTK+. Now with ORBing, numerical computation, audio client and more ...
audioMasker.C
Go to the documentation of this file.
1 /* Copyright 2000-2018 Matt Flax <flatmax@flatmax.org>
2  This file is part of GTK+ IOStream class set
3 
4  GTK+ IOStream is free software; you can redistribute it and/or modify
5  it under the terms of the GNU General Public License as published by
6  the Free Software Foundation; either version 2 of the License, or
7  (at your option) any later version.
8 
9  GTK+ IOStream is distributed in the hope that it will be useful,
10  but WITHOUT ANY WARRANTY; without even the implied warranty of
11  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12  GNU General Public License for more details.
13 
14  You have received a copy of the GNU General Public License
15  along with GTK+ IOStream
16  */
17 #ifndef _MSC_VER
18 #include "Sox.H"
19 #else
20 // Note : Microsoft doesn't understand the difference between upper and lower case in file names.
21 // On Microsoft, you have to manually rename Sox.H to SoxWindows.H
22 #include "SoxWindows.H"
23 #endif
24 
25 typedef float FP_TYPE;
26 
27 #include "OptionParser.H"
28 #include "AudioMask/AudioMasker.H"
29 
30 #define DEFAULT_FBANK_CNT 50
31 
32 #include <fstream>
33 
/** Print the command line usage to stderr and terminate the program.
 The output file names listed here match the names main() builds : the input
 file name, then the windowSize argument, then the data type suffix.
 \param str The name of the executable, as it should appear in the usage lines (normally argv[0]).
 \note This function never returns, it finishes by calling exit(0).
*/
void printUsage(const char *str){
    std::cerr<<"Usage: "<<str<<" -h or --help"<<std::endl;
    std::cerr<<"Usage: "<<str<<" fileName.wav windowSize"<<std::endl;
    std::cerr<<"\t the fileName can be any readable audio file format."<<std::endl;
    std::cerr<<"\t the windowSize specifies how many samples to use as a window."<<std::endl;
    std::cerr<<"\n Outputs are in three files (where applicable, interleaved row per channel) :"<<std::endl;
    std::cerr<<"\t The audio windows are in the file fileName.wav.windowSize.audio.dat"<<std::endl;
    std::cerr<<"\t The masking threshold is in the file fileName.wav.windowSize.mask.dat"<<std::endl;
    std::cerr<<"\t The frequency indexes for the masking thresholds are in the file fileName.wav.windowSize.f.dat"<<std::endl;
    std::cerr<<"\n\t In the audio and mask, the channels are interleaved as rows, M rows per window, where M is the channel count."<<std::endl;
    std::cerr<<"\n Author : Matt Flax <flatmax@flatmax.org>"<<std::endl;
    exit(0);
}
47 
48 int main(int argc, char *argv[]){
49  if (argc<3)
50  printUsage(argv[0]);
51 
52  OptionParser op;
53  int i=0;
54  string help;
55  if (op.getArg<string>("h", argc, argv, help, i=0)!=0)
56  printUsage(argv[0]);
57  if (op.getArg<string>("help", argc, argv, help, i=0)!=0)
58  printUsage(argv[0]);
59 
60  FP_TYPE windowSize;
61  op.convertArg<FP_TYPE>(argv[argc-1], windowSize);
62  cout<<"using windowSize = "<<windowSize<<" s"<<endl;
63 
64  string fileName(argv[argc-2]);
65  cout<<"input file = "<<fileName<<endl;
66  Sox<FP_TYPE> sox;
67  int ret;
68  if ((ret=sox.openRead(fileName))<0 && ret!=SOX_READ_MAXSCALE_ERROR)
69  return SoxDebug().evaluateError(ret, argv[argc-2]);
70  sox.setMaxVal(1.0);
71 
72  int chCnt=sox.getChCntIn(); // the channel count
73 
74  Matrix<FP_TYPE, Dynamic, Dynamic> audioData;
75  ret=sox.read(audioData, windowSize);
76  if (audioData.rows()!=windowSize){
77  cerr<<"couldn't read audio, wanted "<<windowSize<<" samples got "<<audioData.rows()<<endl;
78  return -1;
79  }
80  if (audioData.cols()!=chCnt){
81  cerr<<"couldn't read audio, wanted "<<windowSize<<" channels got "<<audioData.cols()<<endl;
82  return -1;
83  }
84 
85  double fs=sox.getFSIn();
86  AudioMasker masker(fs, DEFAULT_FBANK_CNT);
87 
88  string fileNameOut;
89  cout<<"output file = "<<(fileName+'.'+argv[argc-1]+".mask.dat")<<endl;
90  ofstream maskOut((fileName+'.'+argv[argc-1]+".mask.dat").c_str());
91  if (!maskOut){
92  cerr<<"Couldn't open the output file "<<(fileName+'.'+argv[argc-1]+".mask.dat")<<endl;
93  exit(-1);
94  }
95  maskOut<<scientific; // put output into sci mode
96 
97  ofstream audioOut((fileName+'.'+argv[argc-1]+".audio.dat").c_str());
98  if (!audioOut){
99  cerr<<"Couldn't open the output file "<<(fileName+'.'+argv[argc-1]+".audio.dat")<<endl;
100  exit(-1);
101  }
102  audioOut<<scientific; // put output into sci mode
103 
104  ofstream fOut((fileName+'.'+argv[argc-1]+".f.dat").c_str());
105  if (!audioOut){
106  cerr<<"Couldn't open the output file "<<(fileName+'.'+argv[argc-1]+".f.dat")<<endl;
107  exit(-1);
108  }
109  fOut<<scientific; // put output into sci mode
110  // output the frequencies as the first row
111  int halfSampleCount=audioData.rows()/2+1;
112  double fact=fs/((double)windowSize-1.0); // convert from an index to the equivalent Fourier bin frequency
113  for (int i=0; i<DEFAULT_FBANK_CNT; i++)
114  fOut<<masker.pfb->cf[i]<<'\t';
115 // for (int i=0; i<halfSampleCount; i++)
116 // maskOut<<i*fact<<'\t';
117  fOut<<endl;
118  fOut.close();
119 
120  ret=NO_ERROR;
121  while (ret==NO_ERROR){
122  // process each channel of the audio data
123  for (int i=0; i<chCnt; i++){
124 // masker.excite<FP_TYPE>(audioData.block(0, i, audioData.rows(), 1).data(), windowSize);
125  int ret=masker.excite(audioData.block(0, i, audioData.rows(), 1));
126  if (ret!=NO_ERROR)
128 // for (int j=0; j<halfSampleCount; j++)
129 // maskOut<<masker.findThreshold(j*fact)<<'\t';
130  for (int j=0; j<DEFAULT_FBANK_CNT; j++)
131  maskOut<<masker.mask[j]<<'\t';
132  maskOut<<endl;
133  for (int j=0; j<windowSize; j++)
134  audioOut<<audioData(j,i)<<'\t';
135  audioOut<<endl;
136  }
137 
138  // read more audio data
139  ret=sox.read(audioData, windowSize);
140  if (ret!=NO_ERROR)
141  break;
142  if (audioData.rows()!=windowSize){
143  cout<<"Couldn't read audio, wanted "<<windowSize<<" samples got "<<audioData.rows()<<endl;
144  cout<<"This must be the end of the file"<<endl;
145  break;
146  }
147  }
148 
149  audioOut.close();
150  maskOut.close();
151  sox.closeRead();
152 
153  cout<<"Please run the following .m file to see the output "<<endl;
154  cout<<"function view"<<endl;
155  cout<<"fs="<<fs<<";"<<endl;
156  cout<<"load /tmp/11.Neutral.44k.wav."<<windowSize<<".audio.dat"<<endl;
157  cout<<"load /tmp/11.Neutral.44k.wav."<<windowSize<<".f.dat"<<endl;
158  cout<<"load /tmp/11.Neutral.44k.wav."<<windowSize<<".mask.dat"<<endl;
159  cout<<"[M,Nm]=size(X11_Neutral_44k_wav_"<<windowSize<<"_mask);"<<endl;
160  cout<<"[M,N]=size(X11_Neutral_44k_wav_"<<windowSize<<"_audio);"<<endl;
161  cout<<"f=linspace(0,fs,N);"<<endl;
162  cout<<"for i=1:M"<<endl;
163  cout<<" loglog(f, abs(fft(X11_Neutral_44k_wav_"<<windowSize<<"_audio(i,:)))); hold on"<<endl;
164  cout<<" loglog(X11_Neutral_44k_wav_"<<windowSize<<"_f, X11_Neutral_44k_wav_"<<windowSize<<"_mask(i,:), 'r'); hold off"<<endl;
165  cout<<" legend('audio','mask'); xlabel('f (Hz)'); ylabel('amplitude');"<<endl;
166  cout<<" pause"<<endl;
167  cout<<"end"<<endl;
168  return ret;
169 }
#define SOX_READ_MAXSCALE_ERROR
Sox couldn't open the filename.max to read the rescale value for the audio file.
Definition: Sox.H:36
float FP_TYPE
Definition: audioMasker.C:25
int getArg(string key, int argc, char *argv[], TYPE &ret, int i)
Definition: OptionParser.H:43
int main(int argc, char *argv[])
Definition: audioMasker.C:48
int openRead(string fileName)
Definition: Sox.C:70
double * cf
The filter centre frequencies.
Definition: depukfb.H:154
virtual int evaluateError(int errorNum)
Definition: Debug.H:132
#define NO_ERROR
There is no error.
Definition: Debug.H:33
void convertArg(const char *arg, TYPE &ret)
Definition: OptionParser.H:58
double * mask
The audio mask.
Definition: AudioMask.H:36
Definition: Sox.H:54
double getFSIn(void)
Definition: Sox.H:315
int getChCntIn(void)
Definition: Sox.H:333
void setMaxVal(double newMax)
Definition: Sox.H:300
int closeRead(void)
Definition: Sox.C:226
DepUKFB * pfb
roex filters
Definition: AudioMasker.H:116
void excite(short int *Input, int sCount)
Definition: AudioMasker.C:171
#define DEFAULT_FBANK_CNT
The default number of auditory filters.
Definition: audioMasker.C:30
void printUsage(const char *str)
Definition: audioMasker.C:34
int read(Eigen::DenseBase< Derived > &audioData, int count=0)
Definition: Sox.H:135
gtkIOStream: /tmp/gtkiostream/applications/audioMasker.C Source File
GTK+ IOStream  Beta