working with fft

This commit is contained in:
Dave Griffiths 2015-07-08 15:06:08 +01:00
parent 6876ae9031
commit 299af9442d
7 changed files with 135 additions and 77 deletions

View File

@ -16,29 +16,45 @@ sample brain::load_sound(std::string filename) {
sfinfo.format=0;
SNDFILE* f=sf_open(filename.c_str(), SFM_READ, &sfinfo);
sample s(sfinfo.frames);
sf_read_float(f, s.get_non_const_buffer(), s.get_length());
sf_readf_float(f, s.get_non_const_buffer(), s.get_length());
sf_close(f);
m_samples.push_back(s);
return s;
}
// Writes a sample to disk as a single-channel, 44100Hz, float WAV file.
//
// filename - path of the file to create
// s        - the audio to write; s.get_length() frames are written
//
// Problems are reported on cerr rather than thrown. If the file cannot be
// opened nothing is written and the function returns straight away.
void save_sample(const string &filename, const sample s) {
  SF_INFO sfinfo;
  sfinfo.format=SF_FORMAT_WAV | SF_FORMAT_FLOAT;
  sfinfo.frames=s.get_length();
  sfinfo.samplerate=44100;
  sfinfo.channels=1;
  sfinfo.sections=1;
  sfinfo.seekable=0;
  SNDFILE* f=sf_open(filename.c_str(), SFM_WRITE, &sfinfo);
  if (!f) {
    cerr<<"couldn't open "<<filename<<endl;
    // there is no handle to write to or close, so stop here instead of
    // handing a null SNDFILE* to sf_writef_float/sf_close
    return;
  }
  u32 written = sf_writef_float(f, s.get_buffer(), s.get_length());
  // a short write is only reported, the file is still closed normally
  if (written!=s.get_length()) cerr<<"error: wrote "<<written<<endl;
  sf_close(f);
}
// rewrites whole brain
//
// Discards every existing block, stores the new block size and overlap,
// then chops each loaded sample into blocks again.
//
// block_size - length of each block, in samples
// overlap    - number of samples that consecutive blocks share
// ditchpcm   - passed through to chop_and_add for every sample
void brain::init(u32 block_size, u32 overlap) {
void brain::init(u32 block_size, u32 overlap, bool ditchpcm) {
m_blocks.clear();
// remembered for later use when the brain is resynthesised
m_block_size = block_size;
m_overlap = overlap;
for (vector<sample>::iterator i=m_samples.begin(); i!=m_samples.end(); ++i) {
chop_and_add(*i, block_size, overlap);
chop_and_add(*i, block_size, overlap, ditchpcm);
}
}
// Slices s into windows of block_size samples, moving forward by
// block_size-overlap samples each time, and appends one brain_block per
// window to m_blocks. Samples at the end of s that do not fill a complete
// window are not turned into a block.
//
// Every block is created with an empty filename and a fixed rate of 44100;
// ditchpcm is handed on to the brain_block constructor.
//
// NOTE(review): the step is block_size-overlap on unsigned values, so
// overlap==block_size gives a step of zero (the loop never advances) and
// overlap>block_size wraps around - callers need overlap<block_size.
void brain::chop_and_add(const sample &s, u32 block_size, u32 overlap) {
void brain::chop_and_add(const sample &s, u32 block_size, u32 overlap, bool ditchpcm) {
u32 pos=0;
// keep going while a whole window still fits inside the sample
while (pos+block_size-1<s.get_length()) {
// progress through this sample, as a percentage
cerr<<pos/(float)s.get_length()*100<<endl;
sample region;
// copy out the window from pos to pos+block_size-1
s.get_region(region,pos,pos+block_size-1);
m_blocks.push_back(brain_block("",region,44100));
pos += block_size-overlap;
m_blocks.push_back(brain_block("",region,44100,ditchpcm));
pos += (block_size-overlap);
}
}
@ -65,15 +81,28 @@ u32 brain::search(const brain_block &target, float ratio) const {
// take another brain and rebuild this brain from bits of that one
// (presumably this one is made from a single sample)
//
// For each block of this brain, in order, the closest block in `other` is
// looked up with other.search() and its pcm is mixed into the output at the
// current position, which then moves on by m_block_size-m_overlap samples.
//
// filename - where the output is saved; it is written every 1000 blocks
//            while running and once more when the loop has finished
// other    - the brain whose blocks are used to build the output
// ratio    - passed straight to other.search()
//
// NOTE(review): the output holds (m_block_size-m_overlap)*m_blocks.size()
// samples, while each block is mixed in at a position that advances by
// m_block_size-m_overlap. Whether the end of the last blocks fits depends on
// how sample::mix treats data that runs past the end - confirm.
sample brain::resynth(const brain &other, float ratio){
sample out(m_block_size*m_blocks.size());
void brain::resynth(const string &filename, const brain &other, float ratio){
sample out((m_block_size-m_overlap)*m_blocks.size());
out.zero();
u32 pos = 0;
u32 count = 0;
for (vector<brain_block>::iterator i=m_blocks.begin(); i!=m_blocks.end(); ++i) {
// progress through the blocks, as a percentage
cerr<<count/float(m_blocks.size())*100<<endl;
u32 index = other.search(*i,ratio);
// index of the block chosen from the other brain
cerr<<index<<endl;
out.mix(other.get_block_pcm(index),pos);
pos += m_block_size-m_overlap;
// save what has been built so far every 1000 blocks (including the first)
if (count%1000==0) {
save_sample(filename,out);
}
return out;
++count;
pos += (m_block_size-m_overlap);
}
// final save of the complete output
save_sample(filename,out);
cerr<<m_blocks.size()<<" brain blocks..."<<endl;
}
@ -100,19 +129,21 @@ bool brain::unit_test() {
assert(b.m_samples.size()==2);
assert(b.m_blocks.size()==12);
// replicate brain
// replicate brains
brain b2;
b2.load_sound("test_data/100f32.wav");
b2.load_sound("test_data/100f32.wav");
assert(b2.m_samples.size()==2);
b2.init(10, 0);
b.init(10, 0);
assert(b2.m_blocks.size()==20);
assert(b.search(b2.m_blocks[0],1)==0);
assert(b.search(b2.m_blocks[9],1)==9);
b2.load_sound("test_data/up.wav");
brain b3;
b3.load_sound("test_data/up.wav");
sample r = b2.resynth(b,1);
assert(r.get_length()==200);
b2.init(512, 0);
b3.init(512, 0);
assert(b3.search(b2.m_blocks[0],1)==0);
assert(b3.search(b2.m_blocks[9],1)==9);
assert(b3.search(b2.m_blocks[19],1)==19);
assert(b3.search(b2.m_blocks[29],1)==29);
// sample r = b2.resynth(b,1);
// assert(r.get_length()==200);
return true;
}

View File

@ -14,24 +14,22 @@ public:
brain();
// rewrites whole brain
void init(u32 block_size, u32 overlap);
void init(u32 block_size, u32 overlap, bool ditchpcm=false);
// load, chop up and add to brain
// todo: add tags
sample load_sound(std::string filename);
const sample &get_block_pcm(u32 index) const;
// take another brain and rebuild this brain from bits of that one
// (presumably this one is made from a single sample)
sample resynth(const brain &other, float ratio);
void resynth(const std::string &filename, const brain &other, float ratio);
const sample &get_block_pcm(u32 index) const;
static bool unit_test();
private:
u32 search(const brain_block &target, float ratio) const;
void chop_and_add(const sample &s, u32 block_size, u32 overlap);
void chop_and_add(const sample &s, u32 block_size, u32 overlap, bool ditchpcm=false);
vector<brain_block> m_blocks;
vector<sample> m_samples;

View File

@ -8,37 +8,38 @@ using namespace spiralcore;
FFT *brain_block::m_fftw;
brain_block::brain_block(const string &filename, const sample &pcm, u32 rate) :
static const int MFCC_FILTERS=48;
// Applies a linear fade-in and fade-out to s, in place.
//
// s     - the sample to shape
// start - length of the fade-in: s[i] is scaled by i/start, so the first
//         value becomes zero
// end   - length of the fade-out: the i-th value counted back from the end
//         is scaled by i/end, so the last value becomes zero
//
// If s is shorter than start or end, the ramp stops at the edge of the
// buffer (the slope stays the same) instead of indexing outside it.
void enveloper(sample &s, u32 start, u32 end) {
  const u32 length = s.get_length();
  // never touch more values than the sample actually holds
  const u32 fade_in = start<length ? start : length;
  const u32 fade_out = end<length ? end : length;
  for(u32 i=0; i<fade_in; ++i) {
    s[i]*=i/(float)start;
  }
  // fade_out<=length keeps (length-1)-i from wrapping below zero
  for(u32 i=0; i<fade_out; ++i) {
    s[(length-1)-i]*=i/(float)end;
  }
}
// Builds a block from a region of pcm and analyses it.
//
// filename - stored as m_orig_filename
// pcm      - the audio for this block; copied into m_pcm, and its length
//            becomes m_block_size
// rate     - stored as m_rate
// ditchpcm - when true, m_pcm.clear() is called once the analysis is done
//
// NOTE(review): the fade lengths passed to enveloper are fixed at 50 whatever
// the block length is - confirm enveloper copes with shorter blocks.
brain_block::brain_block(const string &filename, const sample &pcm, u32 rate, bool ditchpcm) :
m_pcm(pcm),
m_fft(pcm.get_length()),
m_mfcc(13),
m_mfcc(MFCC_FILTERS),
m_block_size(pcm.get_length()),
m_rate(rate),
m_orig_filename(filename)
{
init_fft(m_pcm.get_length());
for (u32 i=0; i<m_block_size; i++) {
// convert from float to double
m_fftw->m_in[i] = m_pcm[i];
}
// fade the ends of the block before it is transformed
enveloper(m_pcm,50,50);
m_fftw->raw_impulse2freq();
// transform m_pcm, with the result written into m_fft
m_fftw->impulse2freq(m_pcm.get_non_const_buffer(),
m_fft.get_non_const_buffer());
double *spectrum = new double[m_block_size];
// presumably keeps only the first 30 values of the spectrum - confirm
// against sample::crop_to
if (m_block_size>30) m_fft.crop_to(30);
if (ditchpcm) m_pcm.clear();
for (u32 i=0; i<m_block_size; i++) {
// convert from complex to float for storage
m_fft[i] = m_fftw->m_spectrum[i][0];
// convert from complex to double for mfcc calc
spectrum[i] = m_fftw->m_spectrum[i][0];
}
for (u32 i=0; i<13; i++) {
m_mfcc[i] = GetCoefficient(spectrum, rate, 48, m_block_size, i);
}
delete[] spectrum;
// mfcc calculation, currently switched off
// for (u32 i=0; i<MFCC_FILTERS; i++) {
// m_mfcc[i] = GetCoefficient(m_fft.get_non_const_buffer(), rate, MFCC_FILTERS, m_block_size, i);
// }
}
void brain_block::init_fft(u32 block_size)
@ -54,9 +55,13 @@ double brain_block::compare(const brain_block &other, float ratio) const {
// just mfcc
//if (ratio==1)
{
for (u32 i=0; i<13; ++i) {
acc+=(m_mfcc[i]-other.m_mfcc[i]) * (m_mfcc[i]-other.m_mfcc[i]);
for (u32 i=0; i<m_fft.get_length(); ++i) {
acc+=(m_fft[i]-other.m_fft[i]) * (m_fft[i]-other.m_fft[i]);
}
//for (u32 i=0; i<MFCC_FILTERS; ++i) {
// acc+=(m_mfcc[i]-other.m_mfcc[i]) * (m_mfcc[i]-other.m_mfcc[i]);
//}
}
return acc;
}
@ -72,7 +77,7 @@ bool brain_block::unit_test() {
assert(bb.m_pcm.get_length()==data.get_length());
assert(bb.m_fft.get_length()==data.get_length());
assert(bb.m_mfcc.get_length()==13);
assert(bb.m_mfcc.get_length()==MFCC_FILTERS);
assert(bb.m_orig_filename==string("test"));
assert(bb.m_rate==44100);
assert(bb.m_block_size==data.get_length());
@ -88,6 +93,7 @@ bool brain_block::unit_test() {
brain_block cpy("test",data,100);
{
brain_block bb3("test",data2,44100);
cerr<<bb.compare(bb3,1)<<endl;
assert(bb.compare(bb3,1)!=0);
cpy=bb3;
}

View File

@ -11,7 +11,7 @@ namespace spiralcore {
class brain_block {
public:
// runs analysis on pcm
brain_block(const std::string &filename, const sample &pcm, u32 rate);
brain_block(const std::string &filename, const sample &pcm, u32 rate, bool ditchpcm=false);
// returns distance based on ratio of fft-mfcc values
double compare(const brain_block &other, float ratio) const;

View File

@ -12,18 +12,18 @@
/*
* Computes the specified (mth) MFCC
*
* spectralData - array of doubles containing the results of FFT computation. This data is already assumed to be purely real
* spectralData - array of mfcc_reals containing the results of FFT computation. This data is already assumed to be purely real
* samplingRate - the rate that the original time-series data was sampled at (i.e 44100)
* NumFilters - the number of filters to use in the computation. Recommended value = 48
* binSize - the size of the spectralData array, usually a power of 2
* m - The mth MFCC coefficient to compute
*
*/
double GetCoefficient(double* spectralData, unsigned int samplingRate, unsigned int NumFilters, unsigned int binSize, unsigned int m)
mfcc_real GetCoefficient(mfcc_real* spectralData, unsigned int samplingRate, unsigned int NumFilters, unsigned int binSize, unsigned int m)
{
double result = 0.0f;
double outerSum = 0.0f;
double innerSum = 0.0f;
mfcc_real result = 0.0f;
mfcc_real outerSum = 0.0f;
mfcc_real innerSum = 0.0f;
unsigned int k, l;
// 0 <= m < L
@ -64,9 +64,9 @@ double GetCoefficient(double* spectralData, unsigned int samplingRate, unsigned
* Computes the Normalization Factor (Equation 6)
* Used for internal computation only - not to be called directly
*/
double NormalizationFactor(int NumFilters, int m)
mfcc_real NormalizationFactor(int NumFilters, int m)
{
double normalizationFactor = 0.0f;
mfcc_real normalizationFactor = 0.0f;
if(m == 0)
{
@ -84,14 +84,14 @@ double NormalizationFactor(int NumFilters, int m)
* Compute the filter parameter for the specified frequency and filter bands (Eq. 2)
* Used for internal computation only - not to be called directly
*/
double GetFilterParameter(unsigned int samplingRate, unsigned int binSize, unsigned int frequencyBand, unsigned int filterBand)
mfcc_real GetFilterParameter(unsigned int samplingRate, unsigned int binSize, unsigned int frequencyBand, unsigned int filterBand)
{
double filterParameter = 0.0f;
mfcc_real filterParameter = 0.0f;
double boundary = (frequencyBand * samplingRate) / binSize; // k * Fs / N
double prevCenterFrequency = GetCenterFrequency(filterBand - 1); // fc(l - 1) etc.
double thisCenterFrequency = GetCenterFrequency(filterBand);
double nextCenterFrequency = GetCenterFrequency(filterBand + 1);
mfcc_real boundary = (frequencyBand * samplingRate) / binSize; // k * Fs / N
mfcc_real prevCenterFrequency = GetCenterFrequency(filterBand - 1); // fc(l - 1) etc.
mfcc_real thisCenterFrequency = GetCenterFrequency(filterBand);
mfcc_real nextCenterFrequency = GetCenterFrequency(filterBand + 1);
if(boundary >= 0 && boundary < prevCenterFrequency)
{
@ -119,9 +119,9 @@ double GetFilterParameter(unsigned int samplingRate, unsigned int binSize, unsig
* Compute the band-dependent magnitude factor for the given filter band (Eq. 3)
* Used for internal computation only - not to be called directly
*/
double GetMagnitudeFactor(unsigned int filterBand)
mfcc_real GetMagnitudeFactor(unsigned int filterBand)
{
double magnitudeFactor = 0.0f;
mfcc_real magnitudeFactor = 0.0f;
if(filterBand >= 1 && filterBand <= 14)
{
@ -141,10 +141,10 @@ double GetMagnitudeFactor(unsigned int filterBand)
* center frequencies are equally spaced on the mel scale
* Used for internal computation only - not to be called directly
*/
double GetCenterFrequency(unsigned int filterBand)
mfcc_real GetCenterFrequency(unsigned int filterBand)
{
double centerFrequency = 0.0f;
double exponent;
mfcc_real centerFrequency = 0.0f;
mfcc_real exponent;
if(filterBand == 0)
{

View File

@ -8,19 +8,21 @@
#pragma once
typedef float mfcc_real;
#define PI 3.14159265358979323846264338327
// Returns the specified (mth) MFCC
double GetCoefficient(double* spectralData, unsigned int samplingRate, unsigned int NumFilters, unsigned int binSize, unsigned int m);
mfcc_real GetCoefficient(mfcc_real* spectralData, unsigned int samplingRate, unsigned int NumFilters, unsigned int binSize, unsigned int m);
// Compute the normalization factor (For internal computation only - not to be called directly)
double NormalizationFactor(int NumFilters, int m);
mfcc_real NormalizationFactor(int NumFilters, int m);
// Compute the filter parameter for the specified frequency and filter bands (For internal computation only - not to be called directly)
double GetFilterParameter(unsigned int samplingRate, unsigned int binSize, unsigned int frequencyBand, unsigned int filterBand);
mfcc_real GetFilterParameter(unsigned int samplingRate, unsigned int binSize, unsigned int frequencyBand, unsigned int filterBand);
// Compute the band-dependent magnitude factor for the given filter band (For internal computation only - not to be called directly)
double GetMagnitudeFactor(unsigned int filterBand);
mfcc_real GetMagnitudeFactor(unsigned int filterBand);
// Compute the center frequency (fc) of the specified filter band (l) (For internal computation only - not to be called directly)
double GetCenterFrequency(unsigned int filterBand);
mfcc_real GetCenterFrequency(unsigned int filterBand);

View File

@ -25,14 +25,35 @@
using namespace std;
int main(int argc, char *argv[])
{
// Runs the brain_block tests followed by the brain tests, announcing each
// suite on cerr before it starts and printing "passed" or "failed" after.
void unit_test() {
  cerr<<"testing brain_block"<<endl;
  const bool block_ok = brain_block::unit_test();
  cerr<<(block_ok ? "passed" : "failed")<<endl;

  cerr<<"testing brain"<<endl;
  const bool brain_ok = brain::unit_test();
  cerr<<(brain_ok ? "passed" : "failed")<<endl;
}
int main(int argc, char *argv[])
{
unit_test();
cerr<<"starting"<<endl;
brain source, target;
source.load_sound("../sound/source/shostakovich6.wav");
// source.load_sound("../sound/source/eagle.wav");
target.load_sound("../sound/source/sb-right.wav");
cerr<<"loaded sounds"<<endl;
u32 len=3000;
source.init(len,len-len);
target.init(len,len-len/8);
cerr<<"ready..."<<endl;
target.resynth("shosta-sb-right.wav",source,1);
//audio_device *a = new audio_device("samplebrain",44100,2048);
}