experimental pitch shifting

This commit is contained in:
dave griffiths 2015-09-23 18:35:13 +01:00
parent ad64e64984
commit 44be9d76cb
12 changed files with 555 additions and 561 deletions

View File

@ -23,6 +23,7 @@
#include "process_thread.h"
#include "audio_thread.h"
#include "pitchshift.h"
using namespace std;
@ -31,6 +32,8 @@ int main( int argc , char *argv[] ){
MainWindow mainWin;
mainWin.show();
pitchshift::init(44100);
process_thread pt;
audio_thread at(pt);
pt.register_renderer(at.m_renderer);

View File

@ -25,6 +25,7 @@ SOURCES += MainWindow.cpp \
../src/renderer.cpp \
../src/status.cpp \
../src/window.cpp \
../src/pitchshift.cpp \
../src/aquila/filter/MelFilterBank.cpp \
../src/aquila/filter/MelFilter.cpp \
../src/aquila/transform/Dct.cpp \
@ -38,7 +39,7 @@ SOURCES += MainWindow.cpp \
../../../jellyfish/src/core/stream.cpp
INCLUDEPATH += ../src
LIBS += -L.. -lportaudio -lfftw3 -lsndfile -llo -ldl -lpthread -lm
LIBS += -L.. -lrubberband -lportaudio -lfftw3 -lsndfile -llo -ldl -lpthread -lm
#CONFIG+=debug
QMAKE_CXXFLAGS += -Wall -Wno-unused -std=c++11 -DDONT_USE_FLUXA_GRAPH

View File

@ -26,6 +26,7 @@ FFT *block::m_fftw;
Aquila::Mfcc *block::m_mfcc_proc;
static const int MFCC_FILTERS=12;
static const int FFT_BIAS=200;
double blend(double a, double b, double t) {
return a*(1-t)+b*t;
@ -81,12 +82,12 @@ block::block(u64 id, const string &filename, const sample &pcm, u32 rate, const
assert(m_fftw!=NULL);
w.run(m_pcm);
process(m_pcm,m_fft,m_mfcc);
process(m_pcm,m_fft,m_mfcc,m_dominant_freq);
// rerun the normalised version
normalise(m_n_pcm);
w.run(m_n_pcm);
process(m_n_pcm,m_n_fft,m_n_mfcc);
process(m_n_pcm,m_n_fft,m_n_mfcc,m_n_dominant_freq);
if (ditchpcm) {
m_pcm.clear();
@ -105,7 +106,7 @@ void block::init_fft(u32 block_size)
}
}
void block::process(const sample &pcm, sample &fft, sample &mfcc) {
void block::process(const sample &pcm, sample &fft, sample &mfcc, float &freq) {
m_fftw->impulse2freq(pcm.get_buffer());
m_fftw->calculate_bins();
@ -116,6 +117,8 @@ void block::process(const sample &pcm, sample &fft, sample &mfcc) {
m_fftw->m_spectrum[i][1]));
}
freq = m_fftw->calculate_dominant_freq();
u32 fft_size = m_block_size;
if (fft_size>100) {
fft.crop_to(100);
@ -135,8 +138,6 @@ void block::process(const sample &pcm, sample &fft, sample &mfcc) {
}
#define FFT_BIAS 200
double block::_compare(const sample &fft_a, const sample &mfcc_a,
const sample &fft_b, const sample &mfcc_b,
const search_params &params) const
@ -183,11 +184,15 @@ double block::compare(const block &other, const search_params &params) const {
}
ios &spiralcore::operator||(ios &s, block &b) {
u32 version=2;
u32 version=3;
string id("block");
s||id||version;
if (version>1) s||b.m_id;
if (version>2) {
s||b.m_dominant_freq;
s||b.m_n_dominant_freq;
}
s||b.m_pcm||b.m_fft||b.m_mfcc;
s||b.m_n_pcm||b.m_n_fft||b.m_n_mfcc;

View File

@ -29,8 +29,8 @@
namespace spiralcore {
class block {
public:
class block {
public:
// runs analysis on pcm
block(u64 id, const std::string &filename, const sample &pcm, u32 rate, const window &w, bool ditchpcm=false);
block() {}
@ -47,10 +47,12 @@ public:
std::vector<u32> &get_synapse() { return m_synapse; }
const std::vector<u32> &get_synapse_const() const { return m_synapse; }
float &get_usage() { return m_usage; }
float get_freq() const { return m_dominant_freq; }
float get_n_freq() const { return m_n_dominant_freq; }
private:
private:
void process(const sample &pcm, sample &fft, sample &mfcc);
void process(const sample &pcm, sample &fft, sample &mfcc, float &freq);
double _compare(const sample &fft_a, const sample &mfcc_a,
const sample &fft_b, const sample &mfcc_b,
@ -75,11 +77,14 @@ private:
std::vector<u32> m_synapse;
float m_usage;
float m_dominant_freq;
float m_n_dominant_freq;
friend ios &operator||(ios &s, block &b);
};
};
ios &operator||(ios &s, block &b);
ios &operator||(ios &s, block &b);
}

View File

@ -71,19 +71,19 @@ void brain::init(u32 block_size, u32 overlap, window::type t, bool ditchpcm) {
u32 count=0;
for (std::list<sound>::iterator i=m_samples.begin(); i!=m_samples.end(); ++i) {
count++;
chop_and_add(i->m_sample, count, ditchpcm);
chop_and_add(*i, count, ditchpcm);
}
status::update("all samples processed");
}
void brain::chop_and_add(const sample &s, u32 count, bool ditchpcm) {
void brain::chop_and_add(const sound &s, u32 count, bool ditchpcm) {
u32 pos=0;
if (m_overlap>=m_block_size) m_overlap=0;
while (pos+m_block_size-1<s.get_length()) {
status::update("processing sample %d: %d%%",count,(int)(pos/(float)s.get_length()*100));
while (pos+m_block_size-1<s.m_sample.get_length()) {
status::update("processing sample %d: %d%%",count,(int)(pos/(float)s.m_sample.get_length()*100));
sample region;
s.get_region(region,pos,pos+m_block_size-1);
m_blocks.push_back(block(m_blocks.size(),"",region,44100,m_window,ditchpcm));
s.m_sample.get_region(region,pos,pos+m_block_size-1);
m_blocks.push_back(block(m_blocks.size(),s.m_filename,region,44100,m_window,ditchpcm));
pos += (m_block_size-m_overlap);
}
}

View File

@ -84,12 +84,13 @@ public:
private:
void chop_and_add(const sample &s, u32 count, bool ditchpcm=false);
void chop_and_add(const sound &s, u32 count, bool ditchpcm=false);
void deplete_usage();
u32 stickify(const block &target, u32 closest_index, f32 dist, const search_params &params);
vector<block> m_blocks;
std::list<sound> m_samples;
vector<string> m_active_sounds;
u32 m_block_size;
u32 m_overlap;

View File

@ -53,6 +53,21 @@ void FFT::impulse2freq(const float *imp)
fftw_execute(m_plan);
}
static const float SRATE = 44100;
// Finds the strongest bin in the first m_length/2 entries of m_spectrum and
// returns its index scaled by SRATE/m_length (i.e. a frequency in Hz,
// assuming m_length is the transform size and the audio was sampled at
// SRATE -- TODO confirm against the plan set up in the constructor).
//
// Returns 0 when m_length is 0 or when every inspected bin is zero.
// Bin 0 takes part in the search, so a large constant offset in the
// input can win and also produce 0.
float FFT::calculate_dominant_freq() {
    // nothing to inspect; also avoids the division by zero below
    if (m_length==0) return 0;

    double highest = 0;
    u32 index = 0;
    for (u32 i=0; i<m_length/2; ++i) {
        // Compare squared magnitudes (re^2 + im^2). Looking at the real
        // part alone makes the winner depend on the phase of the signal.
        const double re = m_spectrum[i][0];
        const double im = m_spectrum[i][1];
        const double power = re*re + im*im;
        if (power>highest) {
            index=i;
            highest=power;
        }
    }
    // bin index -> frequency
    return index * (SRATE/(float)m_length);
}
void FFT::calculate_bins() {
float useful_area = m_length/2;
@ -70,7 +85,7 @@ void FFT::calculate_bins() {
for (u32 i=from; i<=to; i++) {
if (i<m_length) {
value += m_spectrum[i][0];
value += m_spectrum[i][0]*m_spectrum[i][0];
}
}

View File

@ -24,13 +24,14 @@
namespace spiralcore {
class FFT
{
public:
class FFT
{
public:
FFT(u32 length, u32 num_bins);
~FFT();
void impulse2freq(const float *imp);
void calculate_bins();
float calculate_dominant_freq();
fftw_plan m_plan;
u32 m_length;
@ -38,7 +39,7 @@ public:
double *m_in;
fftw_complex *m_spectrum;
float *m_bin;
};
};
}

View File

@ -0,0 +1,39 @@
// Copyright (C) 2015 Foam Kernow
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
#include <pitchshift.h>
using namespace spiralcore;
using namespace std;
using namespace RubberBand;
RubberBandStretcher *pitchshift::m_stretcher = NULL;
// (Re)creates the shared stretcher for the given sample rate, releasing any
// previously created instance first. The stretcher is built for one channel
// with the real-time processing option.
void pitchshift::init(u32 srate) {
    // deleting a null pointer is a no-op, so no explicit check is required
    delete m_stretcher;
    m_stretcher = new RubberBandStretcher(
        srate,
        1,
        RubberBandStretcher::OptionProcessRealTime);
}
// Feeds the whole of `in` through the shared stretcher with the pitch scaled
// by freq_scale, then retrieves up to out.get_length() samples into `out`.
//
// freq_scale is sanitised first:
//   - below 1/256, or NaN  -> 1 (no pitch change)
//   - above 255            -> 255
//
// init() must have been called beforehand; m_stretcher is used unchecked.
void pitchshift::process(const sample &in, float freq_scale, sample &out) {
    // The limits must be floating point: the integer expression 1/256
    // evaluates to 0, which would let zero and tiny scales through.
    const float min_scale = 1.0f/256.0f;
    const float max_scale = 255.0f;

    // Written as a negated >= so that NaN (e.g. from a 0/0 frequency ratio)
    // also falls back to "no shift" instead of reaching the stretcher.
    // NOTE(review): the fallback value of 1 is kept from the original code;
    // confirm it was not meant to clamp to min_scale instead.
    if (!(freq_scale>=min_scale)) freq_scale=1;
    if (freq_scale>max_scale) freq_scale=max_scale;

    m_stretcher->setPitchScale(freq_scale);

    // the stretcher takes an array of per-channel buffers; one channel here
    const float *in_ptr = in.get_buffer();
    m_stretcher->process(&in_ptr, in.get_length(), false);

    float *out_ptr = out.get_non_const_buffer();
    m_stretcher->retrieve(&out_ptr, out.get_length());
}

View File

@ -0,0 +1,35 @@
// Copyright (C) 2015 Foam Kernow
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
#ifndef SPIRALCORE_PITCHSHIFT
#define SPIRALCORE_PITCHSHIFT
#include <jellyfish/core/types.h>
#include <jellyfish/fluxa/sample.h>
#include <rubberband/RubberBandStretcher.h>
namespace spiralcore {
// Static wrapper around a single shared RubberBand stretcher instance.
class pitchshift {
public:
    // Creates (or recreates) the shared stretcher for the given sample rate.
    // Must be called before process().
    static void init(u32 srate);

    // Runs `in` through the stretcher with its pitch scaled by freq_scale
    // (1 = unchanged) and writes the retrieved samples into `out`.
    // Parameter name matches the definition in pitchshift.cpp.
    static void process(const sample &in, float freq_scale, sample &out);

    // The shared stretcher; NULL until init() has been called.
    static RubberBand::RubberBandStretcher *m_stretcher;
};
}
#endif

View File

@ -16,6 +16,7 @@
#include "renderer.h"
#include <iostream>
#include "pitchshift.h"
using namespace spiralcore;
using namespace std;
@ -36,6 +37,7 @@ void renderer::init(brain &source, brain &target) {
m_render_index=0;
m_stretch=1;
m_last_tgt_shift=0;
m_autotune=0;
}
static int ratio_time = 0;
@ -85,7 +87,7 @@ bool renderer::find_render_blocks(u32 nframes) {
return false;
}
/*
/*
cerr<<"-----------------"<<endl;
cerr<<"tgt start:"<<m_target_index<<endl;
cerr<<"tgt end:"<<tgt_end<<endl;
@ -97,7 +99,7 @@ bool renderer::find_render_blocks(u32 nframes) {
cerr<<"render time (index) "<<m_render_index*tgt_shift<<endl;
cerr<<"real vs index = "<<(s32)m_render_time-(s32)(m_render_index*tgt_shift)<<endl;
cerr<<m_render_blocks.size()<<endl;
*/
*/
// search phase
// get indices for current buffer
@ -144,6 +146,8 @@ bool renderer::find_render_blocks(u32 nframes) {
}
void renderer::render(u32 nframes, float *buf) {
sample render_pcm(m_source.get_block_size());
// render phase
// render all blocks in list
for (std::list<render_block>::iterator i=m_render_blocks.begin(); i!=m_render_blocks.end(); ++i) {
@ -153,20 +157,31 @@ void renderer::render(u32 nframes, float *buf) {
// get the sample offset into the buffer
s32 offset = i->m_time-m_render_time;
// assume midway through block
u32 block_start = offset;
u32 buffer_start = 0;
if (offset<0) {
block_start=-offset;
if (block_start>=pcm.get_length()) i->m_finished=true;
if (block_start>=pcm.get_length() ||
i->m_position>=pcm.get_length()) i->m_finished=true;
} else { // block is midway through buffer
block_start=0;
buffer_start=offset;
}
// cerr<<"-----------------"<<endl;
// cerr<<"block start:"<<block_start<<endl;
// cerr<<"buffer start:"<<buffer_start<<endl;
// cerr<<"-----------------"<<endl;
// cerr<<"block start:"<<block_start<<endl;
// cerr<<"buffer start:"<<buffer_start<<endl;
float pitch_scale = m_target.get_block(i->m_tgt_index).get_freq() /
m_source.get_block(i->m_index).get_freq();
// fade in/out autotune
pitch_scale = pitch_scale*m_autotune + 1.0f*(1-m_autotune);
//pitchshift::process(pcm,pitch_scale,render_pcm);
if (!i->m_finished) {
// mix in
@ -177,7 +192,7 @@ void renderer::render(u32 nframes, float *buf) {
while (block_pos<block_end && buffer_pos<nframes) {
// mix with normalised version
float brain_sample = (pcm[block_pos]*(1-m_n_mix)+
float brain_sample = (pcm[i->m_position]*(1-m_n_mix)+
n_pcm[block_pos]*m_n_mix);
// for mixing with target audio
@ -185,6 +200,9 @@ void renderer::render(u32 nframes, float *buf) {
buf[buffer_pos]+=(brain_sample*(1-m_target_mix) +
target_sample*m_target_mix)*0.2*m_volume;
i->m_position+=pitch_scale;
++buffer_pos;
++block_pos;
}
@ -204,141 +222,6 @@ void renderer::clean_up() {
}
}
void renderer::old_process(u32 nframes, float *buf) {
if (!m_playing) return;
// get new blocks from source for the current buffer
u32 tgt_shift = m_target.get_block_size()-m_target.get_overlap();
u32 tgt_end = (m_target_time+nframes)/(float)tgt_shift;
if (tgt_shift!=m_last_tgt_shift ||
tgt_end>=m_target.get_num_blocks() || m_source.get_num_blocks()==0) {
reset();
m_last_tgt_shift = tgt_shift;
// next time...
return;
}
cerr<<"-----------------"<<endl;
cerr<<"tgt start:"<<m_target_index<<endl;
cerr<<"tgt end:"<<tgt_end<<endl;
cerr<<":"<<tgt_end-m_target_index<<endl;
cerr<<"block time "<<m_target_index*tgt_shift<<endl;
cerr<<"render time "<<m_render_time<<endl;
cerr<<": "<<(s32)m_render_time-(s32)(m_target_index*tgt_shift)<<endl;
// cerr<<"-----------------"<<endl;
// cerr<<"tgt start:"<<m_target_index<<endl;
// cerr<<"tgt end:"<<tgt_end<<endl;
// get indices for current buffer
u32 counter = m_target_index;
//u32 cur_time = m_render_time;
while (counter<=tgt_end) {
u32 time=m_render_index*tgt_shift;
u32 src_index=0;
switch (m_search_algo) {
case BASIC:
src_index = m_source.search(m_target.get_block(m_target_index), m_search_params);
break;
case REV_BASIC:
src_index = m_source.rev_search(m_target.get_block(m_target_index), m_search_params);
break;
case SYNAPTIC:
case SYNAPTIC_SLIDE:
src_index = m_source.search_synapses(m_target.get_block(m_target_index), m_search_params);
break;
}
if (m_search_algo==SYNAPTIC_SLIDE) {
m_render_blocks.push_back(render_block(src_index,m_target_index,time));
if (m_source.get_current_error()<m_slide_error) {
m_target_index++;
}
else{ cerr<<"skip"<<endl; }
} else {
// put them in the index list
m_render_blocks.push_back(render_block(src_index,m_target_index,time));
m_target_index++;
}
counter++;
}
// render all blocks in list
for (std::list<render_block>::iterator i=m_render_blocks.begin(); i!=m_render_blocks.end(); ++i) {
const sample &pcm=m_source.get_block(i->m_index).get_pcm();
const sample &n_pcm=m_source.get_block(i->m_index).get_n_pcm();
const sample &target_pcm=m_target.get_block(i->m_tgt_index).get_pcm();
// get the sample offset into the buffer
s32 offset = i->m_time-m_render_time;
// assume midway through block
u32 block_start = offset;
u32 buffer_start = 0;
if (offset<0) {
block_start=-offset;
if (block_start>=pcm.get_length()) i->m_finished=true;
} else { // block is midway through buffer
block_start=0;
buffer_start=offset;
}
// cerr<<"-----------------"<<endl;
// cerr<<"block start:"<<block_start<<endl;
// cerr<<"buffer start:"<<buffer_start<<endl;
if (!i->m_finished) {
// mix in
u32 buffer_pos = buffer_start;
u32 block_pos = block_start;
u32 block_end = pcm.get_length();
while (block_pos<block_end && buffer_pos<nframes) {
// mix with normalised version
float brain_sample = (pcm[block_pos]*(1-m_n_mix)+
n_pcm[block_pos]*m_n_mix);
// for mixing with target audio
float target_sample = target_pcm[block_pos];
buf[buffer_pos]+=(brain_sample*(1-m_target_mix) +
target_sample*m_target_mix)*0.2*m_volume;
++buffer_pos;
++block_pos;
}
}
}
// delete old ones
std::list<render_block>::iterator i=m_render_blocks.begin();
std::list<render_block>::iterator ni=m_render_blocks.begin();
while(i!=m_render_blocks.end()) {
ni++;
if (i->m_finished) m_render_blocks.erase(i);
i=ni;
}
m_render_time+=nframes;
m_target_time+=nframes;
}
bool renderer::unit_test() {
brain source;
source.load_sound("test_data/up.wav");

View File

@ -23,9 +23,9 @@
namespace spiralcore {
class renderer {
public:
renderer(brain &source, brain &target) :
class renderer {
public:
renderer(brain &source, brain &target) :
m_source(source),
m_target(target),
m_search_params(0,0,0,100,0)
@ -58,7 +58,7 @@ renderer(brain &source, brain &target) :
static bool unit_test();
private:
private:
bool find_render_blocks(u32 nframes);
void render(u32 nframes, float *buf);
@ -69,11 +69,16 @@ private:
class render_block {
public:
render_block(u32 index, u32 tgt_index, u32 time) :
m_index(index), m_tgt_index(tgt_index), m_time(time), m_finished(false) {}
m_index(index),
m_tgt_index(tgt_index),
m_time(time),
m_finished(false),
m_position(0) {}
u32 m_index;
u32 m_tgt_index; // original target block
u32 m_time; // in samples
bool m_finished;
float m_position; // in samples
};
brain &m_source;
@ -89,13 +94,14 @@ private:
u32 m_stretch;
float m_n_mix;
float m_target_mix;
float m_autotune;
search_algo m_search_algo;
double m_slide_error;
u32 m_last_tgt_shift;
std::list<render_block> m_render_blocks;
};
};
}