experimental pitch shifting

2025-07-01 09:43:36 +00:00 · 2015-09-23 18:35:13 +01:00
parent ad64e64984
commit 44be9d76cb
12 changed files with 555 additions and 561 deletions
--- a/samplebrain/qt/qtmain.cpp
+++ b/samplebrain/qt/qtmain.cpp
@ -23,6 +23,7 @@

 #include "process_thread.h"
 #include "audio_thread.h"
+#include "pitchshift.h"

 using namespace std;

@ -31,6 +32,8 @@ int main( int argc , char *argv[] ){
    MainWindow mainWin;
    mainWin.show();

+    pitchshift::init(44100);
+
    process_thread pt;
    audio_thread at(pt);
    pt.register_renderer(at.m_renderer);
--- a/samplebrain/qt/samplebrain.pro
+++ b/samplebrain/qt/samplebrain.pro
@ -25,6 +25,7 @@ SOURCES += MainWindow.cpp \
           ../src/renderer.cpp \
           ../src/status.cpp \
           ../src/window.cpp \
+           ../src/pitchshift.cpp \
           ../src/aquila/filter/MelFilterBank.cpp \
           ../src/aquila/filter/MelFilter.cpp \
           ../src/aquila/transform/Dct.cpp \
@ -38,7 +39,7 @@ SOURCES += MainWindow.cpp \
           ../../../jellyfish/src/core/stream.cpp

 INCLUDEPATH += ../src
-LIBS += -L.. -lportaudio -lfftw3 -lsndfile -llo -ldl -lpthread -lm
+LIBS += -L.. -lrubberband -lportaudio -lfftw3 -lsndfile -llo -ldl -lpthread -lm
 #CONFIG+=debug

 QMAKE_CXXFLAGS += -Wall -Wno-unused -std=c++11 -DDONT_USE_FLUXA_GRAPH
--- a/samplebrain/src/block.cpp
+++ b/samplebrain/src/block.cpp
@ -26,6 +26,7 @@ FFT *block::m_fftw;
 Aquila::Mfcc *block::m_mfcc_proc;

 static const int MFCC_FILTERS=12;
+static const int FFT_BIAS=200;

 double blend(double a, double b, double t) {
  return a*(1-t)+b*t;
@ -81,12 +82,12 @@ block::block(u64 id, const string &filename, const sample &pcm, u32 rate, const
  assert(m_fftw!=NULL);

  w.run(m_pcm);
-    process(m_pcm,m_fft,m_mfcc);
+  process(m_pcm,m_fft,m_mfcc,m_dominant_freq);

  // rerun the normalised version
  normalise(m_n_pcm);
  w.run(m_n_pcm);
-    process(m_n_pcm,m_n_fft,m_n_mfcc);
+  process(m_n_pcm,m_n_fft,m_n_mfcc,m_n_dominant_freq);

  if (ditchpcm) {
    m_pcm.clear();
@ -105,7 +106,7 @@ void block::init_fft(u32 block_size)
  }
 }

-void block::process(const sample &pcm, sample &fft, sample &mfcc) {
+void block::process(const sample &pcm, sample &fft, sample &mfcc, float &freq) {
  m_fftw->impulse2freq(pcm.get_buffer());
  m_fftw->calculate_bins();

@ -116,6 +117,8 @@ void block::process(const sample &pcm, sample &fft, sample &mfcc) {
                                          m_fftw->m_spectrum[i][1]));
  }

+  freq = m_fftw->calculate_dominant_freq();
+
  u32 fft_size = m_block_size;
  if (fft_size>100) {
    fft.crop_to(100);
@ -135,8 +138,6 @@ void block::process(const sample &pcm, sample &fft, sample &mfcc) {
 }


-#define FFT_BIAS 200
-
 double block::_compare(const sample &fft_a, const sample &mfcc_a,
                       const sample &fft_b, const sample &mfcc_b,
                       const search_params &params) const
@ -183,11 +184,15 @@ double block::compare(const block &other, const search_params &params) const {
 }

 ios &spiralcore::operator||(ios &s, block &b) {
-    u32 version=2;
+  u32 version=3;
  string id("block");
  s||id||version;

  if (version>1) s||b.m_id;
+  if (version>2) {
+    s||b.m_dominant_freq;
+    s||b.m_n_dominant_freq;
+  }

  s||b.m_pcm||b.m_fft||b.m_mfcc;
  s||b.m_n_pcm||b.m_n_fft||b.m_n_mfcc;
--- a/samplebrain/src/block.h
+++ b/samplebrain/src/block.h
@ -29,8 +29,8 @@

 namespace spiralcore {

-class block {
-public:
+  class block {
+  public:
    // runs analysis on pcm
    block(u64 id, const std::string &filename, const sample &pcm, u32 rate, const window &w, bool ditchpcm=false);
    block() {}
@ -47,10 +47,12 @@ public:
    std::vector<u32> &get_synapse() { return m_synapse; }
    const std::vector<u32> &get_synapse_const() const { return m_synapse; }
    float &get_usage() { return m_usage; }
+    float get_freq() const { return m_dominant_freq; }
+    float get_n_freq() const { return m_n_dominant_freq; }

-private:
+  private:

-    void process(const sample &pcm, sample &fft, sample &mfcc);
+    void process(const sample &pcm, sample &fft, sample &mfcc, float &freq);

    double _compare(const sample &fft_a, const sample &mfcc_a,
                    const sample &fft_b, const sample &mfcc_b,
@ -75,11 +77,14 @@ private:
    std::vector<u32> m_synapse;
    float m_usage;

+    float m_dominant_freq;
+    float m_n_dominant_freq;
+
    friend ios &operator||(ios &s, block &b);

-};
+  };

-ios &operator||(ios &s, block &b);
+  ios &operator||(ios &s, block &b);

 }

--- a/samplebrain/src/brain.cpp
+++ b/samplebrain/src/brain.cpp
@ -71,19 +71,19 @@ void brain::init(u32 block_size, u32 overlap, window::type t, bool ditchpcm) {
    u32 count=0;
    for (std::list<sound>::iterator i=m_samples.begin(); i!=m_samples.end(); ++i) {
        count++;
-        chop_and_add(i->m_sample, count, ditchpcm);
+        chop_and_add(*i, count, ditchpcm);
    }
    status::update("all samples processed");
 }

-void brain::chop_and_add(const sample &s, u32 count, bool ditchpcm) {
+void brain::chop_and_add(const sound &s, u32 count, bool ditchpcm) {
    u32 pos=0;
    if (m_overlap>=m_block_size) m_overlap=0;
-    while (pos+m_block_size-1<s.get_length()) {
-        status::update("processing sample %d: %d%%",count,(int)(pos/(float)s.get_length()*100));
+    while (pos+m_block_size-1<s.m_sample.get_length()) {
+        status::update("processing sample %d: %d%%",count,(int)(pos/(float)s.m_sample.get_length()*100));
        sample region;
-        s.get_region(region,pos,pos+m_block_size-1);
-        m_blocks.push_back(block(m_blocks.size(),"",region,44100,m_window,ditchpcm));
+        s.m_sample.get_region(region,pos,pos+m_block_size-1);
+        m_blocks.push_back(block(m_blocks.size(),s.m_filename,region,44100,m_window,ditchpcm));
        pos += (m_block_size-m_overlap);
    }
 }
--- a/samplebrain/src/brain.h
+++ b/samplebrain/src/brain.h
@ -84,12 +84,13 @@ public:

 private:

-    void chop_and_add(const sample &s, u32 count, bool ditchpcm=false);
+    void chop_and_add(const sound &s, u32 count, bool ditchpcm=false);
    void deplete_usage();
    u32 stickify(const block &target, u32 closest_index, f32 dist, const search_params &params);

    vector<block> m_blocks;
    std::list<sound> m_samples;
+    vector<string> m_active_sounds;

    u32 m_block_size;
    u32 m_overlap;
--- a/samplebrain/src/fft.cpp
+++ b/samplebrain/src/fft.cpp
@ -53,6 +53,21 @@ void FFT::impulse2freq(const float *imp)
  fftw_execute(m_plan);
 }

+static const float SRATE = 44100;
+
+// Returns the frequency of the strongest bin in the lower half of
+// m_spectrum (bins 0 .. m_length/2-1).
+//
+// The winning bin index is scaled by SRATE/m_length, so the result is
+// expressed relative to the fixed file-level SRATE constant rather than
+// the rate of the analysed audio. If no examined bin has any power the
+// index stays at 0 and the function returns 0.
+// NOTE(review): m_spectrum is presumably filled by impulse2freq() - call
+// that first; confirm against the FFT constructor's plan setup.
+float FFT::calculate_dominant_freq() {
+  double highest = 0;
+  u32 index = 0;
+  for (u32 i=0; i<m_length/2; ++i) {
+    // power of a complex bin is re^2 + im^2; looking at the real part
+    // alone misses components whose phase puts their energy in the
+    // imaginary part (e.g. a bin-aligned pure sine)
+    double re = m_spectrum[i][0];
+    double im = m_spectrum[i][1];
+    double t = re*re + im*im;
+    if (t>highest) {
+      index=i;
+      highest=t;
+    }
+  }
+  return index * (SRATE/(float)m_length);
+}
+
 void FFT::calculate_bins() {
  float useful_area = m_length/2;

@ -70,7 +85,7 @@ void FFT::calculate_bins() {

    for (u32 i=from; i<=to; i++) {
      if (i<m_length) {
-				value += m_spectrum[i][0];
+        value += m_spectrum[i][0]*m_spectrum[i][0];
      }
    }

--- a/samplebrain/src/fft.h
+++ b/samplebrain/src/fft.h
@ -24,13 +24,14 @@

 namespace spiralcore {

-class FFT
-{
-public:
+  class FFT
+  {
+  public:
    FFT(u32 length, u32 num_bins);
    ~FFT();
 	void impulse2freq(const float *imp);
    void calculate_bins();
+    float calculate_dominant_freq();

 	fftw_plan m_plan;
 	u32 m_length;
@ -38,7 +39,7 @@ public:
 	double *m_in;
 	fftw_complex *m_spectrum;
    float *m_bin;
-};
+  };

 }

--- a/samplebrain/src/pitchshift.cpp
+++ b/samplebrain/src/pitchshift.cpp
@ -0,0 +1,39 @@
+// Copyright (C) 2015 Foam Kernow
+//
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation; either version 2 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+#include <pitchshift.h>
+
+using namespace spiralcore;
+using namespace std;
+using namespace RubberBand;
+
+RubberBandStretcher *pitchshift::m_stretcher = NULL;
+
+// (Re)creates the shared stretcher for the given sample rate. Any
+// stretcher left over from an earlier init() call is destroyed first.
+// The new instance is built for one channel with OptionProcessRealTime.
+void pitchshift::init(u32 srate) {
+  // delete on a null pointer is a no-op, so no explicit check is needed
+  delete m_stretcher;
+
+  const u32 channels = 1;
+  m_stretcher = new RubberBandStretcher(srate, channels, RubberBandStretcher::OptionProcessRealTime);
+}
+
+// Pitch-shifts `in` by the ratio freq_scale and writes the result into
+// `out`.
+//
+// freq_scale is passed to the stretcher's setPitchScale(). Values below
+// 1/256 (including zero, negatives and NaN) fall back to 1, values above
+// 255 are clamped to 255.
+// Up to out.get_length() samples are retrieved into out's existing
+// buffer; out is not resized here.
+// init() must have been called beforehand: m_stretcher starts out NULL
+// and is dereferenced here without a check.
+void pitchshift::process(const sample &in, float freq_scale, sample &out) {
+  // 1/256 in integer arithmetic is 0, so compare against a float
+  // constant; the negated >= form also sends NaN to the fallback
+  if (!(freq_scale>=1/256.0f)) freq_scale=1;
+  if (freq_scale>255) freq_scale=255;
+  m_stretcher->setPitchScale(freq_scale);
+  // the stretcher takes an array of per-channel buffers; we have one
+  const float *in_ptr = in.get_buffer();
+  m_stretcher->process(&in_ptr, in.get_length(), false);
+  float *out_ptr = out.get_non_const_buffer();
+  // NOTE(review): the number of samples actually retrieved is ignored -
+  // confirm callers can cope with a partially filled out buffer
+  m_stretcher->retrieve(&out_ptr, out.get_length());
+}
--- a/samplebrain/src/pitchshift.h
+++ b/samplebrain/src/pitchshift.h
@ -0,0 +1,35 @@
+// Copyright (C) 2015 Foam Kernow
+//
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation; either version 2 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+#ifndef SPIRALCORE_PITCHSHIFT
+#define SPIRALCORE_PITCHSHIFT
+
+#include <jellyfish/core/types.h>
+#include <jellyfish/fluxa/sample.h>
+#include <rubberband/RubberBandStretcher.h>
+
+namespace spiralcore {
+
+  // Static wrapper around a single shared RubberBand stretcher.
+  // Call init() before process().
+  class pitchshift {
+  public:
+    // (re)creates m_stretcher for the given sample rate
+    static void init(u32 srate);
+    // pitch-shifts in by the ratio freq_scale, writing the result to out
+    static void process(const sample &in, float freq_scale, sample &out);
+
+    // shared stretcher instance; NULL until init() has been called
+    static RubberBand::RubberBandStretcher *m_stretcher;
+  };
+
+}
+#endif
--- a/samplebrain/src/renderer.cpp
+++ b/samplebrain/src/renderer.cpp
@ -16,6 +16,7 @@

 #include "renderer.h"
 #include <iostream>
+#include "pitchshift.h"

 using namespace spiralcore;
 using namespace std;
@ -36,6 +37,7 @@ void renderer::init(brain &source, brain &target) {
  m_render_index=0;
  m_stretch=1;
  m_last_tgt_shift=0;
+  m_autotune=0;
 }

 static int ratio_time = 0;
@ -85,7 +87,7 @@ bool renderer::find_render_blocks(u32 nframes) {
    return false;
  }

-/*
+  /*
    cerr<<"-----------------"<<endl;
    cerr<<"tgt start:"<<m_target_index<<endl;
    cerr<<"tgt end:"<<tgt_end<<endl;
@ -97,7 +99,7 @@ bool renderer::find_render_blocks(u32 nframes) {
    cerr<<"render time (index) "<<m_render_index*tgt_shift<<endl;
    cerr<<"real vs index = "<<(s32)m_render_time-(s32)(m_render_index*tgt_shift)<<endl;
    cerr<<m_render_blocks.size()<<endl;
-*/
+  */

  // search phase
  // get indices for current buffer
@ -144,6 +146,8 @@ bool renderer::find_render_blocks(u32 nframes) {
 }

 void renderer::render(u32 nframes, float *buf) {
+  sample render_pcm(m_source.get_block_size());
+
  // render phase
  // render all blocks in list
  for (std::list<render_block>::iterator i=m_render_blocks.begin(); i!=m_render_blocks.end(); ++i) {
@ -153,20 +157,31 @@ void renderer::render(u32 nframes, float *buf) {
    // get the sample offset into the buffer
    s32 offset = i->m_time-m_render_time;

+
    // assume midway through block
    u32 block_start = offset;
    u32 buffer_start = 0;
    if (offset<0) {
      block_start=-offset;
-            if (block_start>=pcm.get_length()) i->m_finished=true;
+      if (block_start>=pcm.get_length() ||
+          i->m_position>=pcm.get_length()) i->m_finished=true;
    } else { // block is midway through buffer
      block_start=0;
      buffer_start=offset;
    }

-//        cerr<<"-----------------"<<endl;
-//        cerr<<"block start:"<<block_start<<endl;
-//        cerr<<"buffer start:"<<buffer_start<<endl;
+    //        cerr<<"-----------------"<<endl;
+    //        cerr<<"block start:"<<block_start<<endl;
+    //        cerr<<"buffer start:"<<buffer_start<<endl;
+
+    float pitch_scale = m_target.get_block(i->m_tgt_index).get_freq() /
+      m_source.get_block(i->m_index).get_freq();
+
+    // fade in/out autotune
+    pitch_scale = pitch_scale*m_autotune + 1.0f*(1-m_autotune);
+
+    //pitchshift::process(pcm,pitch_scale,render_pcm);
+

    if (!i->m_finished) {
      // mix in
@ -177,7 +192,7 @@ void renderer::render(u32 nframes, float *buf) {

      while (block_pos<block_end && buffer_pos<nframes) {
        // mix with normalised version
-                float brain_sample = (pcm[block_pos]*(1-m_n_mix)+
+        float brain_sample = (pcm[i->m_position]*(1-m_n_mix)+
                              n_pcm[block_pos]*m_n_mix);

        // for mixing with target audio
@ -185,6 +200,9 @@ void renderer::render(u32 nframes, float *buf) {

        buf[buffer_pos]+=(brain_sample*(1-m_target_mix) +
                          target_sample*m_target_mix)*0.2*m_volume;
+
+        i->m_position+=pitch_scale;
+
        ++buffer_pos;
        ++block_pos;
      }
@ -204,141 +222,6 @@ void renderer::clean_up() {
  }
 }

-
-
-
-
-
-
-
-
-
-
-
-
-void renderer::old_process(u32 nframes, float *buf) {
-    if (!m_playing) return;
-
-    // get new blocks from source for the current buffer
-    u32 tgt_shift = m_target.get_block_size()-m_target.get_overlap();
-    u32 tgt_end = (m_target_time+nframes)/(float)tgt_shift;
-
-    if (tgt_shift!=m_last_tgt_shift ||
-        tgt_end>=m_target.get_num_blocks() || m_source.get_num_blocks()==0) {
-        reset();
-        m_last_tgt_shift = tgt_shift;
-        // next time...
-        return;
-    }
-
-    cerr<<"-----------------"<<endl;
-    cerr<<"tgt start:"<<m_target_index<<endl;
-    cerr<<"tgt end:"<<tgt_end<<endl;
-    cerr<<":"<<tgt_end-m_target_index<<endl;
-    cerr<<"block time "<<m_target_index*tgt_shift<<endl;
-    cerr<<"render time "<<m_render_time<<endl;
-    cerr<<": "<<(s32)m_render_time-(s32)(m_target_index*tgt_shift)<<endl;
-
-
-//    cerr<<"-----------------"<<endl;
-//    cerr<<"tgt start:"<<m_target_index<<endl;
-//    cerr<<"tgt end:"<<tgt_end<<endl;
-
-    // get indices for current buffer
-    u32 counter = m_target_index;
-    //u32 cur_time = m_render_time;
-    while (counter<=tgt_end) {
-        u32 time=m_render_index*tgt_shift;
-        u32 src_index=0;
-
-        switch (m_search_algo) {
-        case BASIC:
-            src_index = m_source.search(m_target.get_block(m_target_index), m_search_params);
-            break;
-        case REV_BASIC:
-            src_index = m_source.rev_search(m_target.get_block(m_target_index), m_search_params);
-            break;
-        case SYNAPTIC:
-        case SYNAPTIC_SLIDE:
-            src_index = m_source.search_synapses(m_target.get_block(m_target_index), m_search_params);
-            break;
-        }
-
-        if (m_search_algo==SYNAPTIC_SLIDE) {
-            m_render_blocks.push_back(render_block(src_index,m_target_index,time));
-
-            if (m_source.get_current_error()<m_slide_error) {
-                m_target_index++;
-            }
-            else{ cerr<<"skip"<<endl; }
-        } else {
-            // put them in the index list
-            m_render_blocks.push_back(render_block(src_index,m_target_index,time));
-            m_target_index++;
-        }
-        counter++;
-    }
-
-    // render all blocks in list
-    for (std::list<render_block>::iterator i=m_render_blocks.begin(); i!=m_render_blocks.end(); ++i) {
-        const sample &pcm=m_source.get_block(i->m_index).get_pcm();
-        const sample &n_pcm=m_source.get_block(i->m_index).get_n_pcm();
-        const sample &target_pcm=m_target.get_block(i->m_tgt_index).get_pcm();
-        // get the sample offset into the buffer
-        s32 offset = i->m_time-m_render_time;
-
-        // assume midway through block
-        u32 block_start = offset;
-        u32 buffer_start = 0;
-        if (offset<0) {
-            block_start=-offset;
-            if (block_start>=pcm.get_length()) i->m_finished=true;
-        } else { // block is midway through buffer
-            block_start=0;
-            buffer_start=offset;
-        }
-
-//        cerr<<"-----------------"<<endl;
-//        cerr<<"block start:"<<block_start<<endl;
-//        cerr<<"buffer start:"<<buffer_start<<endl;
-
-        if (!i->m_finished) {
-            // mix in
-            u32 buffer_pos = buffer_start;
-            u32 block_pos = block_start;
-            u32 block_end = pcm.get_length();
-
-
-            while (block_pos<block_end && buffer_pos<nframes) {
-                // mix with normalised version
-                float brain_sample = (pcm[block_pos]*(1-m_n_mix)+
-                                      n_pcm[block_pos]*m_n_mix);
-
-                // for mixing with target audio
-                float target_sample = target_pcm[block_pos];
-
-                buf[buffer_pos]+=(brain_sample*(1-m_target_mix) +
-                                  target_sample*m_target_mix)*0.2*m_volume;
-                ++buffer_pos;
-                ++block_pos;
-            }
-        }
-    }
-
-    // delete old ones
-    std::list<render_block>::iterator i=m_render_blocks.begin();
-    std::list<render_block>::iterator ni=m_render_blocks.begin();
-    while(i!=m_render_blocks.end()) {
-        ni++;
-        if (i->m_finished) m_render_blocks.erase(i);
-        i=ni;
-    }
-
-    m_render_time+=nframes;
-    m_target_time+=nframes;
-}
-
-
 bool renderer::unit_test() {
  brain source;
  source.load_sound("test_data/up.wav");
--- a/samplebrain/src/renderer.h
+++ b/samplebrain/src/renderer.h
@ -23,9 +23,9 @@

 namespace spiralcore {

-class renderer {
-public:
-renderer(brain &source, brain &target) :
+  class renderer {
+  public:
+  renderer(brain &source, brain &target) :
    m_source(source),
      m_target(target),
      m_search_params(0,0,0,100,0)
@ -58,7 +58,7 @@ renderer(brain &source, brain &target) :

    static bool unit_test();

-private:
+  private:

    bool find_render_blocks(u32 nframes);
    void render(u32 nframes, float *buf);
@ -69,11 +69,16 @@ private:
    class render_block {
    public:
    render_block(u32 index, u32 tgt_index, u32 time) :
-            m_index(index), m_tgt_index(tgt_index), m_time(time), m_finished(false) {}
+      m_index(index),
+        m_tgt_index(tgt_index),
+        m_time(time),
+        m_finished(false),
+        m_position(0) {}
      u32 m_index;
      u32 m_tgt_index; // original target block
      u32 m_time; // in samples
      bool m_finished;
+      float m_position; // in samples
    };

    brain &m_source;
@ -89,13 +94,14 @@ private:
    u32 m_stretch;
    float m_n_mix;
    float m_target_mix;
+    float m_autotune;

    search_algo m_search_algo;
    double m_slide_error;
    u32 m_last_tgt_shift;

    std::list<render_block> m_render_blocks;
-};
+  };

 }