This commit is contained in:
Dave Griffiths 2015-07-07 20:41:53 +01:00
commit 35b84c6da5
73 changed files with 11369 additions and 0 deletions

1
.#samplebrain.h Symbolic link
View File

@ -0,0 +1 @@
dave@fulmar.4739:1436183149

1
.#test.py Symbolic link
View File

@ -0,0 +1 @@
dave@fulmar.5129:1435565326

BIN
cooking/mock/colour.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 432 KiB

1953
cooking/mock/drawing.svg Normal file

File diff suppressed because one or more lines are too long

After

Width:  |  Height:  |  Size: 197 KiB

6099
cooking/mock/drawing2.svg Normal file

File diff suppressed because one or more lines are too long

After

Width:  |  Height:  |  Size: 956 KiB

BIN
cooking/mock/monochrome.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 371 KiB

BIN
cooking/mock/wav1.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 43 KiB

BIN
cooking/mock/wav1.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 4.6 KiB

BIN
cooking/mock/wav1.xcf Normal file

Binary file not shown.

BIN
cooking/mock/wav2.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 40 KiB

BIN
cooking/mock/wav2.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 4.0 KiB

BIN
cooking/mock/wav3.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 42 KiB

BIN
cooking/mock/wav3.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 4.0 KiB

BIN
cooking/mock/wav4.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 56 KiB

BIN
cooking/mock/wav4.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 4.2 KiB

BIN
cooking/mock/wav5.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 57 KiB

BIN
cooking/mock/wav5.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 4.4 KiB

BIN
cooking/mock/wav6.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 29 KiB

BIN
cooking/mock/wav6.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.6 KiB

BIN
cooking/mock/wav7.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 52 KiB

BIN
cooking/mock/wav7.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 4.2 KiB

BIN
cooking/mock/wav7.xcf Normal file

Binary file not shown.

BIN
cooking/mock/wav8.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 7.8 KiB

BIN
cooking/mock/wav8.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.1 KiB

BIN
cooking/mock/wav9.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 18 KiB

BIN
cooking/mock/wav9.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.8 KiB

BIN
python/_magicsquares.so Executable file

Binary file not shown.

View File

@ -0,0 +1 @@
from .base import *

Binary file not shown.

173
python/features/base.py Normal file
View File

@ -0,0 +1,173 @@
# calculate filterbank features. Provides e.g. fbank and mfcc features for use in ASR applications
# Author: James Lyons 2012
import numpy
from features import sigproc
from scipy.fftpack import dct
# make it python3.x compatible
try:
    xrange(1)
except NameError:
    # Python 3 has no xrange; its range is already lazy, so alias it.
    # Only NameError is caught so unrelated failures are not hidden.
    xrange = range
def mfcc(signal,samplerate=16000,winlen=0.025,winstep=0.01,numcep=13,
         nfilt=26,nfft=512,lowfreq=0,highfreq=None,preemph=0.97,ceplifter=22,appendEnergy=True):
    """Compute MFCC features from an audio signal.

    The filterbank energies from fbank() are log-compressed, passed through an
    orthonormal type-2 DCT along each row, truncated to numcep coefficients and
    liftered.

    :param signal: the audio signal from which to compute features.
    :param samplerate: the samplerate of the signal, forwarded to fbank().
    :param winlen: analysis window length in seconds (default 0.025).
    :param winstep: step between successive windows in seconds (default 0.01).
    :param numcep: number of cepstral coefficients kept per frame (default 13).
    :param nfilt: number of filters in the filterbank (default 26).
    :param nfft: the FFT size (default 512).
    :param lowfreq: lowest band edge of the mel filters in Hz (default 0).
    :param highfreq: highest band edge of the mel filters in Hz; None lets fbank() pick samplerate/2.
    :param preemph: preemphasis coefficient forwarded to fbank() (default 0.97).
    :param ceplifter: liftering coefficient passed to lifter() (default 22).
    :param appendEnergy: if true, column 0 is overwritten with the log of the frame energy.
    :returns: a numpy array with one row of cepstral features per frame.
    """
    fbank_energies, frame_energy = fbank(signal, samplerate, winlen, winstep,
                                         nfilt, nfft, lowfreq, highfreq, preemph)
    cepstra = dct(numpy.log(fbank_energies), type=2, axis=1, norm='ortho')
    cepstra = lifter(cepstra[:, :numcep], ceplifter)
    if appendEnergy:
        # the zeroth coefficient is replaced by the log of the total frame energy
        cepstra[:, 0] = numpy.log(frame_energy)
    return cepstra
def fbank(signal,samplerate=16000,winlen=0.025,winstep=0.01,
          nfilt=26,nfft=512,lowfreq=0,highfreq=None,preemph=0.97):
    """Compute Mel-filterbank energy features from an audio signal.

    :param signal: the audio signal from which to compute features.
    :param samplerate: the samplerate of the signal.
    :param winlen: analysis window length in seconds (default 0.025).
    :param winstep: step between successive windows in seconds (default 0.01).
    :param nfilt: number of filters in the filterbank (default 26).
    :param nfft: the FFT size (default 512).
    :param lowfreq: lowest band edge of the mel filters in Hz (default 0).
    :param highfreq: highest band edge of the mel filters in Hz; a falsy value means samplerate/2.
    :param preemph: preemphasis coefficient (default 0.97).
    :returns: a pair (feat, energy): feat holds one row of filterbank energies
        per frame, energy is the summed power spectrum of each frame. Exact
        zeros in either are replaced by machine epsilon so a later log is finite.
    """
    if not highfreq:
        highfreq = samplerate/2
    tiny = numpy.finfo(float).eps  # stand-in for exact zeros, keeps log() finite

    emphasised = sigproc.preemphasis(signal, preemph)
    frames = sigproc.framesig(emphasised, winlen*samplerate, winstep*samplerate)
    pspec = sigproc.powspec(frames, nfft)

    # total energy of each frame
    energy = numpy.sum(pspec, 1)
    energy = numpy.where(energy == 0, tiny, energy)

    # project the power spectrum onto the mel filters
    filters = get_filterbanks(nfilt, nfft, samplerate, lowfreq, highfreq)
    feat = numpy.dot(pspec, filters.T)
    feat = numpy.where(feat == 0, tiny, feat)
    return feat, energy
def logfbank(signal,samplerate=16000,winlen=0.025,winstep=0.01,
             nfilt=26,nfft=512,lowfreq=0,highfreq=None,preemph=0.97):
    """Compute log Mel-filterbank energy features from an audio signal.

    All arguments are forwarded unchanged to fbank(); the frame energies it
    also returns are discarded.

    :param signal: the audio signal from which to compute features.
    :param samplerate: the samplerate of the signal.
    :param winlen: analysis window length in seconds (default 0.025).
    :param winstep: step between successive windows in seconds (default 0.01).
    :param nfilt: number of filters in the filterbank (default 26).
    :param nfft: the FFT size (default 512).
    :param lowfreq: lowest band edge of the mel filters in Hz (default 0).
    :param highfreq: highest band edge of the mel filters in Hz.
    :param preemph: preemphasis coefficient (default 0.97).
    :returns: the natural log of the filterbank energies, one row per frame.
    """
    energies, _ = fbank(signal, samplerate, winlen, winstep,
                        nfilt, nfft, lowfreq, highfreq, preemph)
    return numpy.log(energies)
def ssc(signal,samplerate=16000,winlen=0.025,winstep=0.01,
        nfilt=26,nfft=512,lowfreq=0,highfreq=None,preemph=0.97):
    """Compute Spectral Subband Centroid features from an audio signal.

    For every frame and filter the result is the filterbank projection of the
    frequency-weighted power spectrum divided by the plain filterbank energy.

    :param signal: the audio signal from which to compute features.
    :param samplerate: the samplerate of the signal.
    :param winlen: analysis window length in seconds (default 0.025).
    :param winstep: step between successive windows in seconds (default 0.01).
    :param nfilt: number of filters in the filterbank (default 26).
    :param nfft: the FFT size (default 512).
    :param lowfreq: lowest band edge of the mel filters in Hz (default 0).
    :param highfreq: highest band edge of the mel filters in Hz; a falsy value means samplerate/2.
    :param preemph: preemphasis coefficient (default 0.97).
    :returns: a numpy array with one row of nfilt centroids per frame.
    """
    if not highfreq:
        highfreq = samplerate/2

    emphasised = sigproc.preemphasis(signal, preemph)
    frames = sigproc.framesig(emphasised, winlen*samplerate, winstep*samplerate)
    pspec = sigproc.powspec(frames, nfft)
    # exact zeros would make the final division undefined
    pspec = numpy.where(pspec == 0, numpy.finfo(float).eps, pspec)

    filters = get_filterbanks(nfilt, nfft, samplerate, lowfreq, highfreq)
    band_energy = numpy.dot(pspec, filters.T)

    # one frequency weight per spectrum bin, repeated for every frame
    num_frames = numpy.size(pspec, 0)
    num_bins = numpy.size(pspec, 1)
    bin_freqs = numpy.linspace(1, samplerate/2, num_bins)
    weights = numpy.tile(bin_freqs, (num_frames, 1))

    weighted_energy = numpy.dot(pspec*weights, filters.T)
    return weighted_energy / band_energy
def hz2mel(hz):
    """Convert a frequency in Hertz to Mels.

    :param hz: a value in Hz; numpy arrays are converted element-wise.
    :returns: 2595 * log10(1 + hz/700), with the same shape as the input.
    """
    scaled = hz/700.0
    return 2595 * numpy.log10(1+scaled)
def mel2hz(mel):
    """Convert a value in Mels to Hertz.

    :param mel: a value in Mels; numpy arrays are converted element-wise.
    :returns: 700 * (10**(mel/2595) - 1), with the same shape as the input.
    """
    exponent = mel/2595.0
    return 700*(10**exponent-1)
def get_filterbanks(nfilt=20,nfft=512,samplerate=16000,lowfreq=0,highfreq=None):
    """Compute a Mel-filterbank.

    The filters are stored in the rows and the columns correspond to FFT bins,
    so the result has shape nfilt * (nfft//2 + 1).

    :param nfilt: the number of filters in the filterbank, default 20.
    :param nfft: the FFT size. Default is 512.
    :param samplerate: the samplerate of the signal. Affects mel spacing.
    :param lowfreq: lowest band edge of mel filters, default 0 Hz.
    :param highfreq: highest band edge of mel filters; a falsy value means samplerate/2.
    :returns: a numpy array of size nfilt * (nfft//2 + 1); each row holds one triangular filter.
    :raises AssertionError: if highfreq is greater than samplerate/2.
    """
    highfreq = highfreq or samplerate/2
    assert highfreq <= samplerate/2, "highfreq is greater than samplerate/2"

    # band edges evenly spaced on the mel scale: nfilt filters need nfilt+2 points
    lowmel = hz2mel(lowfreq)
    highmel = hz2mel(highfreq)
    melpoints = numpy.linspace(lowmel, highmel, nfilt+2)

    # the edges are in Hz but the filters are laid out over FFT bins,
    # so convert each edge to a (floored) bin number
    edges = numpy.floor((nfft+1)*mel2hz(melpoints)/samplerate)

    # floor division keeps the dimension an integer under true division
    # (nfft/2+1 is a float on Python 3 and numpy.zeros rejects it)
    filters = numpy.zeros([nfilt, nfft//2+1])
    for j in range(nfilt):
        left, centre, right = edges[j], edges[j+1], edges[j+2]
        # rising slope from the left edge up to the centre bin
        for i in range(int(left), int(centre)):
            filters[j, i] = (i - left)/(centre - left)
        # falling slope from the centre bin down to the right edge
        for i in range(int(centre), int(right)):
            filters[j, i] = (right - i)/(right - centre)
    return filters
def lifter(cepstra,L=22):
    """Apply a cepstral lifter to the matrix of cepstra.

    Each column n is scaled by 1 + (L/2)*sin(pi*n/L), which increases the
    magnitude of the high frequency DCT coeffs.

    :param cepstra: the matrix of mel-cepstra, numframes * numcep in size.
    :param L: the liftering coefficient to use. Default is 22. L <= 0 disables the lifter.
    :returns: the liftered cepstra; for L <= 0 the input object itself is returned unchanged.
    """
    if L > 0:
        nframes,ncoeff = numpy.shape(cepstra)
        n = numpy.arange(ncoeff)
        # L/2.0 forces true division so an odd integer L is not truncated on Python 2
        lift = 1 + (L/2.0)*numpy.sin(numpy.pi*n/L)
        return lift*cepstra
    else:
        # values of L <= 0, do nothing
        return cepstra

BIN
python/features/base.pyc Normal file

Binary file not shown.

113
python/features/sigproc.py Normal file
View File

@ -0,0 +1,113 @@
# This file includes routines for basic signal processing including framing and computing power spectra.
# Author: James Lyons 2012
import numpy
import math
def framesig(sig,frame_len,frame_step,winfunc=lambda x:numpy.ones((1,x))):
    """Frame a signal into overlapping frames.

    The signal is zero-padded at the end so that the last frame is complete.

    :param sig: the audio signal to frame.
    :param frame_len: length of each frame in samples (rounded to an integer).
    :param frame_step: number of samples between the starts of consecutive frames (rounded to an integer).
    :param winfunc: called with frame_len; its result is multiplied into every frame. The default is all ones.
    :returns: an array of frames. Size is NUMFRAMES by frame_len.
    """
    slen = len(sig)
    frame_len = int(round(frame_len))
    frame_step = int(round(frame_step))

    # a signal no longer than one frame still yields exactly one (padded) frame
    numframes = 1
    if slen > frame_len:
        numframes += int(math.ceil((1.0*slen - frame_len)/frame_step))

    # pad with zeros so the final frame has frame_len samples
    padlen = int((numframes-1)*frame_step + frame_len)
    padding = numpy.zeros((padlen - slen,))
    padsignal = numpy.concatenate((sig, padding))

    # indices[f, k] = start of frame f + offset k within the frame
    offsets = numpy.tile(numpy.arange(0, frame_len), (numframes, 1))
    starts = numpy.tile(numpy.arange(0, numframes*frame_step, frame_step), (frame_len, 1)).T
    indices = numpy.array(offsets + starts, dtype=numpy.int32)

    window = numpy.tile(winfunc(frame_len), (numframes, 1))
    return padsignal[indices]*window
def deframesig(frames,siglen,frame_len,frame_step,winfunc=lambda x:numpy.ones((1,x))):
    """Does overlap-add procedure to undo the action of framesig.

    :param frames: the array of frames, NUMFRAMES by frame_len.
    :param siglen: the length of the desired signal, use 0 if unknown. Output is truncated to siglen samples.
    :param frame_len: length of each frame in samples (rounded to an integer).
    :param frame_step: number of samples between the starts of consecutive frames (rounded to an integer).
    :param winfunc: called with frame_len; its result is the window that was applied to each frame.
        It may return shape (frame_len,) or (1, frame_len). The default is all ones.
    :returns: a 1-D signal.
    :raises AssertionError: if the second dimension of frames is not frame_len.
    """
    # int() matches framesig and keeps the values usable as sizes on Python 2,
    # where round() returns a float
    frame_len = int(round(frame_len))
    frame_step = int(round(frame_step))
    numframes = numpy.shape(frames)[0]
    assert numpy.shape(frames)[1] == frame_len, '"frames" matrix is wrong size, 2nd dim is not equal to frame_len'

    # indices[f, k] = position in the output of sample k of frame f
    indices = numpy.tile(numpy.arange(0,frame_len),(numframes,1)) + numpy.tile(numpy.arange(0,numframes*frame_step,frame_step),(frame_len,1)).T
    indices = numpy.array(indices,dtype=numpy.int32)
    padlen = (numframes-1)*frame_step + frame_len

    if siglen <= 0: siglen = padlen

    # The buffers must be 1-D: they are indexed by sample position. A (1, padlen)
    # array would be indexed along its length-1 axis and raise IndexError.
    rec_signal = numpy.zeros((padlen,))
    window_correction = numpy.zeros((padlen,))
    # flatten so both (frame_len,) and (1, frame_len) windows line up with a frame
    win = numpy.ravel(winfunc(frame_len))

    for i in range(0,numframes):
        frame_idx = indices[i,:]
        window_correction[frame_idx] = window_correction[frame_idx] + win + 1e-15 #add a little bit so it is never zero
        rec_signal[frame_idx] = rec_signal[frame_idx] + frames[i,:]

    # divide out the accumulated window weight of each output sample
    rec_signal = rec_signal/window_correction
    return rec_signal[0:siglen]
def magspec(frames,NFFT):
    """Compute the magnitude spectrum of each frame in frames.

    The spectrum is taken with numpy.fft.rfft, so only the non-negative
    frequency bins are returned (NFFT/2 + 1 values per frame for even NFFT).

    :param frames: the array of frames. Each row is a frame.
    :param NFFT: the FFT length to use, passed to rfft as its length argument.
    :returns: an array with one row per frame holding the absolute value of that frame's spectrum.
    """
    spectrum = numpy.fft.rfft(frames, NFFT)
    magnitudes = numpy.absolute(spectrum)
    return magnitudes
def powspec(frames,NFFT):
    """Compute the power spectrum of each frame in frames.

    :param frames: the array of frames. Each row is a frame.
    :param NFFT: the FFT length to use, forwarded to magspec().
    :returns: the squared magnitude spectrum of every frame scaled by 1/NFFT, one row per frame.
    """
    magnitudes = magspec(frames, NFFT)
    return 1.0/NFFT * numpy.square(magnitudes)
def logpowspec(frames,NFFT,norm=1):
    """Compute the log power spectrum (in dB) of each frame in frames.

    :param frames: the array of frames. Each row is a frame.
    :param NFFT: the FFT length to use, forwarded to powspec().
    :param norm: if true, the maximum over all frames is subtracted so the largest value becomes 0.
    :returns: 10*log10 of the power spectrum, one row per frame, with powers floored at 1e-30.
    """
    ps = powspec(frames, NFFT)
    # floor tiny powers so log10 never sees zero
    too_small = ps <= 1e-30
    ps[too_small] = 1e-30
    lps = 10*numpy.log10(ps)
    if not norm:
        return lps
    return lps - numpy.max(lps)
def preemphasis(signal,coeff=0.95):
    """Perform preemphasis on the input signal.

    The first sample is kept as is; every later sample has coeff times its
    predecessor subtracted from it.

    :param signal: the signal to filter.
    :param coeff: the preemphasis coefficient. 0 is no filter, default is 0.95.
    :returns: the filtered signal.
    """
    first = signal[0]
    differenced = signal[1:]-coeff*signal[:-1]
    return numpy.append(first, differenced)

BIN
python/features/sigproc.pyc Normal file

Binary file not shown.

438
python/magicsquares.py Normal file
View File

@ -0,0 +1,438 @@
# This file was automatically generated by SWIG (http://www.swig.org).
# Version 2.0.4
#
# Do not make changes to this file unless you know what you are doing--modify
# the SWIG interface file instead.
from sys import version_info
# Locate and load the compiled _magicsquares extension module.
if version_info >= (2,6,0):
    def swig_import_helper():
        """Load _magicsquares, preferring a copy in this file's directory."""
        from os.path import dirname
        import imp
        fp = None
        try:
            # look for the extension next to this wrapper first
            fp, pathname, description = imp.find_module('_magicsquares', [dirname(__file__)])
        except ImportError:
            # not found alongside this file: fall back to a normal import
            import _magicsquares
            return _magicsquares
        if fp is not None:
            try:
                _mod = imp.load_module('_magicsquares', fp, pathname, description)
            finally:
                # always close the file handle returned by find_module
                fp.close()
            return _mod
    _magicsquares = swig_import_helper()
    # the helper is only needed once; keep it out of the module namespace
    del swig_import_helper
else:
    import _magicsquares
del version_info
# _swig_property is an alias for the builtin property, when it exists.
try:
    _swig_property = property
except NameError:
    pass # Python < 2.2 doesn't have 'property'.
def _swig_setattr_nondynamic(self,class_type,name,value,static=1):
if (name == "thisown"): return self.this.own(value)
if (name == "this"):
if type(value).__name__ == 'SwigPyObject':
self.__dict__[name] = value
return
method = class_type.__swig_setmethods__.get(name,None)
if method: return method(self,value)
if (not static):
self.__dict__[name] = value
else:
raise AttributeError("You cannot add attributes to %s" % self)
def _swig_setattr(self,class_type,name,value):
    """Set an attribute on a proxy object, allowing new instance attributes (static=0)."""
    result = _swig_setattr_nondynamic(self, class_type, name, value, 0)
    return result
def _swig_getattr(self,class_type,name):
if (name == "thisown"): return self.this.own()
method = class_type.__swig_getmethods__.get(name,None)
if method: return method(self)
raise AttributeError(name)
def _swig_repr(self):
try: strthis = "proxy of " + self.this.__repr__()
except: strthis = ""
return "<%s.%s; %s >" % (self.__class__.__module__, self.__class__.__name__, strthis,)
# Pick the base class for all proxy classes. _newclass records whether the
# builtin `object` was available (1) or the fallback class is in use (0).
try:
    _object = object
    _newclass = 1
except AttributeError:
    class _object : pass
    _newclass = 0
class SwigPyIterator(_object):
    """Proxy for the SWIG iterator type; every method forwards to _magicsquares."""
    # attribute access is routed through the per-class setter/getter tables
    __swig_setmethods__ = {}
    __setattr__ = lambda self, name, value: _swig_setattr(self, SwigPyIterator, name, value)
    __swig_getmethods__ = {}
    __getattr__ = lambda self, name: _swig_getattr(self, SwigPyIterator, name)
    # cannot be constructed from Python
    def __init__(self, *args, **kwargs): raise AttributeError("No constructor defined - class is abstract")
    __repr__ = _swig_repr
    __swig_destroy__ = _magicsquares.delete_SwigPyIterator
    __del__ = lambda self : None;
    def value(self): return _magicsquares.SwigPyIterator_value(self)
    def incr(self, n = 1): return _magicsquares.SwigPyIterator_incr(self, n)
    def decr(self, n = 1): return _magicsquares.SwigPyIterator_decr(self, n)
    def distance(self, *args): return _magicsquares.SwigPyIterator_distance(self, *args)
    def equal(self, *args): return _magicsquares.SwigPyIterator_equal(self, *args)
    def copy(self): return _magicsquares.SwigPyIterator_copy(self)
    # both the Python 2 (next) and Python 3 (__next__) iterator methods are exposed
    def next(self): return _magicsquares.SwigPyIterator_next(self)
    def __next__(self): return _magicsquares.SwigPyIterator___next__(self)
    def previous(self): return _magicsquares.SwigPyIterator_previous(self)
    def advance(self, *args): return _magicsquares.SwigPyIterator_advance(self, *args)
    def __eq__(self, *args): return _magicsquares.SwigPyIterator___eq__(self, *args)
    def __ne__(self, *args): return _magicsquares.SwigPyIterator___ne__(self, *args)
    def __iadd__(self, *args): return _magicsquares.SwigPyIterator___iadd__(self, *args)
    def __isub__(self, *args): return _magicsquares.SwigPyIterator___isub__(self, *args)
    def __add__(self, *args): return _magicsquares.SwigPyIterator___add__(self, *args)
    def __sub__(self, *args): return _magicsquares.SwigPyIterator___sub__(self, *args)
    def __iter__(self): return self
# register the proxy class with the extension module
SwigPyIterator_swigregister = _magicsquares.SwigPyIterator_swigregister
SwigPyIterator_swigregister(SwigPyIterator)
class PCA(_object):
    """Proxy for the PCA type; every method forwards to the PCA_* function in _magicsquares."""
    # attribute access is routed through the per-class setter/getter tables
    __swig_setmethods__ = {}
    __setattr__ = lambda self, name, value: _swig_setattr(self, PCA, name, value)
    __swig_getmethods__ = {}
    __getattr__ = lambda self, name: _swig_getattr(self, PCA, name)
    __repr__ = _swig_repr
    def __init__(self, *args):
        this = _magicsquares.new_PCA(*args)
        # append to an existing handle if there is one, otherwise store the new handle
        try: self.this.append(this)
        except: self.this = this
    __swig_destroy__ = _magicsquares.delete_PCA
    __del__ = lambda self : None;
    def AddFeature(self, *args): return _magicsquares.PCA_AddFeature(self, *args)
    def Calculate(self): return _magicsquares.PCA_Calculate(self)
    def Compress(self, *args): return _magicsquares.PCA_Compress(self, *args)
    def Mult(self, *args): return _magicsquares.PCA_Mult(self, *args)
    def Project(self, *args): return _magicsquares.PCA_Project(self, *args)
    def Synth(self, *args): return _magicsquares.PCA_Synth(self, *args)
    # RunTests takes no instance: exposed via the getter table and as a staticmethod
    __swig_getmethods__["RunTests"] = lambda x: _magicsquares.PCA_RunTests
    if _newclass:RunTests = staticmethod(_magicsquares.PCA_RunTests)
    def GetEigenValues(self): return _magicsquares.PCA_GetEigenValues(self)
    def GetEigenTransform(self): return _magicsquares.PCA_GetEigenTransform(self)
    def EigenTransform(self): return _magicsquares.PCA_EigenTransform(self)
    def GetFeatures(self): return _magicsquares.PCA_GetFeatures(self)
    def GetMean(self): return _magicsquares.PCA_GetMean(self)
    def GetFeatureSize(self): return _magicsquares.PCA_GetFeatureSize(self)
    def GetParamsSize(self): return _magicsquares.PCA_GetParamsSize(self)
    def Load(self, *args): return _magicsquares.PCA_Load(self, *args)
    def Save(self, *args): return _magicsquares.PCA_Save(self, *args)
# register the proxy class with the extension module
PCA_swigregister = _magicsquares.PCA_swigregister
PCA_swigregister(PCA)
# module-level alias for the static RunTests; the def is immediately
# rebound to the extension function itself
def PCA_RunTests():
    return _magicsquares.PCA_RunTests()
PCA_RunTests = _magicsquares.PCA_RunTests
class Classifier(_object):
    """Proxy for the Classifier base type; methods forward to Classifier_* in _magicsquares."""
    # attribute access is routed through the per-class setter/getter tables
    __swig_setmethods__ = {}
    __setattr__ = lambda self, name, value: _swig_setattr(self, Classifier, name, value)
    __swig_getmethods__ = {}
    __getattr__ = lambda self, name: _swig_getattr(self, Classifier, name)
    # cannot be constructed from Python
    def __init__(self, *args, **kwargs): raise AttributeError("No constructor defined - class is abstract")
    __repr__ = _swig_repr
    __swig_destroy__ = _magicsquares.delete_Classifier
    __del__ = lambda self : None;
    def AddFeature(self, *args): return _magicsquares.Classifier_AddFeature(self, *args)
    def Classify(self, *args): return _magicsquares.Classifier_Classify(self, *args)
    def GroupExists(self, *args): return _magicsquares.Classifier_GroupExists(self, *args)
    def GetGroupMean(self, *args): return _magicsquares.Classifier_GetGroupMean(self, *args)
# register the proxy class with the extension module
Classifier_swigregister = _magicsquares.Classifier_swigregister
Classifier_swigregister(Classifier)
class LDAClassifier(Classifier):
    """Proxy for LDAClassifier, a Classifier subclass; Classify forwards to _magicsquares."""
    # start from the base class's setter/getter tables so inherited attributes resolve
    __swig_setmethods__ = {}
    for _s in [Classifier]: __swig_setmethods__.update(getattr(_s,'__swig_setmethods__',{}))
    __setattr__ = lambda self, name, value: _swig_setattr(self, LDAClassifier, name, value)
    __swig_getmethods__ = {}
    for _s in [Classifier]: __swig_getmethods__.update(getattr(_s,'__swig_getmethods__',{}))
    __getattr__ = lambda self, name: _swig_getattr(self, LDAClassifier, name)
    # cannot be constructed from Python
    def __init__(self, *args, **kwargs): raise AttributeError("No constructor defined - class is abstract")
    __repr__ = _swig_repr
    __swig_destroy__ = _magicsquares.delete_LDAClassifier
    __del__ = lambda self : None;
    def Classify(self, *args): return _magicsquares.LDAClassifier_Classify(self, *args)
# register the proxy class with the extension module
LDAClassifier_swigregister = _magicsquares.LDAClassifier_swigregister
LDAClassifier_swigregister(LDAClassifier)
# Thin wrapper around _magicsquares.matrix_inverse; the def is immediately
# rebound to the extension function itself.
def matrix_inverse(*args):
    return _magicsquares.matrix_inverse(*args)
matrix_inverse = _magicsquares.matrix_inverse
class Image(_object):
    """Proxy for the Image type; every method forwards to the Image_* function in _magicsquares."""
    # attribute access is routed through the per-class setter/getter tables
    __swig_setmethods__ = {}
    __setattr__ = lambda self, name, value: _swig_setattr(self, Image, name, value)
    __swig_getmethods__ = {}
    __getattr__ = lambda self, name: _swig_getattr(self, Image, name)
    __repr__ = _swig_repr
    def __init__(self, *args):
        this = _magicsquares.new_Image(*args)
        # append to an existing handle if there is one, otherwise store the new handle
        try: self.this.append(this)
        except: self.this = this
    __swig_destroy__ = _magicsquares.delete_Image
    __del__ = lambda self : None;
    def Clear(self): return _magicsquares.Image_Clear(self)
    def Get(self, *args): return _magicsquares.Image_Get(self, *args)
    def __sub__(self, *args): return _magicsquares.Image___sub__(self, *args)
    def __add__(self, *args): return _magicsquares.Image___add__(self, *args)
    def PrintInfo(self): return _magicsquares.Image_PrintInfo(self)
    def Crop(self, *args): return _magicsquares.Image_Crop(self, *args)
    def Scale(self, *args): return _magicsquares.Image_Scale(self, *args)
    def SubImage(self, *args): return _magicsquares.Image_SubImage(self, *args)
    def Blit(self, *args): return _magicsquares.Image_Blit(self, *args)
    def SSD(self, *args): return _magicsquares.Image_SSD(self, *args)
    def SubMean(self): return _magicsquares.Image_SubMean(self)
    def LBP(self): return _magicsquares.Image_LBP(self)
    def GRAY2RGB(self): return _magicsquares.Image_GRAY2RGB(self)
    def RGB2GRAY(self): return _magicsquares.Image_RGB2GRAY(self)
    def BayerGB2RGB(self): return _magicsquares.Image_BayerGB2RGB(self)
    def Hist(self, *args): return _magicsquares.Image_Hist(self, *args)
    def ToFloatVector(self): return _magicsquares.Image_ToFloatVector(self)
    def NumElements(self): return _magicsquares.Image_NumElements(self)
    def Save(self, *args): return _magicsquares.Image_Save(self, *args)
    def GetBB(self, *args): return _magicsquares.Image_GetBB(self, *args)
    # m_Image member: registered in both accessor tables and, for new-style
    # classes, exposed as a property
    __swig_setmethods__["m_Image"] = _magicsquares.Image_m_Image_set
    __swig_getmethods__["m_Image"] = _magicsquares.Image_m_Image_get
    if _newclass:m_Image = _swig_property(_magicsquares.Image_m_Image_get, _magicsquares.Image_m_Image_set)
# register the proxy class with the extension module
Image_swigregister = _magicsquares.Image_swigregister
Image_swigregister(Image)
class PCAClassifier(Classifier):
    """Proxy for PCAClassifier, a Classifier subclass; methods forward to PCAClassifier_* in _magicsquares."""
    # start from the base class's setter/getter tables so inherited attributes resolve
    __swig_setmethods__ = {}
    for _s in [Classifier]: __swig_setmethods__.update(getattr(_s,'__swig_setmethods__',{}))
    __setattr__ = lambda self, name, value: _swig_setattr(self, PCAClassifier, name, value)
    __swig_getmethods__ = {}
    for _s in [Classifier]: __swig_getmethods__.update(getattr(_s,'__swig_getmethods__',{}))
    __getattr__ = lambda self, name: _swig_getattr(self, PCAClassifier, name)
    __repr__ = _swig_repr
    def __init__(self, *args):
        this = _magicsquares.new_PCAClassifier(*args)
        # append to an existing handle if there is one, otherwise store the new handle
        try: self.this.append(this)
        except: self.this = this
    __swig_destroy__ = _magicsquares.delete_PCAClassifier
    __del__ = lambda self : None;
    def AddFeature(self, *args): return _magicsquares.PCAClassifier_AddFeature(self, *args)
    def Classify(self, *args): return _magicsquares.PCAClassifier_Classify(self, *args)
    def GetPCA(self): return _magicsquares.PCAClassifier_GetPCA(self)
# register the proxy class with the extension module
PCAClassifier_swigregister = _magicsquares.PCAClassifier_swigregister
PCAClassifier_swigregister(PCAClassifier)
# Module-level wrappers. Each def forwards to the like-named function in
# _magicsquares and is then rebound to that extension function itself.
def dsvd(*args):
    return _magicsquares.dsvd(*args)
dsvd = _magicsquares.dsvd
def SVD(*args):
    return _magicsquares.SVD(*args)
SVD = _magicsquares.SVD
def OpenFile(*args):
    return _magicsquares.OpenFile(*args)
OpenFile = _magicsquares.OpenFile
def CloseFile(*args):
    return _magicsquares.CloseFile(*args)
CloseFile = _magicsquares.CloseFile
def Glob(*args):
    return _magicsquares.Glob(*args)
Glob = _magicsquares.Glob
class Rect(_object):
    """Proxy for the Rect type, exposing the members x, y, w and h."""
    # attribute access is routed through the per-class setter/getter tables
    __swig_setmethods__ = {}
    __setattr__ = lambda self, name, value: _swig_setattr(self, Rect, name, value)
    __swig_getmethods__ = {}
    __getattr__ = lambda self, name: _swig_getattr(self, Rect, name)
    __repr__ = _swig_repr
    def __init__(self, *args):
        this = _magicsquares.new_Rect(*args)
        # append to an existing handle if there is one, otherwise store the new handle
        try: self.this.append(this)
        except: self.this = this
    __swig_destroy__ = _magicsquares.delete_Rect
    __del__ = lambda self : None;
    # each member is registered in both accessor tables and, for new-style
    # classes, exposed as a property backed by the extension's get/set functions
    __swig_setmethods__["x"] = _magicsquares.Rect_x_set
    __swig_getmethods__["x"] = _magicsquares.Rect_x_get
    if _newclass:x = _swig_property(_magicsquares.Rect_x_get, _magicsquares.Rect_x_set)
    __swig_setmethods__["y"] = _magicsquares.Rect_y_set
    __swig_getmethods__["y"] = _magicsquares.Rect_y_get
    if _newclass:y = _swig_property(_magicsquares.Rect_y_get, _magicsquares.Rect_y_set)
    __swig_setmethods__["w"] = _magicsquares.Rect_w_set
    __swig_getmethods__["w"] = _magicsquares.Rect_w_get
    if _newclass:w = _swig_property(_magicsquares.Rect_w_get, _magicsquares.Rect_w_set)
    __swig_setmethods__["h"] = _magicsquares.Rect_h_set
    __swig_getmethods__["h"] = _magicsquares.Rect_h_get
    if _newclass:h = _swig_property(_magicsquares.Rect_h_get, _magicsquares.Rect_h_set)
# register the proxy class with the extension module
Rect_swigregister = _magicsquares.Rect_swigregister
Rect_swigregister(Rect)
class FaceFinder(_object):
    """Proxy for the FaceFinder type; Find forwards to FaceFinder_Find in _magicsquares."""
    # attribute access is routed through the per-class setter/getter tables
    __swig_setmethods__ = {}
    __setattr__ = lambda self, name, value: _swig_setattr(self, FaceFinder, name, value)
    __swig_getmethods__ = {}
    __getattr__ = lambda self, name: _swig_getattr(self, FaceFinder, name)
    __repr__ = _swig_repr
    def __init__(self):
        this = _magicsquares.new_FaceFinder()
        # append to an existing handle if there is one, otherwise store the new handle
        try: self.this.append(this)
        except: self.this = this
    __swig_destroy__ = _magicsquares.delete_FaceFinder
    __del__ = lambda self : None;
    def Find(self, *args): return _magicsquares.FaceFinder_Find(self, *args)
# register the proxy class with the extension module
FaceFinder_swigregister = _magicsquares.FaceFinder_swigregister
FaceFinder_swigregister(FaceFinder)
class FloatVector(_object):
    """Proxy for the FloatVector type; every method forwards to the FloatVector_* function in _magicsquares."""
    # attribute access is routed through the per-class setter/getter tables
    __swig_setmethods__ = {}
    __setattr__ = lambda self, name, value: _swig_setattr(self, FloatVector, name, value)
    __swig_getmethods__ = {}
    __getattr__ = lambda self, name: _swig_getattr(self, FloatVector, name)
    __repr__ = _swig_repr
    __swig_destroy__ = _magicsquares.delete_FloatVector
    __del__ = lambda self : None;
    def __init__(self, *args):
        this = _magicsquares.new_FloatVector(*args)
        # append to an existing handle if there is one, otherwise store the new handle
        try: self.this.append(this)
        except: self.this = this
    def Size(self): return _magicsquares.FloatVector_Size(self)
    def GetRawData(self): return _magicsquares.FloatVector_GetRawData(self)
    def GetRawDataConst(self): return _magicsquares.FloatVector_GetRawDataConst(self)
    def Print(self): return _magicsquares.FloatVector_Print(self)
    def SetAll(self, *args): return _magicsquares.FloatVector_SetAll(self, *args)
    def Zero(self): return _magicsquares.FloatVector_Zero(self)
    def IsInf(self): return _magicsquares.FloatVector_IsInf(self)
    def Mean(self): return _magicsquares.FloatVector_Mean(self)
    def DistanceFrom(self, *args): return _magicsquares.FloatVector_DistanceFrom(self, *args)
    def Magnitude(self): return _magicsquares.FloatVector_Magnitude(self)
    def Dot(self, *args): return _magicsquares.FloatVector_Dot(self, *args)
    def Normalised(self): return _magicsquares.FloatVector_Normalised(self)
    # arithmetic operators are implemented by the extension module
    def __add__(self, *args): return _magicsquares.FloatVector___add__(self, *args)
    def __sub__(self, *args): return _magicsquares.FloatVector___sub__(self, *args)
    def __mul__(self, *args): return _magicsquares.FloatVector___mul__(self, *args)
    def __div__(self, *args): return _magicsquares.FloatVector___div__(self, *args)
    def __iadd__(self, *args): return _magicsquares.FloatVector___iadd__(self, *args)
    def __isub__(self, *args): return _magicsquares.FloatVector___isub__(self, *args)
    def __imul__(self, *args): return _magicsquares.FloatVector___imul__(self, *args)
    def __idiv__(self, *args): return _magicsquares.FloatVector___idiv__(self, *args)
    def Save(self, *args): return _magicsquares.FloatVector_Save(self, *args)
    def Load(self, *args): return _magicsquares.FloatVector_Load(self, *args)
    # RunTests takes no instance: exposed via the getter table and as a staticmethod
    __swig_getmethods__["RunTests"] = lambda x: _magicsquares.FloatVector_RunTests
    if _newclass:RunTests = staticmethod(_magicsquares.FloatVector_RunTests)
    def __getitem__(self, *args): return _magicsquares.FloatVector___getitem__(self, *args)
    def __setitem__(self, *args): return _magicsquares.FloatVector___setitem__(self, *args)
# register the proxy class with the extension module
FloatVector_swigregister = _magicsquares.FloatVector_swigregister
FloatVector_swigregister(FloatVector)
# module-level alias for the static RunTests; the def is immediately
# rebound to the extension function itself
def FloatVector_RunTests():
    return _magicsquares.FloatVector_RunTests()
FloatVector_RunTests = _magicsquares.FloatVector_RunTests
class FloatMatrix(_object):
    """SWIG proxy for the FloatMatrix type exported by ``_magicsquares``.

    Every method forwards to the extension function of the same name; no
    logic lives on the Python side.
    """
    # attribute access goes through the SWIG setter/getter tables
    __swig_setmethods__ = {}
    __setattr__ = lambda self, name, value: _swig_setattr(self, FloatMatrix, name, value)
    __swig_getmethods__ = {}
    __getattr__ = lambda self, name: _swig_getattr(self, FloatMatrix, name)
    __repr__ = _swig_repr
    __swig_destroy__ = _magicsquares.delete_FloatMatrix
    __del__ = lambda self: None

    def __init__(self, *args):
        handle = _magicsquares.new_FloatMatrix(*args)
        # append to an existing 'this' when there is one, otherwise create it
        try:
            self.this.append(handle)
        except:
            self.this = handle

    # --- shape and raw storage ---
    def GetRows(self): return _magicsquares.FloatMatrix_GetRows(self)
    def GetCols(self): return _magicsquares.FloatMatrix_GetCols(self)
    def GetRawData(self): return _magicsquares.FloatMatrix_GetRawData(self)
    def GetRawDataConst(self): return _magicsquares.FloatMatrix_GetRawDataConst(self)

    # --- row / column access ---
    def GetRowVector(self, *args): return _magicsquares.FloatMatrix_GetRowVector(self, *args)
    def GetColVector(self, *args): return _magicsquares.FloatMatrix_GetColVector(self, *args)
    def SetRowVector(self, *args): return _magicsquares.FloatMatrix_SetRowVector(self, *args)
    def SetColVector(self, *args): return _magicsquares.FloatMatrix_SetColVector(self, *args)
    def NormaliseRows(self): return _magicsquares.FloatMatrix_NormaliseRows(self)
    def NormaliseCols(self): return _magicsquares.FloatMatrix_NormaliseCols(self)

    # --- whole-matrix helpers ---
    def Print(self): return _magicsquares.FloatMatrix_Print(self)
    def SetAll(self, *args): return _magicsquares.FloatMatrix_SetAll(self, *args)
    def Zero(self): return _magicsquares.FloatMatrix_Zero(self)
    def IsInf(self): return _magicsquares.FloatMatrix_IsInf(self)
    def Transposed(self): return _magicsquares.FloatMatrix_Transposed(self)
    def Inverted(self): return _magicsquares.FloatMatrix_Inverted(self)

    # --- operators ---
    def __add__(self, *args): return _magicsquares.FloatMatrix___add__(self, *args)
    def __sub__(self, *args): return _magicsquares.FloatMatrix___sub__(self, *args)
    def __mul__(self, *args): return _magicsquares.FloatMatrix___mul__(self, *args)
    def VecMulTransposed(self, *args): return _magicsquares.FloatMatrix_VecMulTransposed(self, *args)
    def __iadd__(self, *args): return _magicsquares.FloatMatrix___iadd__(self, *args)
    def __isub__(self, *args): return _magicsquares.FloatMatrix___isub__(self, *args)
    def __imul__(self, *args): return _magicsquares.FloatMatrix___imul__(self, *args)
    def __eq__(self, *args): return _magicsquares.FloatMatrix___eq__(self, *args)

    # --- sorting, cropping, persistence ---
    def SortRows(self, *args): return _magicsquares.FloatMatrix_SortRows(self, *args)
    def SortCols(self, *args): return _magicsquares.FloatMatrix_SortCols(self, *args)
    def CropRows(self, *args): return _magicsquares.FloatMatrix_CropRows(self, *args)
    def CropCols(self, *args): return _magicsquares.FloatMatrix_CropCols(self, *args)
    def Save(self, *args): return _magicsquares.FloatMatrix_Save(self, *args)
    def Load(self, *args): return _magicsquares.FloatMatrix_Load(self, *args)

    # static RunTests, exposed for both classic and new-style classes
    __swig_getmethods__["RunTests"] = lambda x: _magicsquares.FloatMatrix_RunTests
    if _newclass:
        RunTests = staticmethod(_magicsquares.FloatMatrix_RunTests)

    # --- element access ---
    def Get(self, *args): return _magicsquares.FloatMatrix_Get(self, *args)
    def Set(self, *args): return _magicsquares.FloatMatrix_Set(self, *args)
# Register the FloatMatrix proxy class with the compiled _magicsquares extension.
FloatMatrix_swigregister = _magicsquares.FloatMatrix_swigregister
FloatMatrix_swigregister(FloatMatrix)
def FloatMatrix_RunTests():
    # Module-level wrapper around the extension's FloatMatrix_RunTests.
    return _magicsquares.FloatMatrix_RunTests()
# The name is then rebound straight to the extension function, replacing the wrapper above.
FloatMatrix_RunTests = _magicsquares.FloatMatrix_RunTests
class RectVector(_object):
    """SWIG proxy for the RectVector container exported by ``_magicsquares``.

    Offers both Python sequence hooks and the C++-style method names; every
    call forwards to the matching extension function.
    """
    # attribute access goes through the SWIG setter/getter tables
    __swig_setmethods__ = {}
    __setattr__ = lambda self, name, value: _swig_setattr(self, RectVector, name, value)
    __swig_getmethods__ = {}
    __getattr__ = lambda self, name: _swig_getattr(self, RectVector, name)
    __repr__ = _swig_repr

    # --- Python sequence protocol ---
    def iterator(self): return _magicsquares.RectVector_iterator(self)
    def __iter__(self): return self.iterator()
    def __nonzero__(self): return _magicsquares.RectVector___nonzero__(self)
    def __bool__(self): return _magicsquares.RectVector___bool__(self)
    def __len__(self): return _magicsquares.RectVector___len__(self)
    def pop(self): return _magicsquares.RectVector_pop(self)
    def __getslice__(self, *args): return _magicsquares.RectVector___getslice__(self, *args)
    def __setslice__(self, *args): return _magicsquares.RectVector___setslice__(self, *args)
    def __delslice__(self, *args): return _magicsquares.RectVector___delslice__(self, *args)
    def __delitem__(self, *args): return _magicsquares.RectVector___delitem__(self, *args)
    def __getitem__(self, *args): return _magicsquares.RectVector___getitem__(self, *args)
    def __setitem__(self, *args): return _magicsquares.RectVector___setitem__(self, *args)
    def append(self, *args): return _magicsquares.RectVector_append(self, *args)

    # --- C++-style container interface ---
    def empty(self): return _magicsquares.RectVector_empty(self)
    def size(self): return _magicsquares.RectVector_size(self)
    def clear(self): return _magicsquares.RectVector_clear(self)
    def swap(self, *args): return _magicsquares.RectVector_swap(self, *args)
    def get_allocator(self): return _magicsquares.RectVector_get_allocator(self)
    def begin(self): return _magicsquares.RectVector_begin(self)
    def end(self): return _magicsquares.RectVector_end(self)
    def rbegin(self): return _magicsquares.RectVector_rbegin(self)
    def rend(self): return _magicsquares.RectVector_rend(self)
    def pop_back(self): return _magicsquares.RectVector_pop_back(self)
    def erase(self, *args): return _magicsquares.RectVector_erase(self, *args)

    def __init__(self, *args):
        handle = _magicsquares.new_RectVector(*args)
        # append to an existing 'this' when there is one, otherwise create it
        try:
            self.this.append(handle)
        except:
            self.this = handle

    def push_back(self, *args): return _magicsquares.RectVector_push_back(self, *args)
    def front(self): return _magicsquares.RectVector_front(self)
    def back(self): return _magicsquares.RectVector_back(self)
    def assign(self, *args): return _magicsquares.RectVector_assign(self, *args)
    def resize(self, *args): return _magicsquares.RectVector_resize(self, *args)
    def insert(self, *args): return _magicsquares.RectVector_insert(self, *args)
    def reserve(self, *args): return _magicsquares.RectVector_reserve(self, *args)
    def capacity(self): return _magicsquares.RectVector_capacity(self)

    __swig_destroy__ = _magicsquares.delete_RectVector
    __del__ = lambda self: None
# Register the RectVector proxy class with the compiled _magicsquares extension.
RectVector_swigregister = _magicsquares.RectVector_swigregister
RectVector_swigregister(RectVector)
# This file is compatible with both classic and new-style classes.

BIN
python/magicsquares.pyc Normal file

Binary file not shown.

324
python/samplebrain Executable file
View File

@ -0,0 +1,324 @@
#!/usr/bin/env python
# aggregate sound from mfcc or fft similarity of chunks
import numpy as np
import scipy.io.wavfile
from features import mfcc
from features import logfbank
from features import base
import copy
import os
import platform;
if int(platform.python_version_tuple()[0])>2:
from tkinter import *
from tkinter.filedialog import *
from tkinter.messagebox import *
else:
from Tkinter import *
from tkFileDialog import *
from tkMessageBox import *
source_dir = "../sound/source/"
render_dir = "../sound/render/"
version = "0.0.2"
def msg(msg):
    """Console logger used until the GUI swaps in its own message hook."""
    print(msg)
def clear_msg():
    """Do nothing; placeholder until the GUI swaps in its own hook."""
    return None
def render_stats():
    """Do nothing; placeholder until the GUI swaps in its own hook."""
    return None
def fadeinout(s,slength,elength):
    """Return a copy of ``s`` with a linear fade-in and fade-out applied.

    s       -- indexable sample buffer (numpy arrays in this program)
    slength -- number of leading samples ramped up from gain 0
    elength -- number of trailing samples ramped down to gain 0

    The input is left untouched. A fade longer than the buffer is clipped
    to the buffer length (its slope still follows the requested length)
    instead of raising IndexError.
    """
    s = copy.deepcopy(s)
    total = len(s)
    for i in range(min(slength, total)):
        s[i] *= float(i)/slength
    last = total-1
    for i in range(min(elength, total)):
        # i counts inwards from the final sample, so the very last one hits 0
        s[last-i] *= float(i)/elength
    return s
def normalise(s):
    """Return ``s`` rescaled so that its largest absolute sample is 10000.

    s -- numpy array of samples (integer PCM or float)

    A new array of the same dtype is returned and ``s`` is not modified, so
    normalising a slice no longer rewrites the buffer it was cut from (and
    integer buffers no longer trip numpy's in-place casting rules). Integer
    results are rounded to the nearest value. Empty or all-zero input is
    returned unchanged.
    """
    s = np.asarray(s)
    if s.size == 0:
        return s
    # measure the peak in float so that -32768 in an int16 buffer cannot overflow
    peak = float(np.abs(s.astype(np.float64)).max())
    if peak <= 0:
        return s
    scaled = s*(10000.0/peak)
    if np.issubdtype(s.dtype, np.integer):
        scaled = np.rint(scaled)
    return scaled.astype(s.dtype)
def chop(wav,size,overlap,rand,norm):
    """Cut a wav into overlapping, faded blocks.

    wav     -- (rate, samples) pair; only the samples (index 1) are used
    size    -- block length in samples
    overlap -- number of samples shared by consecutive blocks
    rand    -- unused, kept so existing callers keep working
    norm    -- when true each block is normalised before it is faded

    Returns a list of [0, block] pairs.
    Raises ValueError when overlap >= size, because the read position would
    never advance and the loop would never finish.
    """
    step = size-overlap
    if step <= 0:
        raise ValueError("overlap must be smaller than the block size")
    samples = wav[1]
    ret = []
    pos = 0
    # NOTE: the strict '<' drops a block that ends exactly on the last sample
    while pos+size < len(samples):
        block = samples[pos:pos+size]
        if norm:
            block = normalise(block)
        ret.append([0,fadeinout(block,500,500)])
        pos += step
    return ret
def fftify(chopped):
    """Return a list holding the FFT of the samples of every [tag, samples] block.

    A real list is built (not a ``map`` object) so that callers can take
    ``len()`` of the result on Python 3 as well.
    """
    return [np.fft.fft(block[1]) for block in chopped]
def mfccify(chopped,rate):
    """Return ``logfbank(samples, rate)`` for every [tag, samples] block, as a list."""
    return [logfbank(sig[1],rate) for sig in chopped]
def fftdiff(a,b):
    """Return the sum of the absolute element-wise differences between a and b.

    The sum is accumulated in ``np.longdouble``, the portable spelling of
    the widest float available; ``np.float128`` does not exist on every
    platform.
    """
    return (abs(a-b)).sum(dtype=np.longdouble)
def diffify(a,b):
    """Return a list of ``fftdiff`` values for the paired elements of a and b.

    Pairs are formed with ``zip`` and the result is a real list, so it
    behaves the same on Python 2 and 3.
    """
    return [fftdiff(x,y) for x,y in zip(a,b)]
def search(fft,bank):
    """Return the index of the entry in ``bank`` closest to ``fft``.

    Closeness is measured with ``fftdiff``; the first of several equally
    close entries wins. Returns -1 when ``bank`` is empty. The chosen index
    is also reported through ``msg``.
    """
    # infinity instead of a large magic number: any finite distance beats it
    closest = float("inf")
    ret = -1
    for i,tfft in enumerate(bank):
        dist = fftdiff(fft,tfft)
        if dist<closest:
            ret = i
            closest = dist
    msg(str(ret))
    return ret
#unit_test()
class transponge():
    """Rebuilds a target sound from the closest-matching blocks of source sounds."""

    def __init__(self):
        self.src=[]        # [filename, (rate, samples)] for every loaded source
        self.dst=[0,[]]    # (rate, samples) of the target
        self.src_chp=[]    # chopped source blocks
        self.dst_chp=[]
        self.src_fft=[]    # one feature entry per source block
        self.dst_fft=[]    # one feature entry per target block

    def set_target(self,dst_filename):
        """Load the wav file that the output should imitate."""
        self.dst = scipy.io.wavfile.read(dst_filename)
        msg("succesfully loaded "+dst_filename+" as the target")

    def add_source(self,src_filename):
        """Load a wav file into the brain and return its [filename, wav] entry."""
        ret = [src_filename,scipy.io.wavfile.read(src_filename)]
        self.src.append(ret)
        msg("succesfully loaded "+src_filename+" as a source")
        msg("(now have "+str(len(self.src))+" sounds in brain...)")
        return ret

    def delete_source(self,src_filename):
        """Forget every source that was loaded from ``src_filename``."""
        # keep self.src a real list: filter() is a one-shot iterator on Python 3
        self.src = [entry for entry in self.src if entry[0]!=src_filename]

    def prepare(self,chp_size,chp_overlap,mfcc,norm):
        """Chop target and sources into blocks and compute their features.

        chp_size    -- block length in samples
        chp_overlap -- samples shared by consecutive blocks
        mfcc        -- true: compare with mfccify features, false: plain FFT
        norm        -- passed on to chop() to normalise each block
        """
        self.chp_size = chp_size
        self.chp_overlap = chp_overlap
        self.src_chp=[]
        self.dst_chp=[]
        self.src_fft=[]
        self.dst_fft=[]
        msg("chopping up target wav...")
        self.dst_chp = chop(self.dst,self.chp_size,self.chp_overlap,0,norm)
        msg("number of target blocks: "+str(len(self.dst_chp)))
        if mfcc:
            self.dst_fft = list(mfccify(self.dst_chp,self.dst[0]))
        else:
            self.dst_fft = list(fftify(self.dst_chp))
        render_stats()
        self.dst_chp = [] # clear
        self.dst_size = len(self.dst[1])
        for name,wav in self.src:
            chopped=chop(wav,self.chp_size,self.chp_overlap,0,norm)
            msg(name+" has "+str(len(chopped))+" blocks")
            self.src_chp+=chopped
            if mfcc:
                self.src_fft+=mfccify(chopped,wav[0])
            else:
                self.src_fft+=fftify(chopped)
            render_stats()

    def process(self,filename):
        """Assemble the output from the best source block per target block.

        The file is rewritten every tenth block so progress can be auditioned,
        and once more at the end so the last blocks are not lost.
        Raises ValueError when prepare() produced no source blocks.
        """
        if not self.src_chp:
            raise ValueError("no source blocks available - add sources and prepare first")
        out = np.zeros(self.dst_size,dtype=self.src_chp[0][1].dtype)
        step = self.chp_size-self.chp_overlap
        total = float(len(self.dst_fft))
        pos = 0
        for i,seg in enumerate(self.dst_fft):
            # index of the closest source block
            ii = search(seg,self.src_fft)
            clear_msg()
            # mix the block in at quarter gain, clipped to the end of the output
            n = min(self.chp_size,self.dst_size-pos)
            if n>0:
                out[pos:pos+n] = out[pos:pos+n]+(self.src_chp[ii][1][:n]*0.25)
            pos += step
            msg(str((i/total)*100.0)[:5]+"%")
            if i%10==0: scipy.io.wavfile.write(filename,44100,out)
        scipy.io.wavfile.write(filename,44100,out)
class win:
    """Tk window for sample brain: load sounds, set options, run the render."""

    def __init__(self):
        self.sponge = transponge()
        # create window
        self.root = Tk()
        self.root.title("sample brain "+version)
        top = Frame(self.root)
        top.pack(fill=BOTH)

        # statistics from the last run
        f=Frame(top)
        f.pack(side=LEFT,fill=NONE)
        Label(f,text="statistix (last run)").pack()
        self.stats_brain_blocks = StringVar()
        Label(f,textvariable=self.stats_brain_blocks).pack()
        self.stats_brain_blocks.set("brain blocks: not run yet...")
        self.stats_target_blocks = StringVar()
        Label(f,textvariable=self.stats_target_blocks).pack()
        self.stats_target_blocks.set("target blocks: not run yet...")

        # buttons
        f=Frame(top)
        f.pack(side=LEFT)
        Button(f, text="add to brain", command=self.load_source).grid(row=0, column=0, sticky="we")
        Button(f, text="set the target", command=self.load_target).grid(row=0, column=1, sticky="we")
        Button(f, text="save as", command=self.save_as).grid(row=0, column=2, sticky="we")
        self.output_filename = "brain_out.wav"
        self.run_button = Button(f, text="run", command=self.run)
        self.run_button.grid(row=0, column=3, sticky="we")

        # options
        self.mfcc_var = IntVar()
        cb=Checkbutton(f, text="use mfcc", variable=self.mfcc_var, command=self.mfcc)
        cb.grid(row=1, column=0)
        cb.select()
        self.norm_var = IntVar()
        cb=Checkbutton(f, text="normalise", variable=self.norm_var, command=self.norm)
        cb.grid(row=1, column=1)
        rf = Frame(f)
        rf.grid(row=1, column=3)
        Label(rf, text="overlap").grid(row=0,column=0)
        self.overlap_entry = Entry(rf, width=5)
        self.overlap_entry.grid(row=1, column=0)
        self.overlap_entry.delete(0, END)
        self.overlap_entry.insert(0, "0.75")
        rf = Frame(f)
        rf.grid(row=1, column=4)
        Label(rf, text="window size").grid(row=0,column=0)
        self.window_entry = Entry(rf, width=5)
        self.window_entry.grid(row=1, column=0)
        self.window_entry.delete(0, END)
        self.window_entry.insert(0, "3000")

        # target name and list of loaded sources
        self.target_name = StringVar()
        Label(top,textvariable=self.target_name).pack()
        self.target_name.set("no target yet...")
        Label(top,text="brain contents:").pack()
        self.samples=Frame(top)
        self.samples.pack(fill=NONE,side=RIGHT)

        # message area
        self.debug = Text(self.root, font = "Helvetica 24 bold", height=10, width=60)
        self.debug.pack()
        self.debug.insert(END, "ready...\n")

    def build_sample_gui(self,name,size):
        """Add a row for one loaded source: delete button plus name and length."""
        f = Frame(self.samples)
        f.pack()
        Button(f, text="x", command=lambda: self.delete_sample(name)).pack(side=LEFT)
        Label(f, text=os.path.basename(name)+" ("+str(size)[:5]+" secs)").pack(side=RIGHT)

    def delete_sample(self,name):
        """Remove a source from the brain and rebuild the list of sources."""
        self.sponge.delete_source(name)
        self.msg("deleted "+name+" from my brain")
        for widget in self.samples.winfo_children():
            widget.destroy()
        for i in self.sponge.src:
            self.build_sample_gui(i[0],len(i[1][1])/float(i[1][0]))

    def msg(self,msg):
        """Show a message at the top of the message area."""
        # str() so that exceptions and numbers can be reported too
        self.debug.insert(0.0, str(msg)+"\n")
        self.root.update()

    def clear_msg(self):
        """Empty the message area."""
        self.debug.delete(0.0, END)
        self.root.update()

    def load_target(self):
        """Ask for the target wav and load it."""
        filename = askopenfilename(title = "load target wav")
        # a cancelled dialog returns an empty value (not always the empty string)
        if filename:
            self.sponge.set_target(filename)
            self.target_name.set("target: "+os.path.basename(filename))

    def load_source(self):
        """Ask for a source wav and add it to the brain."""
        filename = askopenfilename(title = "load source wav into brain")
        if filename:
            i=self.sponge.add_source(filename)
            self.build_sample_gui(i[0],len(i[1][1])/float(i[1][0]))

    def save_as(self):
        """Ask where the rendered wav should be written."""
        filename = asksaveasfilename(title = "set the output wav")
        if filename:
            self.output_filename = filename

    def render_stats(self):
        """Refresh the block counters from the current state of the brain."""
        self.stats_brain_blocks.set("brain blocks: "+str(len(self.sponge.src_chp)))
        self.stats_target_blocks.set("target blocks: "+str(len(self.sponge.dst_fft)))

    def run(self):
        """Validate the settings, then prepare and render into the output file."""
        try:
            window_size = float(self.window_entry.get())
            overlap = float(self.overlap_entry.get())
        except ValueError:
            self.msg("window size and overlap must be numbers")
            return
        size = int(window_size)
        overlap_samples = int(window_size*overlap)
        # an overlap as large as the window would never advance through the sound
        if size<1 or overlap_samples<0 or overlap_samples>=size:
            self.msg("overlap must be at least 0 and smaller than the window")
            return
        if not self.sponge.src:
            self.msg("add at least one sound to the brain first")
            return
        if len(self.sponge.dst[1])==0:
            self.msg("set the target first")
            return
        norm_on = self.norm_var.get()==1
        if norm_on:
            self.msg("normalising: on")
        else:
            self.msg("normalising: off")
        self.sponge.prepare(size,overlap_samples,
                            self.mfcc_var.get()==1,
                            norm_on)
        self.msg("processing...")
        self.sponge.process(self.output_filename)
        self.msg("done, saved in "+self.output_filename)

    def mfcc(self):
        """Checkbox callback; the value is read from mfcc_var when running."""
        pass

    def norm(self):
        """Checkbox callback; the value is read from norm_var when running."""
        pass
w = win()
# from here on the module-level logging hooks write into the window
msg = w.msg
clear_msg = w.clear_msg
render_stats = w.render_stats
try:
    w.root.mainloop()
# 'as' form: the comma form is a syntax error on Python 3, which this script supports
except Exception as e:
    msg(str(e))

198
python/transponge-eigen.py Normal file
View File

@ -0,0 +1,198 @@
# weird eigensound approach - project into learned pca space
# fft -> matrix -> ifft -> sound
from magicsquares import *
import numpy as np
import scipy.io.wavfile
import math
source_dir = "../sound/source/"
render_dir = "../sound/render/"
def fadeinout(s,slength,elength):
    """Return a faded copy of ``s`` as a numpy array.

    s       -- sample buffer
    slength -- number of leading samples ramped up from gain 0
    elength -- number of trailing samples ramped down to gain 0

    The input is no longer modified in place. A fade longer than the buffer
    is clipped to the buffer length instead of raising IndexError.
    """
    s = np.array(s)
    total = len(s)
    for i in range(min(slength, total)):
        s[i] *= float(i)/slength
    last = total-1
    for i in range(min(elength, total)):
        # i counts inwards from the final sample, so the very last one hits 0
        s[last-i] *= float(i)/elength
    return s
def normalise(s):
    """Return ``s`` rescaled so that its largest absolute sample is 10000.

    s -- numpy array of samples (integer PCM or float)

    A new array of the same dtype is returned and ``s`` is not modified.
    Integer results are rounded to the nearest value. Empty or all-zero
    input is returned unchanged.
    """
    s = np.asarray(s)
    if s.size == 0:
        return s
    # measure the peak in float so that -32768 in an int16 buffer cannot overflow
    peak = float(np.abs(s.astype(np.float64)).max())
    if peak <= 0:
        return s
    scaled = s*(10000.0/peak)
    if np.issubdtype(s.dtype, np.integer):
        scaled = np.rint(scaled)
    return scaled.astype(s.dtype)
def arr_to_fv(a):
    """Copy the sequence ``a`` element by element, as floats, into a new FloatVector."""
    vec = FloatVector(len(a))
    for idx in range(vec.Size()):
        vec[idx] = float(a[idx])
    return vec
def fv_to_arr(a):
    """Copy ``a`` (anything with Size() and indexing) into a new float32 numpy array."""
    count = a.Size()
    ret = np.zeros(count,dtype=np.float32)
    for idx in range(count):
        ret[idx] = a[idx]
    return ret
def build_pca(filename,s):
    """Build a PCA model from the feature rows in ``s`` and save it.

    filename -- file the model is saved to
    s        -- non-empty sequence of equally long feature arrays

    Progress is printed as a percentage. Returns the PCA object.
    """
    pca = PCA(len(s[0]))
    total = float(len(s))
    for i,v in enumerate(s):
        print(i/total*100.0)
        pca.AddFeature(arr_to_fv(v))
    print("making eigenmatrix")
    pca.Calculate()
    print("saving eigenmatrix")
    f = OpenFile(filename, "wb")
    # close the handle even when saving fails
    try:
        pca.Save(f)
    finally:
        CloseFile(f)
    return pca
def load_pca(filename):
    """Load a saved PCA model from ``filename``, apply Compress(2,40) and return it."""
    pca = PCA(1)
    f = OpenFile(filename, "rb")
    # close the handle even when loading fails
    try:
        pca.Load(f)
    finally:
        CloseFile(f)
    pca.Compress(2,40)
    return pca
def synth_sound(s,pca):
    """Project ``s`` with ``pca.Project`` and return ``pca.Synth`` of the projection."""
    projected = pca.Project(arr_to_fv(s))
    # (an experiment that zeroed some projected components used to sit here)
    return pca.Synth(projected)
def project_eigen_sound(pca,row,gain):
    """Return row ``row`` of the PCA eigen transform, times ``gain``, as a numpy array."""
    eigen_row = pca.GetEigenTransform().GetRowVector(row)
    return fv_to_arr(eigen_row*gain)
def fftify(chopped):
    """Return a list with the real part of the FFT of every block, as float32.

    The imaginary part is dropped explicitly (the plain cast did the same
    but with a warning). A real list is returned so ``len()`` works on it.
    """
    return [np.fft.fft(block).real.astype(np.float32) for block in chopped]
def chop(wav,size,overlap):
    """Cut the samples of a wav into overlapping blocks.

    wav     -- (rate, samples) pair; only the samples (index 1) are used
    size    -- block length in samples
    overlap -- number of samples shared by consecutive blocks

    Returns the list of blocks (slices of the samples).
    Raises ValueError when overlap >= size, because the read position would
    never advance and the loop would never finish.
    """
    step = size-overlap
    if step <= 0:
        raise ValueError("overlap must be smaller than the block size")
    samples = wav[1]
    ret = []
    pos = 0
    # NOTE: the strict '<' drops a block that ends exactly on the last sample
    while pos+size < len(samples):
        ret.append(samples[pos:pos+size])
        pos += step
    return ret
class transponge():
    """Re-synthesises a target wav through a PCA model built from FFT blocks."""

    def __init__(self,chp_size,chp_overlap,dst_filename):
        """Load the target wav and split it into FFT blocks.

        chp_size     -- block length in samples
        chp_overlap  -- samples shared by consecutive blocks
        dst_filename -- wav file to re-synthesise
        """
        dst = scipy.io.wavfile.read(dst_filename)
        self.src_chp=[]
        self.src_fft=[]
        self.chp_size = chp_size
        self.chp_overlap = chp_overlap
        self.dst_chp = chop(dst,self.chp_size,self.chp_overlap)
        self.dst_fft = list(fftify(self.dst_chp))
        print(self.chp_overlap)
        print(len(self.dst_chp))
        self.dst_size = len(dst[1])

    def add(self,src_filename):
        """Add the blocks of another wav to the training material."""
        src = scipy.io.wavfile.read(src_filename)
        src_chp=chop(src,self.chp_size,self.chp_overlap)
        self.src_fft+=fftify(src_chp)
        self.src_chp+=src_chp
        print("src now: "+str(len(self.src_chp)))

    def learn(self,filename):
        """Build the PCA model from the added sources and save it to ``filename``."""
        # the concatenated training features are also dumped as a wav for inspection
        scipy.io.wavfile.write(render_dir+"input.wav",44100,np.concatenate(self.src_fft))
        self.pca = build_pca(filename,self.src_fft)

    def load(self,filename):
        """Load a previously saved PCA model."""
        self.pca = load_pca(filename)

    def process(self):
        """Run every target block through the model and overlap-add the result.

        The output file is rewritten every tenth block and once more at the
        end so the last blocks are not lost.
        """
        out = np.zeros(self.dst_size,dtype=np.int16)
        outname = render_dir+"pca-out-amen-wierd.wav"
        step = self.chp_size-self.chp_overlap
        total = float(len(self.dst_chp))
        pos = 0
        for i,seg in enumerate(self.dst_fft):
            fft = fv_to_arr(synth_sound(seg,self.pca))
            resynth = fadeinout(np.fft.ifft(fft),50,50)
            print(resynth[55])
            n = min(self.chp_size,self.dst_size-pos)
            if n>0:
                # only the real part of the inverse FFT can go into the int16 buffer
                out[pos:pos+n] = out[pos:pos+n]+resynth[:n].real
            pos += step
            print((i/total)*100.0)
            if i%10==0: scipy.io.wavfile.write(outname,44100,out)
        scipy.io.wavfile.write(outname,44100,out)

    def process_eigensound(self,row):
        """Render the first 50 eigen rows, ten repeats each, into eigensound.wav.

        row -- unused: the loop covers rows 0..49 regardless (kept for callers)
        """
        print(self.pca.GetEigenValues()[0])
        out = []
        gain = 100000
        for r in range(0,50):
            # NOTE(review): assumes the repeat loop wraps everything up to the append - confirm
            for i in range(0,10):
                n = project_eigen_sound(self.pca,r,gain)
                pcm = np.fft.ifft(n).real.astype(np.float32)
                pcm*=3
                print(pcm[0])
                out.append(pcm)
        scipy.io.wavfile.write(render_dir+"eigensound.wav",44100,np.concatenate(out).astype(np.int16))
def run(l):
    """Render pw2.wav through the saved amen-fft.pca model with blocks of ``l`` samples."""
    overlap = int(l*0.75)
    t = transponge(l,overlap,source_dir+"pw2.wav")
    # Disabled alternatives kept for reference:
    #   sources: full.wav, 808.wav, amen_brother.wav, sailingbybit.wav, dreambit.wav, pw2.wav
    #   learning: t.learn("amen-fft.pca")
    #   models: sailingby-fft.pca, acid-fft.pca, full-fft.pca, 808-fft.pca
    #   t.process_eigensound(3)
    t.load("amen-fft.pca")
    t.process()

run(512)
'''import matplotlib.pyplot as plt
sound = scipy.io.wavfile.read(source_dir+"808.wav")[1]
fourier = np.fft.fft(sound)
n = len(sound)
timestep = 0.01
freq = np.fft.fftfreq(n, d=timestep)
freq = freq.astype(np.float32)
for i in range(0,1000):
print(freq[i])
scipy.io.wavfile.write(render_dir+"fft.wav", 44100, freq)
'''

261
python/transponge-gui.py Normal file
View File

@ -0,0 +1,261 @@
# aggregate sound from mfcc or fft similarity of chunks
import numpy as np
import scipy.io.wavfile
from features import mfcc
from features import logfbank
from features import base
import copy
import platform;
if int(platform.python_version_tuple()[0])>2:
from tkinter import *
from tkinter.filedialog import *
from tkinter.messagebox import *
else:
from Tkinter import *
from tkFileDialog import *
from tkMessageBox import *
source_dir = "../sound/source/"
render_dir = "../sound/render/"
version = "0.0.1"
def msg(msg):
    """Console logger used until the GUI swaps in its own message hook."""
    print(msg)
def clear_msg():
    """Do nothing; placeholder until the GUI swaps in its own hook."""
    return None
def fadeinout(s,slength,elength):
    """Return a copy of ``s`` with a linear fade-in and fade-out applied.

    s       -- indexable sample buffer (numpy arrays in this program)
    slength -- number of leading samples ramped up from gain 0
    elength -- number of trailing samples ramped down to gain 0

    The input is left untouched. A fade longer than the buffer is clipped
    to the buffer length instead of raising IndexError.
    """
    s = copy.deepcopy(s)
    total = len(s)
    for i in range(min(slength, total)):
        s[i] *= float(i)/slength
    last = total-1
    for i in range(min(elength, total)):
        # i counts inwards from the final sample, so the very last one hits 0
        s[last-i] *= float(i)/elength
    return s
def normalise(s):
    """Return ``s`` rescaled so that its largest absolute sample is 10000.

    s -- numpy array of samples (integer PCM or float)

    A new array of the same dtype is returned and ``s`` is not modified.
    Integer results are rounded to the nearest value. Empty or all-zero
    input is returned unchanged.
    """
    s = np.asarray(s)
    if s.size == 0:
        return s
    # measure the peak in float so that -32768 in an int16 buffer cannot overflow
    peak = float(np.abs(s.astype(np.float64)).max())
    if peak <= 0:
        return s
    scaled = s*(10000.0/peak)
    if np.issubdtype(s.dtype, np.integer):
        scaled = np.rint(scaled)
    return scaled.astype(s.dtype)
def chop(wav,size,overlap,rand):
    """Cut a wav into overlapping, faded blocks.

    wav     -- (rate, samples) pair; only the samples (index 1) are used
    size    -- block length in samples
    overlap -- number of samples shared by consecutive blocks
    rand    -- unused, kept so existing callers keep working

    Returns a list of [0, block] pairs.
    Raises ValueError when overlap >= size, because the read position would
    never advance and the loop would never finish.
    """
    step = size-overlap
    if step <= 0:
        raise ValueError("overlap must be smaller than the block size")
    samples = wav[1]
    ret = []
    pos = 0
    # NOTE: the strict '<' drops a block that ends exactly on the last sample
    while pos+size < len(samples):
        ret.append([0,fadeinout(samples[pos:pos+size],500,500)])
        pos += step
    return ret
def fftify(chopped):
    """Return a list holding the FFT of the samples of every [tag, samples] block.

    A real list is built (not a ``map`` object) so that callers can take
    ``len()`` of the result on Python 3 as well.
    """
    return [np.fft.fft(block[1]) for block in chopped]
def mfccify(chopped,rate):
    """Return ``logfbank(samples, rate)`` for every [tag, samples] block, as a list."""
    return [logfbank(sig[1],rate) for sig in chopped]
def fftdiff(a,b):
    """Return the sum of the absolute element-wise differences between a and b.

    The sum is accumulated in ``np.longdouble``, the portable spelling of
    the widest float available; ``np.float128`` does not exist on every
    platform.
    """
    return (abs(a-b)).sum(dtype=np.longdouble)
def diffify(a,b):
    """Return a list of ``fftdiff`` values for the paired elements of a and b."""
    # zip + list: behaves the same on Python 2 and 3
    return [fftdiff(x,y) for x,y in zip(a,b)]
def search(fft,bank):
    """Return the index of the entry in ``bank`` closest to ``fft``.

    Closeness is measured with ``fftdiff``; the first of several equally
    close entries wins. Returns -1 when ``bank`` is empty. The chosen index
    is also reported through ``msg``.
    """
    # infinity instead of a large magic number: any finite distance beats it
    closest = float("inf")
    ret = -1
    for i,tfft in enumerate(bank):
        dist = fftdiff(fft,tfft)
        if dist<closest:
            ret = i
            closest = dist
    msg(str(ret))
    return ret
#unit_test()
class transponge():
    """Rebuilds a target sound from the closest-matching blocks of source sounds."""

    def __init__(self):
        self.src=[]        # (rate, samples) for every loaded source
        self.dst=[0,[]]    # (rate, samples) of the target
        self.src_chp=[]    # chopped source blocks
        self.dst_chp=[]
        self.src_fft=[]    # one feature entry per source block
        self.dst_fft=[]    # one feature entry per target block

    def set_target(self,dst_filename):
        """Load the wav file that the output should imitate."""
        self.dst = scipy.io.wavfile.read(dst_filename)
        msg("succesfully loaded "+dst_filename+" as the target")

    def add_source(self,src_filename):
        """Load a wav file into the brain."""
        self.src.append(scipy.io.wavfile.read(src_filename))
        msg("succesfully loaded "+src_filename+" as a source")
        msg("(now have "+str(len(self.src))+" sounds in brain...)")

    def prepare(self,chp_size,chp_overlap,mfcc):
        """Chop target and sources into blocks and compute their features.

        chp_size    -- block length in samples
        chp_overlap -- samples shared by consecutive blocks
        mfcc        -- true: compare with mfccify features, false: plain FFT
        """
        self.chp_size = chp_size
        self.chp_overlap = chp_overlap
        # start clean so that repeated runs do not pile up duplicate source blocks
        self.src_chp=[]
        self.src_fft=[]
        msg("chopping up target wav...")
        self.dst_chp = chop(self.dst,self.chp_size,self.chp_overlap,0)
        msg("number of target blocks: "+str(len(self.dst_chp)))
        msg("processing target blocks")
        if mfcc:
            self.dst_fft = list(mfccify(self.dst_chp,self.dst[0]))
        else:
            self.dst_fft = list(fftify(self.dst_chp))
        self.dst_chp = [] # clear
        self.dst_size = len(self.dst[1])
        for i,src in enumerate(self.src):
            msg("chopping up source wav no. "+str(i+1))
            chopped=chop(src,self.chp_size,self.chp_overlap,0)
            msg("number of source blocks: "+str(len(chopped)))
            self.src_chp+=chopped
            if mfcc:
                self.src_fft+=mfccify(chopped,src[0])
            else:
                self.src_fft+=fftify(chopped)
            # report the source total (the target count used to be printed here)
            msg("total number of source blocks now: "+str(len(self.src_fft)))

    def process(self,filename):
        """Assemble the output from the best source block per target block.

        The result goes to ``render_dir+filename``; it is rewritten every
        tenth block and once more at the end so the last blocks are not lost.
        Raises ValueError when prepare() produced no source blocks.
        """
        if not self.src_chp:
            raise ValueError("no source blocks available - add sources and prepare first")
        out = np.zeros(self.dst_size,dtype=self.src_chp[0][1].dtype)
        step = self.chp_size-self.chp_overlap
        total = float(len(self.dst_fft))
        pos = 0
        for i,seg in enumerate(self.dst_fft):
            # index of the closest source block
            ii = search(seg,self.src_fft)
            clear_msg()
            # mix the block in at quarter gain, clipped to the end of the output
            n = min(self.chp_size,self.dst_size-pos)
            if n>0:
                out[pos:pos+n] = out[pos:pos+n]+(self.src_chp[ii][1][:n]*0.25)
            pos += step
            msg(str((i/total)*100.0)[:5]+"%")
            if i%10==0: scipy.io.wavfile.write(render_dir+filename,44100,out)
        scipy.io.wavfile.write(render_dir+filename,44100,out)
class win:
    """Tk window for the transpongerator: load sounds, set options, run the render."""

    def __init__(self):
        self.sponge = transponge()
        # create window
        self.root = Tk()
        self.root.title("transpongerator brain "+version)
        f=Frame(self.root)
        f.pack(fill=NONE)
        Button(f, text="load a source", command=self.load_source).grid(row=0, column=0, sticky="we")
        Button(f, text="load the target", command=self.load_target).grid(row=0, column=1, sticky="we")
        self.run_button = Button(f, text="run", command=self.run)
        self.run_button.grid(row=0, column=2, sticky="we")
        self.mfcc_var = IntVar()
        cb=Checkbutton(f, text="use mfcc", variable=self.mfcc_var, command=self.mfcc)
        cb.grid(row=1, column=0)
        cb.select()

        # overlap, as a fraction of the window size
        rf = Frame(f)
        rf.grid(row=1, column=3)
        Label(rf, text="overlap").grid(row=0,column=0)
        self.overlap_entry = Entry(rf, width=5)
        self.overlap_entry.grid(row=1, column=0)
        self.overlap_entry.bind("<Return>", self.on_overlap)
        self.overlap_entry.delete(0, END)
        self.overlap_entry.insert(0, "0.75")
        self.overlap=0.75

        # window size in samples
        rf = Frame(f)
        rf.grid(row=1, column=4)
        Label(rf, text="window size").grid(row=0,column=0)
        self.window_entry = Entry(rf, width=5)
        self.window_entry.grid(row=1, column=0)
        self.window_entry.bind("<Return>", self.on_window_size)
        self.window_entry.delete(0, END)
        self.window_entry.insert(0, "3000")
        self.window_size=3000

        # message area
        self.debug = Text(self.root, font = "Helvetica 24 bold", height=10, width=60)
        self.debug.pack()
        self.debug.insert(END, "ready...\n")

    def msg(self,msg):
        """Show a message at the top of the message area."""
        # str() so that exceptions and numbers can be reported too
        self.debug.insert(0.0, str(msg)+"\n")
        self.root.update()

    def clear_msg(self):
        """Empty the message area."""
        self.debug.delete(0.0, END)
        self.root.update()

    def load_target(self):
        """Ask for the target wav and load it."""
        filename = askopenfilename(title = "load target wav")
        # a cancelled dialog returns an empty value (not always the empty string)
        if filename:
            self.sponge.set_target(filename)

    def load_source(self):
        """Ask for a source wav and add it to the brain."""
        filename = askopenfilename(title = "load source wav into brain")
        if filename:
            self.sponge.add_source(filename)

    def on_window_size(self,event):
        """Take over the window size entry; an unparsable value keeps the old one."""
        try:
            self.window_size = float(self.window_entry.get())
        except ValueError:
            pass

    def on_overlap(self,event):
        """Take over the overlap entry; an unparsable value keeps the old one."""
        try:
            self.overlap = float(self.overlap_entry.get())
        except ValueError:
            pass

    def run(self):
        """Validate the settings, then prepare and render into brain_out.wav."""
        # pick up edits even when Return was never pressed in the entry boxes
        self.on_window_size(None)
        self.on_overlap(None)
        size = int(self.window_size)
        overlap_samples = int(self.window_size*self.overlap)
        # an overlap as large as the window would never advance through the sound
        if size<1 or overlap_samples<0 or overlap_samples>=size:
            self.msg("overlap must be at least 0 and smaller than the window")
            return
        if not self.sponge.src:
            self.msg("load at least one source first")
            return
        self.msg("preparing: windowsize: "+str(self.window_size))
        self.msg("overlap: "+str(self.overlap))
        self.sponge.prepare(size,overlap_samples,self.mfcc_var.get()==1)
        self.msg("processing...")
        self.sponge.process("brain_out.wav")
        self.msg("done, saved in brain_out.wav")

    def mfcc(self):
        """Checkbox callback; the value is read from mfcc_var when running."""
        pass
w = win()
# from here on the module-level logging hooks write into the window
msg = w.msg
clear_msg = w.clear_msg
try:
    w.root.mainloop()
# 'as' form: the comma form is a syntax error on Python 3, which this script supports
except Exception as e:
    msg(str(e))

133
python/transponge-mfcc.py Normal file
View File

@ -0,0 +1,133 @@
# aggregate sound from mfcc or fft similarity of chunks
import numpy as np
import scipy.io.wavfile
from features import mfcc
from features import logfbank
from features import base
import copy
source_dir = "../sound/source/"
render_dir = "../sound/render/"
def fadeinout(s,slength,elength):
    """Return a copy of ``s`` with a linear fade-in and fade-out applied.

    s       -- indexable sample buffer (numpy arrays in this program)
    slength -- number of leading samples ramped up from gain 0
    elength -- number of trailing samples ramped down to gain 0

    The input is left untouched. A fade longer than the buffer is clipped
    to the buffer length instead of raising IndexError.
    """
    s = copy.deepcopy(s)
    total = len(s)
    for i in range(min(slength, total)):
        s[i] *= float(i)/slength
    last = total-1
    for i in range(min(elength, total)):
        # i counts inwards from the final sample, so the very last one hits 0
        s[last-i] *= float(i)/elength
    return s
def normalise(s):
    """Return ``s`` rescaled so that its largest absolute sample is 10000.

    s -- numpy array of samples (integer PCM or float)

    A new array of the same dtype is returned and ``s`` is not modified.
    Integer results are rounded to the nearest value. Empty or all-zero
    input is returned unchanged.
    """
    s = np.asarray(s)
    if s.size == 0:
        return s
    # measure the peak in float so that -32768 in an int16 buffer cannot overflow
    peak = float(np.abs(s.astype(np.float64)).max())
    if peak <= 0:
        return s
    scaled = s*(10000.0/peak)
    if np.issubdtype(s.dtype, np.integer):
        scaled = np.rint(scaled)
    return scaled.astype(s.dtype)
def chop(wav,size,overlap,rand):
    """Cut a wav into overlapping, faded blocks.

    wav     -- (rate, samples) pair; only the samples (index 1) are used
    size    -- block length in samples
    overlap -- number of samples shared by consecutive blocks
    rand    -- unused, kept so existing callers keep working

    Returns a list of [0, block] pairs.
    Raises ValueError when overlap >= size, because the read position would
    never advance and the loop would never finish.
    """
    step = size-overlap
    if step <= 0:
        raise ValueError("overlap must be smaller than the block size")
    samples = wav[1]
    ret = []
    pos = 0
    # NOTE: the strict '<' drops a block that ends exactly on the last sample
    while pos+size < len(samples):
        ret.append([0,fadeinout(samples[pos:pos+size],500,500)])
        pos += step
    return ret
def fftify(chopped):
    """Return a list holding the FFT of the samples of every [tag, samples] block."""
    # a real list (not a map object) so len() and repeated iteration work everywhere
    return [np.fft.fft(block[1]) for block in chopped]
def mfccify(chopped,rate):
    """Return ``logfbank(samples, rate)`` for every [tag, samples] block, as a list."""
    return [logfbank(sig[1],rate) for sig in chopped]
def fftdiff(a,b):
    """Return the sum of the absolute element-wise differences between a and b.

    The sum is accumulated in ``np.longdouble``, the portable spelling of
    the widest float available; ``np.float128`` does not exist on every
    platform.
    """
    return (abs(a-b)).sum(dtype=np.longdouble)
def diffify(a,b):
    """Return a list of ``fftdiff`` values for the paired elements of a and b."""
    # zip + list: behaves the same on Python 2 and 3
    return [fftdiff(x,y) for x,y in zip(a,b)]
def search(fft,bank):
    """Return the index of the entry in ``bank`` closest to ``fft``.

    Closeness is measured with ``fftdiff``; the first of several equally
    close entries wins. Returns -1 when ``bank`` is empty. The chosen index
    is also printed.
    """
    # infinity instead of a large magic number: any finite distance beats it
    closest = float("inf")
    ret = -1
    for i,tfft in enumerate(bank):
        dist = fftdiff(fft,tfft)
        if dist<closest:
            ret = i
            closest = dist
    print(ret)
    return ret
def unit_test():
    """Print a few sample results of fftdiff and fadeinout for eyeballing."""
    print(fftdiff(np.array([0,0,0,0]),np.array([1,1,1,1])))
    #assert(fftdiff(np.array([0,0,0,0]),np.array([1,1,1,1]))==1)
    print(fftdiff(np.array([-100,-1000,0,0]),np.array([-1,-1,-1,-1])))
    # fadeinout needs both a fade-in and a fade-out length
    print(fadeinout(np.array([10,10,10,10,10,10,10]),3,3))
#unit_test()
class transponge():
    """Rebuilds a target wav from the closest-matching blocks of source wavs."""

    def __init__(self,chp_size,chp_overlap,dst_filename):
        """Load the target wav and compute a feature entry for each of its blocks.

        chp_size     -- block length in samples
        chp_overlap  -- samples shared by consecutive blocks
        dst_filename -- wav file that the output should imitate
        """
        dst = scipy.io.wavfile.read(dst_filename)
        self.src_chp=[]
        self.src_fft=[]
        self.chp_size = chp_size
        self.chp_overlap = chp_overlap
        self.dst_chp = chop(dst,self.chp_size,self.chp_overlap,0)
        print("number of target blocks: "+str(len(self.dst_chp)))
        self.dst_fft = list(mfccify(self.dst_chp,dst[0]))
        #self.dst_fft = fftify(self.dst_chp)
        self.dst_chp = [] # clear
        self.dst_size = len(dst[1])

    def add(self,src_filename):
        """Add the blocks of a source wav to the pool that is searched."""
        src = scipy.io.wavfile.read(src_filename)
        print("adding "+src_filename)
        src_chp=chop(src,self.chp_size,self.chp_overlap,0)
        self.src_chp+=src_chp
        self.src_fft+=mfccify(src_chp,src[0])
        # report the source total (the target count used to be printed here)
        print("number of source blocks now: "+str(len(self.src_fft)))
        #self.src_fft+=fftify(src_chp)

    def process(self):
        """Assemble the output from the best source block per target block.

        The output file is rewritten every tenth block and once more at the
        end so the last blocks are not lost.
        """
        out = np.zeros(self.dst_size,dtype=self.src_chp[0][1].dtype)
        outname = render_dir+"pwr-acid-mfccnonorm1500.wav"
        step = self.chp_size-self.chp_overlap
        total = float(len(self.dst_fft))
        pos = 0
        for i,seg in enumerate(self.dst_fft):
            # index of the closest source block
            ii = search(seg,self.src_fft)
            # mix the block in at quarter gain, clipped to the end of the output
            n = min(self.chp_size,self.dst_size-pos)
            if n>0:
                out[pos:pos+n] = out[pos:pos+n]+(self.src_chp[ii][1][:n]*0.25)
            pos += step
            print((i/total)*100.0)
            if i%10==0: scipy.io.wavfile.write(outname,44100,out)
        scipy.io.wavfile.write(outname,44100,out)
def run(l):
    """Rebuild pw-right.wav from three source wavs using blocks of ``l`` samples."""
    overlap = int(l*0.75)
    t = transponge(l,overlap,source_dir+"pw-right.wav")
    # sources tried before and currently disabled:
    #   water.wav, cumbia.wav, pista07.wav, sailingbybit.wav, full.wav
    for name in ("totalsine.wav","808.wav","joey.wav"):
        t.add(source_dir+name)
    print("processing")
    t.process()

run(1500)

118
python/transponge.py Normal file
View File

@ -0,0 +1,118 @@
import numpy as np
import scipy.io.wavfile
def fadeinout(s,slength,elength):
    """Return a faded copy of ``s`` as a numpy array.

    s       -- sample buffer
    slength -- number of leading samples ramped up from gain 0
    elength -- number of trailing samples ramped down to gain 0

    The input is no longer modified in place: blocks are overlapping slices
    of one wav, so in-place fading attenuated the shared samples again for
    every block that contained them. A fade longer than the buffer is
    clipped to the buffer length instead of raising IndexError.
    """
    s = np.array(s)
    total = len(s)
    for i in range(min(slength, total)):
        s[i] *= float(i)/slength
    last = total-1
    for i in range(min(elength, total)):
        # i counts inwards from the final sample, so the very last one hits 0
        s[last-i] *= float(i)/elength
    return s
def normalise(s):
    """Return ``s`` rescaled so that its largest absolute sample is 10000.

    s -- numpy array of samples (integer PCM or float)

    Negative peaks count as well (only the largest positive sample used to
    be considered). A new array of the same dtype is returned and ``s`` is
    not modified. Integer results are rounded to the nearest value. Empty
    or all-zero input is returned unchanged.
    """
    s = np.asarray(s)
    if s.size == 0:
        return s
    # measure the peak in float so that -32768 in an int16 buffer cannot overflow
    peak = float(np.abs(s.astype(np.float64)).max())
    if peak <= 0:
        return s
    scaled = s*(10000.0/peak)
    if np.issubdtype(s.dtype, np.integer):
        scaled = np.rint(scaled)
    return scaled.astype(s.dtype)
def chop(wav,size,overlap):
    """Cut a wav into overlapping blocks, each normalised and then faded.

    wav     -- (rate, samples) pair; only the samples (index 1) are used
    size    -- block length in samples
    overlap -- number of samples shared by consecutive blocks

    Returns the list of processed blocks.
    Raises ValueError when overlap >= size, because the read position would
    never advance and the loop would never finish.
    """
    step = size-overlap
    if step <= 0:
        raise ValueError("overlap must be smaller than the block size")
    samples = wav[1]
    ret = []
    pos = 0
    # NOTE: the strict '<' drops a block that ends exactly on the last sample
    while pos+size < len(samples):
        ret.append(fadeinout(normalise(samples[pos:pos+size]),30,100))
        pos += step
    return ret
def fftify(chopped):
    """Return a list holding the FFT of every block in ``chopped``."""
    # a real list (not a map object) so len() and repeated iteration work everywhere
    return [np.fft.fft(block) for block in chopped]
def fftdiff(a,b):
    """Return the sum of the absolute element-wise differences between a and b.

    The sum is accumulated in ``np.longdouble``, the portable spelling of
    the widest float available; ``np.float128`` does not exist on every
    platform.
    """
    return (abs(a-b)).sum(dtype=np.longdouble)
def diffify(a,b):
    """Return a list of ``fftdiff`` values for the paired elements of a and b."""
    # zip + list: behaves the same on Python 2 and 3
    return [fftdiff(x,y) for x,y in zip(a,b)]
def search(fft,bank):
    """Return the index of the entry in ``bank`` closest to ``fft``.

    Closeness is measured with ``fftdiff``; the first of several equally
    close entries wins. Returns -1 when ``bank`` is empty. The chosen index
    is also printed.
    """
    # infinity instead of a large magic number: any finite distance beats it
    closest = float("inf")
    ret = -1
    for i,tfft in enumerate(bank):
        dist = fftdiff(fft,tfft)
        if dist<closest:
            ret = i
            closest = dist
    print(ret)
    return ret
def unit_test():
    """Print a few sample results of fftdiff and fadeinout for eyeballing."""
    print(fftdiff(np.array([0,0,0,0]),np.array([1,1,1,1])))
    #assert(fftdiff(np.array([0,0,0,0]),np.array([1,1,1,1]))==1)
    print(fftdiff(np.array([-100,-1000,0,0]),np.array([-1,-1,-1,-1])))
    # fadeinout needs both a fade-in and a fade-out length
    print(fadeinout(np.array([10,10,10,10,10,10,10]),3,3))
#unit_test()
class transponge():
    """Rebuilds a target wav from the closest-matching FFT blocks of source wavs."""

    def __init__(self,chp_size,chp_overlap,dst_filename):
        """Load the target wav and compute the FFT of each of its blocks.

        chp_size     -- block length in samples
        chp_overlap  -- samples shared by consecutive blocks
        dst_filename -- wav file that the output should imitate
        """
        dst = scipy.io.wavfile.read(dst_filename)
        self.src_chp=[]
        self.src_fft=[]
        # source block chosen for each target block; filled by process(), read by render()
        self.indices=[]
        self.chp_size = chp_size
        self.chp_overlap = chp_overlap
        self.dst_chp = chop(dst,self.chp_size,self.chp_overlap)
        print(self.chp_overlap)
        print(len(self.dst_chp))
        self.dst_fft = list(fftify(self.dst_chp))
        self.dst_chp = [] # clear
        self.dst_size = len(dst[1])

    def add(self,src_filename):
        """Add the blocks of a source wav to the pool that is searched."""
        src = scipy.io.wavfile.read(src_filename)
        src_chp=chop(src,self.chp_size,self.chp_overlap)
        self.src_chp+=src_chp
        self.src_fft+=fftify(src_chp)

    def _mix(self,out,pos,block):
        # overlap-add one block at half gain, clipped to the end of the output
        n = min(self.chp_size,self.dst_size-pos)
        if n>0:
            out[pos:pos+n] = out[pos:pos+n]+block[:n]*0.5

    def process(self):
        """Pick the best source block per target block and write outl.wav.

        The file is rewritten every tenth block and once more at the end so
        the last blocks are not lost. The chosen indices are remembered for
        render().
        """
        out = np.zeros(self.dst_size,dtype=self.src_chp[0].dtype)
        self.indices=[]
        step = self.chp_size-self.chp_overlap
        total = float(len(self.dst_fft))
        pos = 0
        for i,seg in enumerate(self.dst_fft):
            # index of the closest source block
            ii = search(seg,self.src_fft)
            self.indices.append(ii)
            self._mix(out,pos,self.src_chp[ii])
            pos += step
            print((i/total)*100.0)
            if i%10==0: scipy.io.wavfile.write("outl.wav",44100,out)
        scipy.io.wavfile.write("outl.wav",44100,out)

    def render(self):
        """Return the output rebuilt from the indices chosen by the last process()."""
        ret = np.zeros(self.dst_size,dtype=self.src_chp[0].dtype)
        step = self.chp_size-self.chp_overlap
        pos = 0
        for i in self.indices:
            self._mix(ret,pos,self.src_chp[i])
            pos += step
        return ret
def run(l):
    """Rebuild pw-left.wav from four source wavs using blocks of ``l`` samples."""
    overlap = int(l*0.75)
    t = transponge(l,overlap,"pw-left.wav")
    # "full.wav" was tried before and is currently disabled
    for name in ("water.wav","cumbia.wav","pista07.wav","sailingbybit.wav"):
        t.add(name)
    print("processing")
    t.process()

run(3000)

165
server/libmfcc.cpp Normal file
View File

@ -0,0 +1,165 @@
/*
* libmfcc.c - Code implementation for libMFCC
* Copyright (c) 2010 Jeremy Sawruk
*
* This code is released under the MIT License.
* For conditions of distribution and use, see the license in LICENSE
*/
#include <math.h>
#include "libmfcc.h"
/*
* Computes the specified (mth) MFCC
*
* spectralData - array of doubles containing the results of FFT computation. This data is already assumed to be purely real
* samplingRate - the rate that the original time-series data was sampled at (i.e 44100)
* NumFilters - the number of filters to use in the computation. Recommended value = 48
* binSize - the size of the spectralData array, usually a power of 2
* m - The mth MFCC coefficient to compute
*
*/
double GetCoefficient(double* spectralData, unsigned int samplingRate, unsigned int NumFilters, unsigned int binSize, unsigned int m)
{
	double result = 0.0;
	double outerSum = 0.0;
	double innerSum = 0.0;
	unsigned int k, l;

	// Nothing to analyse. This also keeps the unsigned "binSize - 1" in the
	// loop bound below from wrapping around when binSize is 0.
	if(spectralData == 0 || binSize == 0)
	{
		return 0.0;
	}

	// 0 <= m < L
	if(m >= NumFilters)
	{
		// Error condition: the requested coefficient index is not below the
		// number of filters. 0 is returned.
		return 0.0;
	}

	result = NormalizationFactor(NumFilters, m);

	for(l = 1; l <= NumFilters; l++)
	{
		// Inner sum: filter-weighted magnitude over the bins for filter l
		innerSum = 0.0;
		for(k = 0; k < binSize - 1; k++)
		{
			innerSum += fabs(spectralData[k] * GetFilterParameter(samplingRate, binSize, k, l));
		}

		if(innerSum > 0.0)
		{
			innerSum = log(innerSum); // The log of 0 is undefined, so don't use it
		}

		// Cosine term for coefficient m and filter l
		innerSum = innerSum * cos(((m * PI) / NumFilters) * (l - 0.5f));

		outerSum += innerSum;
	}

	result *= outerSum;

	return result;
}
/*
 * Computes the Normalization Factor (Equation 6)
 * Used for internal computation only - not to be called directly
 *
 * Yields sqrt(1 / NumFilters) for the zeroth coefficient and
 * sqrt(2 / NumFilters) for every other one.
 */
double NormalizationFactor(int NumFilters, int m)
{
    const float numerator = (m == 0) ? 1.0f : 2.0f;
    return sqrt(numerator / NumFilters);
}
/*
 * Compute the filter parameter for the specified frequency and filter bands (Eq. 2)
 * Used for internal computation only - not to be called directly
 *
 * samplingRate  - sampling rate of the original signal
 * binSize       - number of bins in the spectrum
 * frequencyBand - index k of the spectral bin being weighted
 * filterBand    - index l of the triangular filter (1-based)
 *
 * Returns the weight of filter l at bin k, or 0.0 when the bin lies outside
 * the filter's support, when binSize is 0, or when filterBand is 0.
 */
double GetFilterParameter(unsigned int samplingRate, unsigned int binSize, unsigned int frequencyBand, unsigned int filterBand)
{
    // filterBand 0 has no lower neighbour (filterBand - 1 would wrap) and an
    // empty spectrum has no bin frequencies; both contribute nothing.
    if(binSize == 0 || filterBand == 0)
    {
        return 0.0;
    }

    // k * Fs / N, evaluated in floating point so the bin frequency is neither
    // truncated to whole Hz nor subject to 32-bit overflow.
    const double boundary = ((double)frequencyBand * samplingRate) / binSize;
    const double prevCenterFrequency = GetCenterFrequency(filterBand - 1); // fc(l - 1) etc.
    const double thisCenterFrequency = GetCenterFrequency(filterBand);
    const double nextCenterFrequency = GetCenterFrequency(filterBand + 1);

    double filterParameter = 0.0; // outside [fc(l-1), fc(l+1)) the weight is zero

    if(boundary >= prevCenterFrequency && boundary < thisCenterFrequency)
    {
        // Rising edge of the triangle
        filterParameter = (boundary - prevCenterFrequency) / (thisCenterFrequency - prevCenterFrequency);
        filterParameter *= GetMagnitudeFactor(filterBand);
    }
    else if(boundary >= thisCenterFrequency && boundary < nextCenterFrequency)
    {
        // Falling edge of the triangle
        filterParameter = (boundary - nextCenterFrequency) / (thisCenterFrequency - nextCenterFrequency);
        filterParameter *= GetMagnitudeFactor(filterBand);
    }

    return filterParameter;
}
/*
 * Compute the band-dependent magnitude factor for the given filter band (Eq. 3)
 * Used for internal computation only - not to be called directly
 *
 * Bands 1..14 use a fixed factor, bands 15..48 use 2 / (fc(l+1) - fc(l-1)),
 * and any other band yields 0.
 */
double GetMagnitudeFactor(unsigned int filterBand)
{
    if(filterBand >= 1 && filterBand <= 14)
    {
        return 0.015;
    }

    if(filterBand >= 15 && filterBand <= 48)
    {
        return 2.0f / (GetCenterFrequency(filterBand + 1) - GetCenterFrequency(filterBand - 1));
    }

    return 0.0;
}
/*
 * Compute the center frequency (fc) of the specified filter band (l) (Eq. 4)
 * This is where the mel-frequency scaling occurs. Filters are specified so that their
 * center frequencies are equally spaced on the mel scale
 * Used for internal computation only - not to be called directly
 *
 * Band 0 maps to 0, bands 1..14 are spaced linearly (200/3 apart) and
 * higher bands grow geometrically from 1073.4.
 */
double GetCenterFrequency(unsigned int filterBand)
{
    if(filterBand == 0)
    {
        return 0;
    }

    if(filterBand <= 14)
    {
        // Linear region
        return (200.0f * filterBand) / 3.0f;
    }

    // Geometric region
    const double exponent = filterBand - 14.0f;
    return pow(1.0711703, exponent) * 1073.4;
}

26
server/libmfcc.h Normal file
View File

@ -0,0 +1,26 @@
/*
* libmfcc.h - Header for libMFCC
* Copyright (c) 2010 Jeremy Sawruk
*
* This code is released under the MIT License.
* For conditions of distribution and use, see the license in LICENSE
*/
#pragma once
#define PI 3.14159265358979323846264338327
// Returns the specified (mth) MFCC
//   spectralData - FFT results, assumed to be purely real
//   samplingRate - rate the original time-series data was sampled at
//   NumFilters   - number of filters to use in the computation
//   binSize      - size of the spectralData array
//   m            - index of the coefficient to compute (0 <= m < NumFilters)
double GetCoefficient(double* spectralData, unsigned int samplingRate, unsigned int NumFilters, unsigned int binSize, unsigned int m);
// Compute the normalization factor (For internal computation only - not to be called directly)
double NormalizationFactor(int NumFilters, int m);
// Compute the filter parameter for the specified frequency and filter bands (For internal computation only - not to be called directly)
double GetFilterParameter(unsigned int samplingRate, unsigned int binSize, unsigned int frequencyBand, unsigned int filterBand);
// Compute the band-dependent magnitude factor for the given filter band (For internal computation only - not to be called directly)
double GetMagnitudeFactor(unsigned int filterBand);
// Compute the center frequency (fc) of the specified filter band (l) (For internal computation only - not to be called directly)
double GetCenterFrequency(unsigned int filterBand);

35
server/samplebrain.h Normal file
View File

@ -0,0 +1,35 @@
#include "sample.h"
#include <vector>
// One analysed block of audio: the raw samples together with the
// features derived from them.
class brain_block {
public:
    // runs analysis on pcm
    brain_block(const sample &pcm);

    // returns distance based on ratio of fft-mfcc values
    float compare(const brain_block &other, float ratio);

    sample m_pcm;   // the block's samples
    sample m_fft;   // fft analysis of the block
    sample m_mfcc;  // mfcc analysis of the block
};
class sample_brain {
public:
sample_brain();
// rewrites whole brain
void init(u32 block_size, u32 overlap);
// take another brain and rebuild this brain from bits of that one
// (presumably this one is made from a single sample)
sample resynth(const sample_brain *other, ratio);
private:
vector<brain_block> m_brain;
u32 m_block_size;
u32 m_overlap;
};

8
sponge/.gitignore vendored Normal file
View File

@ -0,0 +1,8 @@
/pom.xml
*jar
/lib
/classes
/native
/.lein-failures
/checkouts
/.lein-deps-sum

13
sponge/README Normal file
View File

@ -0,0 +1,13 @@
# app
FIXME: write description
## Usage
FIXME: write
## License
Copyright (C) 2015 FIXME
Distributed under the Eclipse Public License, the same as Clojure.

BIN
sponge/images/icon.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 322 B

View File

@ -0,0 +1 @@
83c7852ff1316446216a4452ed66ccd3

View File

@ -0,0 +1 @@
5fb05ff1d494d0e015315237244899fcc29ecce6

View File

@ -0,0 +1,9 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd" xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
<modelVersion>4.0.0</modelVersion>
<groupId>local</groupId>
<artifactId>comirva</artifactId>
<version>0.0.36</version>
<description>POM was created from install:install-file</description>
</project>

View File

@ -0,0 +1 @@
97aa618f81397aabd91d962b9b1e8f2c

View File

@ -0,0 +1 @@
7c9f90d0712b2a83237b65cba3f1dfb791bd6cf6

View File

@ -0,0 +1 @@
83c7852ff1316446216a4452ed66ccd3

View File

@ -0,0 +1 @@
5fb05ff1d494d0e015315237244899fcc29ecce6

View File

@ -0,0 +1,9 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd" xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
<modelVersion>4.0.0</modelVersion>
<groupId>local</groupId>
<artifactId>comirva</artifactId>
<version>0.36</version>
<description>POM was created from install:install-file</description>
</project>

View File

@ -0,0 +1 @@
d3f0bb0939b4aea5a37d4a5a1bb6fe72

View File

@ -0,0 +1 @@
c86d1b0cea230bc9a9792cbef72e717bcf71fcf5

View File

@ -0,0 +1,13 @@
<?xml version="1.0" encoding="UTF-8"?>
<metadata>
<groupId>local</groupId>
<artifactId>comirva</artifactId>
<version>0.36</version>
<versioning>
<versions>
<version>0.36</version>
<version>0.0.36</version>
</versions>
<lastUpdated>20150624211203</lastUpdated>
</versioning>
</metadata>

View File

@ -0,0 +1 @@
282709b17127e5b0cf4426991cb5f23a

View File

@ -0,0 +1 @@
1141e16182424f22fe3e34ca54d41ea130a21f69

11
sponge/project.clj Normal file
View File

@ -0,0 +1,11 @@
;; Leiningen project definition for the sponge application.
;; The entry point is app.core (see :main below).
(defproject app "1.0.0-SNAPSHOT"
:description "FIXME: write description"
:dependencies [[org.clojure/clojure "1.5.1"]
;; GUI toolkit used by app.core
[seesaw "1.4.5"]
[org.clojure/core.async "0.1.267.0-0d7780-alpha"]
;; incanter.core / incanter.charts are required by app.core and app.wav
[incanter "1.5.5"]
;; primitive arithmetic used throughout app.wav
[primitive-math "0.1.4"]
;; hiphip.double array macros used by the audio namespaces
[hiphip-aot "0.1.2"]
;; provides the MFCC implementation imported by app.listen
[org.openimaj/audio-processing "1.3.1"]
]
:main app.core)

View File

@ -0,0 +1,54 @@
(ns app.aggregate
(:use app.wav)
(:require [hiphip.double :as dbl]
[app.block :as block]
[app.blocklist :as blocklist]))
(defn data-to-sound
  "Presents the sample array `data` as a one-channel SampledSound.
  The duration is derived from a fixed rate of 44100 samples per
  second and the requested sample rate is ignored."
  [data]
  (reify SampledSound
    (channels [_] 1)
    (duration [_] (/ (count data) 44100))
    (chunks [_ _sample-rate]
      ;; sloooow
      ;; a single chunk holding a single channel
      (vector (vector data)))))
(defn sound-to-data
  "Returns the first channel of the first chunk of `sound`, requested
  at 44100 samples per second."
  [sound]
  (-> sound
      (.chunks 44100)
      first
      first))
(defn render-blocklist
  "Wraps `blocklist` as a one-channel SampledSound whose samples are
  the :pcm arrays of the blocks joined end to end. The duration
  assumes every block is as long as the first one and a rate of 44100."
  [blocklist]
  (reify SampledSound
    (channels [_] 1)
    (duration [_]
      (let [block-length (count (:pcm (first blocklist)))
            total-samples (* block-length (count blocklist))]
        (/ total-samples 44100)))
    (chunks [_ _sample-rate]
      ;; sloooow
      [[(double-array (mapcat :pcm blocklist))]])))
(defn render-blocklist-fft
  "Like render-blocklist, but the sample data is made from the :fft
  arrays of the blocks. The duration is still computed from the
  length of the first block's :pcm array."
  [blocklist]
  (reify SampledSound
    (channels [_] 1)
    (duration [_]
      (let [block-length (count (:pcm (first blocklist)))
            total-samples (* block-length (count blocklist))]
        (/ total-samples 44100)))
    (chunks [_ _sample-rate]
      ;; sloooow
      [[(double-array (mapcat :fft blocklist))]])))
(defn search
  "For every block in `target`, finds the closest block in `src`
  (weighted by `ratio`), prints the winning distance and returns the
  lazy sequence of chosen blocks."
  [src target ratio]
  (letfn [(closest [block]
            (let [[distance match] (blocklist/search src block ratio)]
              (println distance)
              match))]
    (map closest target)))
(defn aggregate
  "Rebuilds the sound in `target-filename` out of blocks taken from
  the files in `source-path-list`. Every file is cut into blocks of
  `block-size` samples at `rate`; `ratio` is the fft/mfcc weighting
  passed on to the search. Returns the result as a SampledSound."
  [source-path-list target-filename rate block-size ratio]
  (let [load-blocks (fn [filename]
                      (blocklist/build (read-sound filename) rate block-size))
        blocks (mapcat load-blocks source-path-list)
        target (load-blocks target-filename)]
    (println "built sources")
    (-> (search blocks target ratio)
        render-blocklist)))

20
sponge/src/app/block.clj Normal file
View File

@ -0,0 +1,20 @@
(ns app.block
(:use app.wav)
(:require [hiphip.double :as v]
[app.listen :as listen]))
;; whacks pcm
(defn build
  "Analyses one block of samples. `pcm` is faded in and out in place,
  then copies of it are used to compute the :fft and :mfcc entries of
  the returned map; :pcm is the (modified) input array itself."
  [pcm]
  ;; normalisation is currently switched off:
  ;(listen/normalise! pcm)
  (listen/fadeinout! pcm 50 100)
  (let [spectrum (listen/fftify (v/aclone pcm))
        cepstrum (first (listen/mfccify (v/aclone pcm)))]
    {:pcm pcm
     :fft spectrum
     :mfcc cepstrum}))
;; scores fft and mfcc simultaneously with a weighting
(defn diff
  "Distance between blocks `a` and `b` as a blend of their fft and
  mfcc distances. `ratio` is the weight of the mfcc distance: 0 uses
  the fft distance only, 1 the mfcc distance only, and anything else
  gives fft * (1 - ratio) + mfcc * ratio."
  [a b ratio]
  (cond
    ;; == so that 0.0 / 1.0 also take the single-feature shortcut
    (== ratio 0) (listen/diff (:fft a) (:fft b))
    (== ratio 1) (listen/diff (:mfcc a) (:mfcc b))
    :else (+ (* (listen/diff (:fft a) (:fft b)) (- 1 ratio))
             (* (listen/diff (:mfcc a) (:mfcc b)) ratio))))

View File

@ -0,0 +1,22 @@
(ns app.blocklist
(:use app.wav)
(:require [hiphip.double :as dbl]
[app.block :as block]))
(defn build
  "Cuts the first channel of the first chunk of `sound` (requested at
  `rate`) into consecutive blocks of `block-size` samples and analyses
  each one with block/build. Trailing samples that do not fill a whole
  block are ignored. Prints the total sample count and returns a lazy
  sequence of blocks."
  [sound rate block-size]
  (let [data (first (first (.chunks sound rate)))
        ;; number of complete blocks; quot keeps the last block when the
        ;; length is an exact multiple of block-size and never reads
        ;; past the end of data
        block-count (quot (count data) block-size)]
    (println (count data))
    (for [block-index (range block-count)]
      (block/build
       (dbl/amake
        [i block-size]
        (aget data (+ (* block-index block-size) i)))))))
(defn search
  "Scans `blocks` for the one closest to `target-block` according to
  block/diff with the given `ratio`. Returns a [distance block] pair;
  when nothing beats the initial sentinel the pair is
  [9999999999999 false]."
  [blocks target-block ratio]
  (let [closer (fn [[best-distance :as best] candidate]
                 (let [distance (block/diff target-block candidate ratio)]
                   (if (< distance best-distance)
                     [distance candidate]
                     best)))]
    (reduce closer [9999999999999 false] blocks)))

145
sponge/src/app/core.clj Normal file
View File

@ -0,0 +1,145 @@
(ns app.core (:gen-class)
(:use app.wav app.aggregate)
(:require
[app.blocklist :as blocklist]
[incanter.core :as incanter]
[incanter.charts :as charts]
[app.listen :as listen])
(:import javax.swing.ImageIcon
edu.emory.mathcs.jtransforms.fft.DoubleFFT_1D
org.openimaj.audio.features.MFCC
org.openimaj.audio.samples.FloatSampleBuffer
org.openimaj.audio.AudioFormat
))
(use 'seesaw.core
'seesaw.graphics
'seesaw.color
'seesaw.font)
; A very rudimentary example of (canvas).
; Define some paint handlers. Each takes the canvas and Graphics2D object
; as args. The function is called within a (seesaw.graphics/push) block
; so any changes made to the graphics context will be backed out when
; the function returns.
;
; This first handler uses raw Java2D calls to do painting. See (paint2) below
; for an example of using Seesaw's simple shape support.
(defn paint1
  "Paint handler that draws an outlined polygon, four translucent
  quadrant rectangles and a caption using raw Java2D calls on `g`,
  sized from the canvas `c`."
  [c g]
  (let [w (.getWidth c)
        h (.getHeight c)
        half-w (/ w 2)
        half-h (/ h 2)
        outline (polygon [0 h] [(/ w 4) 0] [half-w half-h] [w half-h] [0 h])
        outline-style (style :foreground java.awt.Color/BLACK
                             :background (color 128 128 128 128)
                             :stroke (stroke :width 4))]
    (doto g
      (draw outline outline-style)
      ;; top-left
      (.setColor (color 224 224 0 128))
      (.fillRect 0 0 half-w half-h)
      ;; bottom-left
      (.setColor (color 0 224 224 128))
      (.fillRect 0 half-h half-w half-h)
      ;; top-right
      (.setColor (color 224 0 224 128))
      (.fillRect half-w 0 half-w half-h)
      ;; bottom-right
      (.setColor (color 224 0 0 128))
      (.fillRect half-w half-h half-w half-h)
      (.setColor (color 0 0 0))
      (.drawString "Hello. This is a canvas example" 20 20))))
;; Style for the caption drawn by paint2: black, bold 24pt Arial.
(def text-style (style :foreground (color 0 0 0)
:font "ARIAL-BOLD-24"))
;; Closed four-pointed star path around the origin; paint2 translates
;; to the canvas centre before drawing it.
(def star
(path []
(move-to 0 20) (line-to 5 5)
(line-to 20 0) (line-to 5 -5)
(line-to 0 -20) (line-to -5 -5)
(line-to -20 0) (line-to -5 5)
(line-to 0 20)))
(defn paint2
  "Paint handler built on Seesaw's shape support. It first reads and
  evaluates the text of the :editor widget, then draws four
  translucent ellipses, a rotated caption and a star at the centre of
  canvas `c`."
  [c g]
  (println "hello")
  ;; NOTE(review): whatever is typed into the editor is eval'd on every
  ;; repaint; acceptable only while the editor content is trusted.
  (-> (to-root c)
      (select [:#editor])
      .getText
      read-string
      eval)
  (let [w (.getWidth c)
        h (.getHeight c)
        w2 (/ w 2)
        h2 (/ h 2)
        translucent (fn [red green blue]
                      (style :background (color red green blue 128)))]
    (draw g
          (ellipse 0 0 w2 h2) (translucent 224 224 0)
          (ellipse 0 h2 w2 h2) (translucent 0 224 224)
          (ellipse w2 0 w2 h2) (translucent 224 0 224)
          (ellipse w2 h2 w2 h2) (translucent 224 0 0))
    (push g
          (rotate g 20)
          (draw g (string-shape 20 20 "Hello. This is a canvas example") text-style))
    (push g
          (translate g w2 h2)
          (draw g star (style :foreground java.awt.Color/BLACK
                              :background java.awt.Color/YELLOW)))))
; Create an action that swaps the paint handler for the canvas.
; Note that we can use (config!) to set the :paint handler just like
; properties on other widgets.
(defn switch-paint-action
  "Returns an action named `n` that installs `paint` as the :paint
  handler of the :canvas widget in the frame the event came from."
  [n paint]
  (letfn [(install-paint [event]
            (config! (select (to-frame event) [:#canvas])
                     :paint paint))]
    (action :name n
            :handler install-paint)))
(defn handler
  "Shows an alert naming the action command of the button `event`."
  [event]
  (let [command (.getActionCommand event)
        message (str "<html>Hello from <b>Clojure</b>. Button "
                     command " clicked.")]
    (alert event message)))
;; Main application window, built when the namespace is loaded.
;; Layout: an editable text pane (:editor) beside a drawing canvas
;; (:canvas), with a row of paint-switching actions along the bottom.
(def f (frame :title "s p o n g e" :on-close :exit
:width 800 :height 600
:content
(border-panel
:center
(left-right-split
;; paint2 reads and evaluates this widget's text via the :editor id
(editor-pane
:id :editor
:content-type "text/ascii"
:editable? true
:font (font :name :monospaced
:size 20)
:text "(println \"hello world\")")
;; starts without a paint handler; the actions below install one
(canvas :id :canvas :background "#BBBBDD" :paint nil)
:divider-location 1/3)
:south
(horizontal-panel :items ["Switch canvas paint function: "
(switch-paint-action "None" nil)
(switch-paint-action "Rectangles" paint1)
(switch-paint-action "Ovals" paint2)]))))
(defn -main
  "Application entry point: sets the window icon, shows the main
  frame, then visualizes rise.wav and the result of running fftify
  over its samples."
  [& args]
  (.setIconImage f (.getImage (new ImageIcon "images/icon.png")))
  (show! f)
  (println "playing...")
  ;; local binding instead of a def inside the function body, which
  ;; would (re)define a namespace-wide var on every call
  (let [source-path "../sound/source/"
        rise-path (str source-path "rise.wav")]
    (visualize (read-sound rise-path))
    (visualize
     (data-to-sound
      (listen/fftify
       (sound-to-data (read-sound rise-path)))))

    ;; disabled experiment: aggregate rise.wav against drop.wav
    (comment def s (aggregate
                    (map (fn [s] (str source-path s))
                         ["rise.wav"]
                         ;;["water.wav" "cumbia.wav" "pista07.wav" "sailingbybit.wav"]
                         )
                    "../sound/source/drop.wav" 44100 3000 0))

    ;;(visualize s)
    ;;(save s "out.wav" 44100)
    ))

38
sponge/src/app/listen.clj Normal file
View File

@ -0,0 +1,38 @@
(ns app.listen
(:use app.wav)
(:require [hiphip.double :as v])
(:import javax.swing.ImageIcon
edu.emory.mathcs.jtransforms.fft.DoubleFFT_1D
org.openimaj.audio.features.MFCC
org.openimaj.audio.samples.FloatSampleBuffer
org.openimaj.audio.AudioFormat
))
(defn fftify
  "Runs a real forward FFT over the array `s` in place, replaces every
  element by its absolute value and returns `s`."
  [s]
  (.realForward (DoubleFFT_1D. (count s)) s)
  (v/afill! [value s] (Math/abs value))
  s)
(defn mfccify
  "Wraps the samples `s` in a FloatSampleBuffer (format arguments
  64, 44100, 1) and returns the result of MFCC's calculateMFCC."
  [s]
  (let [audio-format (AudioFormat. 64 44100 1)
        sample-buffer (FloatSampleBuffer. s audio-format)]
    (.calculateMFCC (MFCC.) sample-buffer)))
;; crappy envelope
(defn fadeinout!
  "Applies a linear fade to the array `xs` in place and returns it.
  The first `slen` samples ramp up from zero; the last `elen` samples
  ramp down so that the final sample becomes zero."
  [xs slen elen]
  ;; fade in: sample i is scaled by i / slen
  (doseq [i (range slen)]
    (v/aset xs i (* (v/aget xs i) (/ i slen))))
  ;; fade out: walk backwards from the last sample
  (let [last-index (dec (count xs))]
    (doseq [i (range elen)]
      (let [idx (- last-index i)]
        (v/aset xs idx (* (v/aget xs idx) (/ i elen))))))
  xs)
;; assumes zero crossing
(defn normalise!
  "Scales the array `xs` in place so that its largest absolute sample
  becomes 1.0 and returns `xs`. A buffer whose peak is zero is left
  untouched, since dividing by the peak would only produce NaNs."
  [xs]
  (let [peak (v/areduce [x xs] m 0 (max m (Math/abs x)))]
    (when (pos? peak)
      (v/afill! [x xs] (/ x peak))))
  xs)
(defn diff
  "Distance between the arrays `a` and `b`: the sum of the absolute
  differences of corresponding elements. The absolute value is taken
  per element so that positive and negative differences cannot cancel
  each other out."
  [a b]
  (v/asum [i a j b] (Math/abs (- i j))))

720
sponge/src/app/wav.clj Normal file
View File

@ -0,0 +1,720 @@
(ns app.wav
"Functions for manipulating a sound whose amplitude representation
is arrays of doubles."
(:require [clojure.java.io :as io]
[hiphip.double :as dbl]
[incanter.core :as incanter]
[incanter.charts :as charts]
[primitive-math :as p])
(:import [java.nio ByteBuffer]
[java.util.concurrent LinkedBlockingQueue]
[javax.sound.sampled
AudioFileFormat$Type
AudioFormat
AudioFormat$Encoding
AudioInputStream
AudioSystem]))
;;; Abstraction
;; TODO: It feels like the channels and duration stuff are the real
;; core of the abstraction, and the way you get amplitudes is sort of
;; orthogonal. Maybe there's another abstraction that can get pulled
;; out here.
;; Every sound in this namespace is a reified implementation of these
;; three methods; operations take sounds and return new reified sounds.
(defprotocol SampledSound
"Represents a sound as a sequence of vectors of Java double arrays."
(channels [this] "Returns the number of channels in the sound.")
(duration [this] "Returns the duration of the sound in seconds.")
(chunks [this sample-rate] "Returns a sequence of sequences each
containing a sequence of double arrays - one per channel - populated
with the data for this sound. The total number of samples per
channel will be (* duration sample-rate)"))
;;; Sound construction
(defmacro defsound
"Expands to define a function `name` that accepts arguments `args`
returns a sound with `duration`, `channels` whose samples are
determined by `expr`. Inside expr, the sample rate, the total number
of samples, the current sample index, and the current channel number
will be bound to the four symbols in `bindings`."
[name
duration-param
channels-param
docstring
args
[sample-rate num-samples index c]
expr]
;; The generated function takes the duration and channel parameters
;; first, followed by `args`.
`(defn ~name
~docstring
~(vec (concat [duration-param
channels-param]
args))
(let [duration# (double ~duration-param)
chans# (double ~channels-param)]
(reify SampledSound
(channels [this#] ~channels-param)
(duration [this#] duration#)
(chunks [this# ~sample-rate]
;; chunk-size# and the total sample count are computed from the
;; same expression, so for a non-empty sound num-chunks# is 1, the
;; `for` over (dec num-chunks#) below is empty and all samples are
;; produced by the "last chunk" branch.
;; NOTE(review): a zero sample count makes the division below
;; divide by zero - confirm callers never request that.
(let [chunk-size# (long (* duration# ~sample-rate))
~num-samples (long (* duration# ~sample-rate))
num-chunks# (-> ~num-samples (/ chunk-size#) Math/ceil long)]
(concat
(for [chunk-num# (range (dec num-chunks#))]
(let [base-index# (p/* (long chunk-num#) chunk-size#)]
(for [~c (range chans#)]
(dbl/amake [i# chunk-size#]
(let [~index (p/+ i# base-index#)]
~expr)))))
;; Handle the last chunk specially, since it's probably
;; shorter.
[(let [chunks-so-far# (p/- num-chunks# 1)
samples-so-far# (p/* chunk-size# chunks-so-far#)
samples-remaining# (p/- ~num-samples samples-so-far#)]
(for [~c (range chans#)]
(dbl/amake [i# samples-remaining#]
(let [~index (p/+ i# (p/* (p/- num-chunks# 1) chunk-size#))]
~expr))))])))))))
;; Generated signature: (constant duration chans x).
;; The sample expression ignores the index and channel and is just `x`.
(defsound constant duration chans
"Returns a sound of `duration` that has `chans` channels, each of
which is constant at `x`."
[x]
[sample-rate num-samples i c]
x)
(defn silence
  "Produces `dur` seconds of zero-amplitude sound on each of `chans`
  channels."
  [dur chans]
  (let [zero-amplitude 0.0]
    (constant dur chans zero-amplitude)))
;; TODO: It would be nice if we had a way to indicate local bindings
;; that we want to be in effect outside the amake so we don't have all
;; these stupid calls to double inside the inner loop.
;; Generated signature: (linear duration chans start end).
(defsound linear duration chans
"Returns a sound of `duration` that has `chans` channels, each of
which changes linearly from `start` to `end`."
[start end]
[sample-rate num-samples i c]
;; start + (end - start) * (i / num-samples)
(p/+ (double start)
(p/* (p/- (double end)
(double start))
(p/div (double i)
(double num-samples)))))
;; Generated signature: (fn-sound duration chans f).
(defsound fn-sound duration chans
"Creates a SampledSound `duration` seconds long where the amplitudes
are produced by `f`, a function of a channel number and a time in
seconds."
[f]
[sample-rate num-samples i c]
;; the time passed to f is the sample index divided by the sample rate
(f c (p/div (double i) (double sample-rate))))
(defn sinusoid
  "Produces a one-channel sine wave lasting `duration` seconds at
  `frequency`."
  [^double duration ^double frequency]
  (let [wave (fn sinusoid-fn [^long c ^double t]
               (Math/sin (p/* t frequency 2.0 Math/PI)))]
    (fn-sound duration 1 wave)))
(defn square-wave
  "Produces a one-channel sound lasting `duration` seconds that
  alternates between 1.0 and -1.0 at `frequency`."
  [^double duration ^double frequency]
  (let [wave (fn square-wave-fn [^long c ^double t]
               ;; count the half-periods elapsed so far
               (let [x (long (p/* t frequency 2.0))]
                 (if (even? x) 1.0 -1.0)))]
    (fn-sound duration 1 wave)))
(defn- to-double-arrays
"Return a seq of arrays of doubles that decode the values in buf."
[^bytes buf ^long bytes-read ^long bytes-per-sample ^long chans]
;; samples-read is the number of samples per channel
(let [samples-read (/ bytes-read bytes-per-sample chans)
bb (ByteBuffer/allocate bytes-read)
arrs (repeatedly chans #(double-array samples-read))]
;; copy the valid part of buf into the ByteBuffer and rewind it
(.put bb buf 0 bytes-read)
(.position bb 0)
;; samples are interleaved: for each frame, one short per channel,
;; scaled by 1/32768
(dotimes [n samples-read]
(doseq [arr arrs]
;; TODO: We're hardcoded to .getShort here, but the
;; bytes-per-sample is a parameter. Should probably have
;; something that knows how to read from a ByteBuffer given a
;; number of bits.
(dbl/aset arr n (p/div (double (.getShort bb)) 32768.0))))
arrs))
(defn- sample-chunks
  "Lazily reads `ais` in buffers sized for `chunk-size` frames of
  `chans` channels and returns a seq of decoded chunks; nil once a
  read yields no bytes."
  [^AudioInputStream ais ^long chans ^long bytes-per-sample ^long chunk-size]
  (let [buffer-size (p/* chunk-size chans bytes-per-sample)
        buf (byte-array buffer-size)
        bytes-read (.read ais buf)]
    (if (pos? bytes-read)
      (lazy-seq
       (cons (to-double-arrays buf (long bytes-read) bytes-per-sample chans)
             (sample-chunks ais chans bytes-per-sample chunk-size)))
      nil)))
(defn- read-duration
  "Given a path to a .wav or .mp3 file, return the duration in
  seconds. Uses the file format's \"duration\" property (microseconds)
  when it is present; otherwise derives the value from the stream's
  frame length and sample rate."
  [path]
  (let [file (io/file path)
        base-file-duration (-> (AudioSystem/getAudioFileFormat file)
                               .properties
                               (get "duration"))]
    (if base-file-duration
      (/ base-file-duration 1000000.0)
      ;; with-open closes the stream even if reading the format throws
      (with-open [in (AudioSystem/getAudioInputStream file)]
        (let [frame-length (.getFrameLength in)
              frames-per-second (.getSampleRate (.getFormat in))]
          (/ frame-length (double frames-per-second)))))))
(defn read-sound
"Given a path to a .wav or .mp3 file, return a SampledSound instance
over it."
[path]
(let [file (io/file path)
base-file-format (-> file AudioSystem/getAudioFileFormat .getFormat)
;; NOTE(review): base-file-properties is bound but not used below
base-file-properties (.properties base-file-format)
dur (read-duration path)
chans (.getChannels base-file-format)
file-sample-rate (.getSampleRate base-file-format)
file-encoding (.getEncoding base-file-format)]
(reify SampledSound
(duration [this] dur)
(channels [this] chans)
;; A new stream over the file is opened on every call to chunks.
;; NOTE(review): the stream is not closed in this function.
(chunks [this sample-rate]
;; 16-bit samples, matching the .getShort decoding in to-double-arrays
(let [bits-per-sample 16
bytes-per-sample (-> bits-per-sample (/ 8) long)
in (AudioSystem/getAudioInputStream file)
;; step 1: make sure the data is signed PCM at the file's own rate
decoded (if (= AudioFormat$Encoding/PCM_SIGNED file-encoding)
in
(AudioSystem/getAudioInputStream
(AudioFormat. AudioFormat$Encoding/PCM_SIGNED
file-sample-rate
bits-per-sample
chans
(* bytes-per-sample chans)
file-sample-rate
true)
^AudioInputStream in))
;; step 2: convert to the requested rate (big-endian signed PCM).
;; NOTE(review): `=` treats an integer sample-rate and the float
;; file rate as different, so integer rates presumably always take
;; the conversion branch, which also fixes the byte order that
;; to-double-arrays relies on - confirm before changing this test.
resampled (if (= sample-rate file-sample-rate)
decoded
(AudioSystem/getAudioInputStream
(AudioFormat. AudioFormat$Encoding/PCM_SIGNED
sample-rate
bits-per-sample
chans
(* bytes-per-sample chans)
sample-rate
true)
^AudioInputStream decoded))]
;; the chunk size is the whole sound: (* dur sample-rate) frames
(sample-chunks resampled chans bytes-per-sample (* dur sample-rate)))))))
;;; Sound manipulation
(defn peak
"Returns the maximum absolute amplitude of `s` when sampled at
`sample-rate`. If provided, will return immediately on finding a
value above `limit`."
;; without a limit, scan everything
([s sample-rate] (peak s sample-rate Double/MAX_VALUE))
([s sample-rate limit]
;; max-amplitude starts at the smallest positive double, so that is
;; also the result for a sound without any chunks
(loop [c (chunks s sample-rate)
max-amplitude Double/MIN_VALUE]
;; It's weird that I have to do the destructuring in a let
;; rather than above where we bind c, but if I don't, this loop
;; retains head and runs out of memory for longer sequences.
(let [[head-chunk & more-chunks] c]
(cond
;; Short-circuit if we hit `limit`
(< limit max-amplitude) max-amplitude
;; Sequence has been consumed
(not (seq head-chunk)) max-amplitude
:else
;; each channel is reduced starting from the running maximum, so
;; the max over channels is the new running maximum
(recur more-chunks
(double (apply max
(map (fn [^doubles arr]
(dbl/areduce [e arr]
m max-amplitude
(max m (Math/abs e))))
head-chunk)))))))))
;;; Sound operations
;; An operation takes one or more sounds and returns a new sound
(defn append
  "Returns a sound that plays `s1` followed by `s2`. Both sounds must
  have the same number of channels."
  [s1 s2]
  {:pre [(= (channels s1) (channels s2))]}
  (let [total-duration (+ (duration s1) (duration s2))]
    (reify SampledSound
      (channels [this] (channels s1))
      (duration [this] total-duration)
      (chunks [this sample-rate]
        (let [leading (chunks s1 sample-rate)
              trailing (chunks s2 sample-rate)]
          (concat leading trailing))))))
(defn- dbl-asub
  "Copies the elements of `arr` at indices [`start`, `end`) into a
  new double array."
  [arr ^long start ^long end]
  (let [length (p/- end start)]
    (dbl/amake [offset length]
      (dbl/aget arr (p/+ offset start)))))
(defn- drop-samples
  "Drops `n` samples from `chunks`. Returns nil when `chunks` holds
  fewer than `n` samples."
  [^long n chunks]
  (cond
    (zero? n) chunks

    ;; Ran out of data before n samples were dropped. This has to be
    ;; checked before looking at the first chunk's length, which would
    ;; otherwise fail on an empty sequence.
    (not (seq chunks)) nil

    ;; The cut falls inside the first chunk: keep its tail.
    (< n (dbl/alength (ffirst chunks)))
    (lazy-seq
     (cons (map #(dbl-asub % n (dbl/alength %)) (first chunks))
           (rest chunks)))

    ;; The whole first chunk is dropped; continue with the remainder.
    :else
    (recur (- n (dbl/alength (ffirst chunks))) (rest chunks))))
(defn- take-samples
  "Returns the leading part of `chunks` that holds `n` samples,
  cutting the final chunk short when necessary. Returns nil when
  there is nothing (left) to take."
  [^long n chunks]
  (when (and (seq chunks) (pos? n))
    (let [head (first chunks)
          head-length (dbl/alength (first head))]
      (if (< n head-length)
        ;; only part of this chunk is wanted
        [(map #(dbl-asub % 0 n) head)]
        (lazy-seq
         (cons head
               (take-samples (- n head-length)
                             (rest chunks))))))))
(defn multiplex
  "Expands the single-channel sound `s` to `n` channels, each one
  carrying the data of channel 0 of `s`. With `n` equal to 1 the
  sound is returned unchanged."
  [s ^long n]
  {:pre [(== 1 (channels s))]}
  (if-not (== 1 n)
    (reify SampledSound
      (channels [this] n)
      (duration [this] (duration s))
      (chunks [this sample-rate]
        (letfn [(fan-out [[arr]] (repeat n arr))]
          (map fan-out (chunks s sample-rate)))))
    s))
(defn trim
  "Returns the part of `s` between `start` and `end` (in seconds).
  An `end` beyond the end of the sound is clamped to its duration."
  [s ^double start ^double end]
  {:pre [(<= 0 start (duration s))
         (<= start end)]}
  (let [stop-at (min (duration s) end)
        length (- stop-at start)]
    (reify SampledSound
      (channels [this] (channels s))
      (duration [this] length)
      (chunks [this sample-rate]
        (let [samples-to-drop (long (* start sample-rate))
              samples-to-take (long (* length sample-rate))
              remaining (drop-samples samples-to-drop (chunks s sample-rate))]
          (take-samples samples-to-take remaining))))))
(defn- combine-chunks
"Returns a sequence of chunks whose contents are corresponding
elements of chunks1 and chunks2 combined by calling `f` on them. `f`
should be a function of the number of samples in the chunk to be
produced, the first chunk, the offset in that chunk at which to
start, the second chunk, and the offset in that chunk at which to
start. If no offsets are provided, defaults to zero."
([f chunks1 chunks2] (combine-chunks f chunks1 0 chunks2 0))
([f chunks1 offset1 chunks2 offset2]
(let [[head1 & more1] chunks1
[head2 & more2] chunks2]
(cond
;; Both inputs still have data: combine as many samples as both
;; head chunks can supply from their current offsets.
(and head1 head2)
(let [len1 (dbl/alength (first head1))
len2 (dbl/alength (first head2))
samples (min (- len1 offset1) (- len2 offset2))
consumed1? (= len1 (+ samples offset1))
consumed2? (= len2 (+ samples offset2))]
(lazy-seq
(cons
(f samples head1 offset1 head2 offset2)
;; A fully consumed head is dropped and its offset reset; a
;; partly consumed one stays in place with an advanced offset.
(combine-chunks f
(if consumed1? more1 chunks1)
(if consumed1? 0 (+ offset1 samples))
(if consumed2? more2 chunks2)
(if consumed2? 0 (+ offset2 samples))))))
;; Only the first input has data left: pass the rest through,
;; starting at the current offset.
(and head1 (not head2))
(cons (map #(dbl-asub % offset1 (dbl/alength %)) head1)
more1)
;; Only the second input has data left.
(and (not head1) head2)
(cons (map #(dbl-asub % offset2 (dbl/alength %)) head2)
more2)))))
(defn mix
"Mixes sounds `s1` and `s2` together."
[s1 s2]
{:pre [(= (channels s1) (channels s2))]}
(let [d1 (duration s1)
d2 (duration s2)]
(reify SampledSound
(duration [this] (max d1 d2))
(channels [this] (channels s1))
(chunks [this sample-rate]
;; pad the shorter sound with silence so both have the same duration
(let [s1* (if (< d1 d2)
(append s1 (silence (- d2 d1) (channels s1)))
s1)
s2* (if (<= d1 d2)
s2
(append s2 (silence (- d1 d2) (channels s2))))]
;; sample-wise sum, channel by channel
(combine-chunks (fn mix-fn [samples head1 offset1 head2 offset2]
(let [o1 (long offset1)
o2 (long offset2)]
(map #(dbl/amake [i samples]
(p/+ (dbl/aget %1 (p/+ i o1))
(dbl/aget %2 (p/+ i o2))))
head1
head2)))
(chunks s1* sample-rate)
(chunks s2* sample-rate)))))))
(defn gain
  "Returns a sound in which every sample of `s` is multiplied by `g`."
  [s ^double g]
  (letfn [(scale-channel [channel-chunk]
            (dbl/amap [x channel-chunk]
                      (p/* x g)))
          (scale-chunk [chunk]
            (map scale-channel chunk))]
    (reify SampledSound
      (channels [this] (channels s))
      (duration [this] (duration s))
      (chunks [this sample-rate]
        (map scale-chunk (chunks s sample-rate))))))
(defn envelope
"Multiplies the amplitudes of `s1` and `s2`, trimming the sound to
the shorter of the two."
[s1 s2]
{:pre [(= (channels s1) (channels s2))]}
(let [dur (min (duration s1) (duration s2))]
(reify SampledSound
(duration [this] dur)
(channels [this] (channels s1))
(chunks [this sample-rate]
;; cut whichever sound is longer down to the common duration
(let [s1* (if (< dur (duration s1))
(trim s1 0 dur)
s1)
s2* (if (< dur (duration s2))
(trim s2 0 dur)
s2)]
;; sample-wise product, channel by channel
(combine-chunks (fn envelope-fn [samples head1 offset1 head2 offset2]
(map #(dbl/amake [i samples]
(p/* (dbl/aget %1 (p/+ i (long offset1)))
(dbl/aget %2 (p/+ i (long offset2)))))
head1
head2))
(chunks s1* sample-rate)
(chunks s2* sample-rate)))))))
(defn fade-in
  "Returns `s` with a linear fade from silence to full volume over its
  first `fade-duration` seconds."
  [s ^double fade-duration]
  (let [chans (channels s)
        ramp (linear fade-duration chans 0 1.0)
        sustain (constant (- (duration s) fade-duration) chans 1.0)]
    (envelope (append ramp sustain) s)))
(defn fade-out
  "Returns `s` with a linear fade from full volume to silence over its
  last `fade-duration` seconds."
  [s ^double fade-duration]
  (let [chans (channels s)
        sustain (constant (- (duration s) fade-duration) chans 1.0)
        ramp (linear fade-duration chans 1.0 0)]
    (envelope (append sustain ramp) s)))
(defn segmented-linear
  "Builds a `chans`-channel sound out of consecutive linear ramps.
  `spec` alternates amplitudes and durations, beginning and ending
  with an amplitude. For example the spec

  1.0 30
  0 10
  0 0.5
  1.0

  (amplitudes in the left column, durations in the right) starts at
  1.0, falls linearly to 0.0 over 30 seconds, stays at 0 for 10
  seconds and then rises to 1.0 over the final 0.5 seconds."
  [chans & spec]
  {:pre [(and (odd? (count spec))
              (< 3 (count spec)))]}
  ;; each overlapping triple (amplitude, duration, amplitude) is one ramp
  (let [segments (for [[start duration end] (partition 3 2 spec)]
                   (linear duration chans start end))]
    (reduce append segments)))
(defn timeshift
  "Delays `s` by prepending `dur` seconds of silence."
  [s ^double dur]
  (let [lead-in (silence dur (channels s))]
    (append lead-in s)))
(defn ->stereo
"Creates a stereo sound. If given one single-channel sound,
duplicates channel zero on two channels. If given a single stereo
sound, returns it. If given two single-channel sounds, returns a
sound with the first sound on channel 0 and the second sound on
channel 1."
([s]
(case (long (channels s))
2 s
1 (reify SampledSound
(duration [this] (duration s))
(channels [this] 2)
(chunks [this sample-rate]
;; chunks of `s` are requested twice, once per output channel
(map (fn [[l] [r]] (vector l r))
(chunks s sample-rate) (chunks s sample-rate))))
;; any other channel count is rejected
(throw (ex-info "Can't steroize sound with other than one or two channels"
{:reason :cant-stereoize-channels
:s s}))))
([l r]
(when-not (= 1 (channels l) (channels r))
(throw (ex-info "Can't steroize two sounds unless they are both single-channel"
{:reason :cant-stereoize-channels
:l-channels (channels l)
:r-channels (channels r)})))
(reify SampledSound
;; the reported duration is that of the shorter input
(duration [this] (min (duration l) (duration r)))
(channels [this] 2)
(chunks [this sample-rate]
;; pair up equally sized slices of the two inputs
(combine-chunks (fn stereo-fn [samples [head1] offset1 [head2] offset2]
[(dbl-asub head1 offset1 (+ offset1 samples))
(dbl-asub head2 offset2 (+ offset2 samples))])
(chunks l sample-rate)
(chunks r sample-rate))))))
(defn pan
"Takes a two-channel sound and mixes the channels together by
`amount`, a float on the range [0.0, 1.0]. The usual use is to take
a sound with separate left and right channels and combine them so
each appears closer to stereo center. An `amount` of 0.0 would leave
both channels unchanged, 0.5 would result in both channels being the
same (i.e. appearing to be mixed to stereo center), and 1.0 would
switch the channels."
[s ^double amount]
{:pre [(= 2 (channels s))]}
(let [amount-complement (- 1.0 amount)]
(reify SampledSound
(duration [this] (duration s))
(channels [this] 2)
(chunks [this sample-rate]
(map (fn [[arr1 arr2]]
;; new channel 0 = ch0 * (1 - amount) + ch1 * amount
[(dbl/amap [e1 arr1
e2 arr2]
(p/+ (p/* e1 amount-complement)
(p/* e2 amount)))
;; new channel 1 = ch0 * amount + ch1 * (1 - amount)
(dbl/amap [e1 arr1
e2 arr2]
(p/+ (p/* e1 amount)
(p/* e2 amount-complement)))])
(chunks s sample-rate))))))
;; TODO: maybe make these into functions that return operations rather
;; than sounds.
;;; Playback
;; TODO: This is identical to the one in sound.clj. Merge them if we
;; don't get rid of sound.clj
(defmacro shortify
"Takes a floating-point number f in the range [-1.0, 1.0] and scales
it to the range of a 16-bit integer. Clamps any overflows."
[f]
;; Short/MAX_VALUE is converted to a double once, at macro-expansion time
(let [max-short-as-double (double Short/MAX_VALUE)]
;; clamp to [-1.0, 1.0] first, then scale and narrow to a short
`(let [clamped# (-> ~f (min 1.0) (max -1.0))]
(short (p/* ~max-short-as-double clamped#)))))
;; Starts a future that converts the chunks of `s` (at `sample-rate`)
;; into byte arrays of interleaved 16-bit samples and feeds them to the
;; queue `q`. The keyword ::eof is put on the queue after the last
;; chunk. Returns the future.
(defn- sample-provider
[s ^LinkedBlockingQueue q ^long sample-rate]
(let [chans (channels s)]
(future
(loop [[head-chunk & more] (chunks s sample-rate)]
(if-not head-chunk
(.put q ::eof)
;; two bytes per sample per channel
(let [chunk-len (dbl/alength (first head-chunk))
byte-count (p/* chans 2 chunk-len)
bb (ByteBuffer/allocate byte-count)
buffer (byte-array byte-count)]
;; interleave: for each frame, one short per channel
(dotimes [n chunk-len]
;; TODO: Find a more efficient way to do this
(doseq [arr head-chunk]
(.putShort bb (shortify (dbl/aget arr n)))))
;; rewind and copy the encoded bytes out of the ByteBuffer
(.position bb 0)
(.get bb buffer)
;; Bail if the player gets too far behind
;; NOTE(review): when the offer times out the loop ends without
;; putting ::eof on the queue - confirm the consumer copes.
(when (.offer q buffer 2 java.util.concurrent.TimeUnit/SECONDS)
(recur more))))))))
;; TODO: This is identical to the one in sound.clj. Merge them if we
;; don't get rid of sound.clj
(defn play
"Plays `s` asynchronously. Returns a value that can be passed to `stop`."
[s]
;; Playback always uses a fixed sample rate of 44100.
(let [sample-rate 44100
chans (channels s)
;; Format arguments: sample rate, 16 bits per sample, channel count,
;; signed = true, big-endian = true.
sdl (AudioSystem/getSourceDataLine (AudioFormat. sample-rate
16
chans
true
true))
stopped (atom false)
;; Bounded queue (capacity 10) between the provider and the player.
q (LinkedBlockingQueue. 10)
provider (sample-provider s q sample-rate)]
;; The returned map exposes the player future, a zero-argument stop
;; function, the queue, the provider future and the data line.
{:player (future (.open sdl)
;; Take buffers off the queue until stopped or ::eof arrives.
(loop [buf ^bytes (.take q)]
(when-not (or @stopped (= buf ::eof))
(.write sdl buf 0 (alength buf))
(.start sdl) ;; Doesn't hurt to do it more than once
(recur (.take q)))))
;; NOTE(review): the line is stopped here, but nothing in this function
;; drains or closes it -- confirm whether that is intentional.
:stop (fn []
(reset! stopped true)
(future-cancel provider)
(.stop sdl))
:q q
:provider provider
:sdl sdl}))
(defn stop
  "Halts playback for `player`, the map returned by `play`, by invoking
  the zero-argument function stored under its `:stop` key."
  [player]
  (let [stop-fn (:stop player)]
    (stop-fn)))
;;; Serialization
(defn- sampled-input-stream
"Returns an implementation of `InputStream` over the data in `s`."
[s sample-rate]
(let [;; Empty chunks, while valid, will screw us over by causing us
;; to return zero from read
useful-chunks (remove (fn [[arr]] (== 0 (dbl/alength arr)))
(chunks s sample-rate))
;; Mutable read cursor: the chunks not yet fully consumed, plus the
;; frame offset already consumed within the first of them.
chunks-remaining (atom useful-chunks)
offset (atom 0)
chans (channels s)]
(proxy [java.io.InputStream] []
;; Total byte size of the sound (frames * channels * 2 bytes); the value
;; does not shrink as data is read.
(available [] (-> (duration s) (* sample-rate) long (* (channels s) 2)))
(close [])
(mark [readLimit] (throw (UnsupportedOperationException.)))
(markSupported [] false)
(read ^int
;; Single-byte reads are not supported.
([] (throw (ex-info "Not implemented" {:reason :not-implemented})))
([^bytes buf] (.read ^java.io.InputStream this buf 0 (alength buf)))
;; Bulk read: serves at most the remainder of the current chunk per call.
([^bytes buf off len]
;; NOTE(review): this relies on the atom holding nil once the chunks are
;; used up; an initially empty `useful-chunks` sequence is still truthy --
;; confirm that case cannot occur.
(if-not @chunks-remaining
-1
(let [[head-chunk & more-chunks] @chunks-remaining
chunk-frames (dbl/alength (first head-chunk))
start-frame (long @offset)
chunk-frames-remaining (- chunk-frames start-frame)
chunk-bytes-remaining (* chunk-frames-remaining 2 chans)
;; NOTE(review): `/` yields a ratio when `len` is not a multiple of the
;; frame size (2 * chans) -- confirm callers always pass whole frames.
frames-requested (/ len 2 chans)
;; True when this call consumes everything left in the current chunk.
read-remainder? (<= chunk-frames-remaining frames-requested)
frames-to-read (if read-remainder?
chunk-frames-remaining
frames-requested)
bytes-to-read (if read-remainder? chunk-bytes-remaining len)
bb (ByteBuffer/allocate bytes-to-read)]
;; Refuse to return zero; the ex-data carries the full read state.
(when (zero? bytes-to-read)
(throw (ex-info "Zero bytes requested"
{:reason :no-bytes-requested
:off off
:len len
:start-frame start-frame
:chunk-frames chunk-frames
:chunk-frames-remaining chunk-frames-remaining
:frames-requested frames-requested
:read-remainder? read-remainder?
:frames-to-read frames-to-read
:bytes-to-read bytes-to-read})))
;; Write one short per channel array for each frame, so channels are
;; interleaved frame by frame.
(dotimes [n frames-to-read]
;; TODO: Find a more efficient way to do this
(doseq [arr head-chunk]
(.putShort bb (shortify (dbl/aget arr (p/+ start-frame n))))))
;; Rewind and copy the encoded bytes into the caller's buffer at `off`.
(.position bb 0)
(.get bb buf off bytes-to-read)
;; Advance the cursor: move to the next chunk, or further into this one.
(if read-remainder?
(do (reset! chunks-remaining more-chunks)
(reset! offset 0))
(swap! offset + frames-to-read))
bytes-to-read))))
(reset [] (throw (UnsupportedOperationException.)))
(skip [n] (throw (ex-info "Not implemented" {:reason :not-implemented}))))))
(defn save
  "Writes sound `s` to the file at `path` as 16-bit WAV audio sampled at
  `sample-rate`."
  [s path sample-rate]
  (let [pcm-stream  (sampled-input-stream s sample-rate)
        ;; 16 bits per sample, one channel per channel of `s`; the two
        ;; trailing flags are `signed` and `bigEndian`.
        pcm-format  (AudioFormat. sample-rate 16 (channels s) true true)
        ;; Length of the stream in sample frames.
        frame-count (long (* (duration s) sample-rate))
        audio-in    (AudioInputStream. pcm-stream pcm-format frame-count)]
    (AudioSystem/write audio-in AudioFileFormat$Type/WAVE (io/file path))))
;;; Visualization
(defn- every-nth
"Given a sequence of double arrays, return a collection holding
every `n`th sample."
[arrays period]
;; `n` is the index of the next wanted sample, relative to the start of
;; the array at the head of `remaining`.
;; Note that it starts at `period`, so index 0 of the first array is never
;; collected.
(loop [remaining arrays
n period
acc []]
(let [[head & more] remaining
head-length (when head (dbl/alength head))]
(if head
(if (< n head-length)
;; Wanted index lies in this array: collect it and step forward.
(recur remaining (+ n period) (conj acc (dbl/aget head n)))
;; Wanted index lies beyond this array: re-base it onto the next one.
(recur more (- n head-length) acc))
acc))))
;; TODO: There's definitely a protocol to be extracted here, assuming
;; the continuous-time stuff lives on.
(defn visualize
"Visualizes channel `c` (default 0) of `s` by plotting it on a graph."
([s] (visualize s 0))
([s c]
;; Target number of points on the plot.
(let [num-data-points 4000
;; For short sounds, we need to sample at a higher rate, or
;; the graph won't be smooth enough. For longer sounds, we
;; can get away with a lower rate.
;; (The threshold is num-data-points / 16000, i.e. a quarter of a second.)
sample-rate (if (< (/ num-data-points 16000) (duration s))
16000
44100)
;; Keep only channel `c` from every chunk.
channel-chunks (map #(nth % c) (chunks s sample-rate))
num-samples (-> s duration (* sample-rate) long)
;; Stride between plotted samples; never less than 1.
sample-period (max 1 (-> num-samples (/ num-data-points) long))
indexes (range 0 num-samples sample-period)
;; Convert sample indexes to seconds for the x axis.
times (map #(/ (double %) sample-rate) indexes)
samples (every-nth channel-chunks sample-period)]
;; Draw an x/y plot of the samples over time with a black stroke.
(incanter/view
(charts/set-stroke-color
(charts/xy-plot
times
samples)
java.awt.Color/black))
)))

View File

@ -0,0 +1,4 @@
(ns app.test.aggregate
(:use app.blocklist
app.wav
clojure.test))

View File

@ -0,0 +1,14 @@
(ns app.test.block
(:use app.block
clojure.test)
(:require [hiphip.double :as v]))
;; Builds a block from 512 samples of value 1 and checks the sizes of its
;; :fft and :mfcc entries, then checks that it differs from a block built
;; from 512 samples of value 0 for three values of the third `diff` argument.
(deftest block
  (let [block (build (v/amake [_ 512] 1))]
    (is (= 512 (count (:fft block))))
    (is (= 13 (count (:mfcc block))))
    (let [block2 (build (v/amake [_ 512] 0))]
      ;; `(= 0 x)` is false for every double x, 0.0 included, so the former
      ;; `(not (= 0 ...))` checks could not fail on a double result.
      ;; `zero?` compares by numeric value for both integers and doubles.
      (is (not (zero? (diff block block2 0))))
      (is (not (zero? (diff block block2 1))))
      (is (not (zero? (diff block block2 0.5)))))))

View File

@ -0,0 +1,11 @@
(ns app.test.blocklist
(:use app.blocklist
app.wav
clojure.test))
;; Exercises `build` and `search` on the sound returned by `(sinusoid 1 440)`.
;; NOTE(review): the 44100 and 512/22049 arguments to `build` look like a
;; sample rate and a block size -- confirm against app.blocklist.
(deftest blocklist
;; The number of blocks built depends on the last argument.
(is (= 86 (count (build (sinusoid 1 440) 44100 512))))
(is (= 2 (count (build (sinusoid 1 440) 44100 22049))))
(let [bl (build (sinusoid 1 440) 44100 512)]
;; Searching the list for its own first block: the second element of the
;; result must not be false, and the first element must be 0.0.
(is (not (= false (second (search bl (first bl) 0)))))
(is (= 0.0 (first (search bl (first bl) 0))))))

View File

@ -0,0 +1,6 @@
(ns app.test.core
(:use [app.core])
(:use [clojure.test]))
;; Placeholder test: it asserts `true`, so it always passes. No behaviour of
;; app.core is exercised yet.
(deftest replace-me ;; FIXME: write
(is true "No tests have been written."))

View File

@ -0,0 +1,24 @@
(ns app.test.listen
(:use [app.listen])
(:use [clojure.test]))
;; `fadeinout!` on arrays of ones: the result should ramp up from 0.0 and
;; back down to 0.0 over the given fade lengths.
(deftest fading-test
  (let [five-ones  (double-array [1 1 1 1 1])
        three-ones (double-array [1 1 1])]
    (is (= (seq [0.0 0.5 1.0 0.5 0.0])
           (seq (fadeinout! five-ones 2 2))))
    (is (= (seq [0.0 1.0 0.0])
           (seq (fadeinout! three-ones 1 1))))))
;; `normalise!` should scale the largest-magnitude sample to 1.0 or -1.0,
;; keeping its sign.
(deftest normalise-test
  (let [positive-peak (double-array [0 10 0])
        negative-peak (double-array [0 -10 0])]
    (is (= (seq [0.0 1.0 0.0])
           (seq (normalise! positive-peak))))
    (is (= (seq [0.0 -1.0 0.0])
           (seq (normalise! negative-peak))))))
;; The result of `fftify` on nine samples is expected to have nine entries.
(deftest fft-test
  ;; `is` takes the assertion form first and an optional message second.
  ;; The previous `(is 9 (count ...))` therefore asserted the truthy
  ;; literal 9 and could never fail; compare explicitly with `=`.
  (is (= 9 (count (fftify (double-array [1 0 0 0 1 0 0 0 1]))))))
;; The first element returned by `mfccify` is expected to hold 13 values.
(deftest mfcc-test
  ;; `is` takes the assertion form first and an optional message second.
  ;; The previous `(is 13 (count ...))` therefore asserted the truthy
  ;; literal 13 and could never fail; compare explicitly with `=`.
  (is (= 13 (count (first (mfccify (double-array [1 0 0 0 1 0 0 0 1])))))))
;; `diff` of two arrays that differ in exactly one element is expected to
;; equal the size of that single difference.
(deftest diff-test
  ;; The previous `(is 1.0 (diff ...))` forms asserted the truthy literal and
  ;; used the `diff` result as the failure message, so they could never fail.
  ;; Expected values are compared with a tolerance because differences such
  ;; as 3.1 - 3 are not exactly 0.1 in floating point.
  (let [approx= (fn [expected actual]
                  (< (Math/abs (- (double expected) (double actual))) 1e-9))]
    (is (approx= 1.0 (diff (double-array [1 2 3 4]) (double-array [1 2 3 5]))))
    (is (approx= 1.0 (diff (double-array [1 3 3 4]) (double-array [1 2 3 4]))))
    (is (approx= 0.1 (diff (double-array [1 3 3.1 4]) (double-array [1 3 3 4]))))
    (is (approx= 0.1 (diff (double-array [0.9 3 3.1 4]) (double-array [0.9 2.9 3.1 4]))))))

69
test.cpp Normal file
View File

@ -0,0 +1,69 @@
#include <cstdio>
#include <cstdlib>
#include <iostream>
#include <vector>
#include <cmath>
using namespace std;
// A descriptor of 13 float coefficients filled with pseudo-random values.
class desc {
public:
    // Builds a descriptor and immediately fills it with random coefficients.
    desc() {
        randomise();
    }

    // Overwrites every coefficient with a value taken from rand(), quantised
    // to thousandths: each result is one of 0.000, 0.001, ..., 0.999.
    void randomise() {
        float *const end = m_data + 13;
        for (float *slot = m_data; slot != end; ++slot) {
            const int thousandths = std::rand() % 1000;
            *slot = thousandths / 1000.0f;
        }
    }

    float m_data[13];
};
class test {
public:
test(int brain_size) {
for (int i=0; i<brain_size; i++) {
m_brain.push_back(new desc());
}
}
vector<desc*> m_brain;
float dist(const desc &a, const desc &b) {
float ret = 0;
for(unsigned int i=0; i<13; ++i) {
ret += abs(a.m_data[i]-b.m_data[i]);
}
}
int find_closest(const desc &a) {
float closest = 999999999999;
unsigned int closest_index = 0;
unsigned int index = 0;
for (vector<desc*>::iterator i=m_brain.begin(); i!=m_brain.end(); ++i) {
float d = dist(a,**i);
if (d<closest) {
closest = d;
closest_index = index;
}
index++;
}
return closest_index;
}
};
// Entry point. It currently only checks that lambdas work by printing a
// greeting; the nearest-neighbour benchmark below is disabled.
int main() {
    const auto greet = [] { std::cout << "Hello world"; };
    greet();
    /* test t=test(999);
       desc d;
       for (unsigned int i=0; i<99999; i++) {
           t.find_closest(d);
           d.randomise();
       }
    */
    return 0;
}

130
transponge-mfcc.py Normal file
View File

@ -0,0 +1,130 @@
import numpy as np
import scipy.io.wavfile
from features import mfcc
from features import logfbank
from features import base
def fadeinout(s, slength, elength=None):
    """Apply a linear fade-in and fade-out to ``s`` in place.

    The first ``slength`` samples are scaled by a ramp rising from 0, and the
    last ``elength`` samples by a ramp that reaches 0 on the final sample.

    Args:
        s: Mutable sequence of samples (e.g. a numpy array); modified in place.
        slength: Number of samples in the fade-in.
        elength: Number of samples in the fade-out. Defaults to ``slength``.

    Returns:
        The same object ``s``, after fading.
    """
    if elength is None:
        elength = slength
    total = len(s)
    # Clamp both fades to the buffer length. Without this a long fade-in
    # raises IndexError, and a long fade-out wraps around through negative
    # indices and silently scales samples twice.
    slength = min(slength, total)
    elength = min(elength, total)
    for i in range(slength):
        s[i] *= float(i) / slength
    last = total - 1
    for i in range(elength):
        s[last - i] *= float(i) / elength
    return s
def normalise(s):
    """Scale ``s`` so that its largest absolute sample becomes 10000.

    The peak is measured on absolute values, so a signal whose loudest sample
    is negative is normalised too. Empty and all-zero input is returned
    unchanged.

    Args:
        s: Sequence of samples. A floating-point numpy array is scaled in
            place; any other input is left untouched and a new float array is
            returned instead.

    Returns:
        The normalised samples.
    """
    if len(s) == 0:
        return s
    # Measure the peak in float so abs() cannot overflow on the most negative
    # value of an integer dtype.
    peak = float(np.max(np.abs(np.asarray(s, dtype=float))))
    if peak > 0:
        scale = peak / 10000.0
        if isinstance(s, np.ndarray) and np.issubdtype(s.dtype, np.floating):
            s /= scale
        else:
            # In-place true division is not defined for integer arrays.
            s = np.asarray(s, dtype=float) / scale
    return s
def chop(wav, size, overlap):
    """Cut the samples of ``wav`` into faded, overlapping chunks.

    Args:
        wav: Pair whose second element holds the samples, i.e. the
            ``(rate, data)`` shape returned by ``scipy.io.wavfile.read``.
        size: Number of samples per chunk.
        overlap: Number of samples shared by consecutive chunks; must be
            smaller than ``size``.

    Returns:
        A list of numpy arrays of ``size`` samples each, every one passed
        through ``fadeinout(chunk, 50, 100)``. A trailing chunk is emitted
        only while ``pos + size < len(samples)``, so one that would end
        exactly on the last sample is not included.

    Raises:
        ValueError: If ``overlap >= size``, which would never advance.
    """
    step = size - overlap
    if step <= 0:
        raise ValueError("overlap must be smaller than size")
    samples = wav[1]
    ret = []
    pos = 0
    while pos + size < len(samples):
        # Copy the slice: numpy slices are views, and fading a view in place
        # would alter the source samples and every chunk overlapping it.
        segment = np.array(samples[pos:pos + size])
        ret.append(fadeinout(segment, 50, 100))
        pos += step
    return ret
def fftify(chopped):
    """Return a list with the FFT (``np.fft.fft``) of each chunk in ``chopped``.

    A real list is returned rather than a lazy ``map`` object, so callers can
    take ``len()`` of it and iterate it more than once on Python 3.
    """
    return [np.fft.fft(segment) for segment in chopped]
def mfccify(chopped, rate):
    """Return a list with ``logfbank(chunk, rate)`` for each chunk in ``chopped``.

    Despite the name, this calls ``logfbank`` and not ``mfcc``.
    """
    return [logfbank(sig, rate) for sig in chopped]
def fftdiff(a, b):
    """Return the sum of the element-wise absolute differences of ``a`` and ``b``.

    The sum is accumulated in extended precision. ``np.longdouble`` is used
    because ``np.float128`` is not defined on every platform; where
    ``float128`` exists it is the same type.
    """
    return np.abs(a - b).sum(dtype=np.longdouble)
def diffify(a, b):
    """Return a list of ``fftdiff(x, y)`` for the paired items of ``a`` and ``b``.

    Pairing stops at the shorter input. A real list is returned rather than a
    lazy ``map`` object, so the result can be indexed and measured.
    """
    return [fftdiff(x, y) for x, y in zip(a, b)]
def search(fft, bank):
    """Return the index of the entry in ``bank`` closest to ``fft``.

    Distance is measured with ``fftdiff``; ties keep the earliest index. The
    chosen index is also printed.

    Args:
        fft: Spectrum to look up.
        bank: Iterable of candidate spectra.

    Returns:
        The index of the closest candidate, or -1 if ``bank`` is empty.
    """
    # Start from infinity rather than a large finite constant, so that any
    # finite distance, however large, is accepted.
    closest = float("inf")
    ret = -1
    for i, candidate in enumerate(bank):
        dist = fftdiff(fft, candidate)
        if dist < closest:
            ret = i
            closest = dist
    print(ret)
    return ret
def unit_test():
    """Print the results of a few hand-checkable calls, for manual inspection."""
    # Four element-wise differences of 1 each, so this prints a total of 4.
    print(fftdiff(np.array([0, 0, 0, 0]), np.array([1, 1, 1, 1])))
    print(fftdiff(np.array([-100, -1000, 0, 0]), np.array([-1, -1, -1, -1])))
    # Pass both fade lengths: the earlier two-argument call raised TypeError.
    print(fadeinout(np.array([10, 10, 10, 10, 10, 10, 10]), 3, 3))
#unit_test()
class transponge():
    """Rebuild a destination recording out of chunks of source recordings.

    The destination WAV is cut into overlapping chunks and each chunk's FFT is
    kept. ``add`` does the same for source WAVs and keeps their chunks too.
    ``process`` then finds, for every destination chunk, the source chunk with
    the closest FFT and overlap-adds it into the output at half gain.

    NOTE(review): the output buffer is one-dimensional, so this looks like it
    expects mono WAV files -- confirm.
    """

    # Where and at what sample rate process() writes its output.
    _OUT_FILENAME = "mfcc-outr.wav"
    _OUT_RATE = 44100

    def __init__(self, chp_size, chp_overlap, dst_filename):
        """Read ``dst_filename`` and precompute the FFT of each of its chunks.

        Args:
            chp_size: Chunk length in samples.
            chp_overlap: Number of samples shared by consecutive chunks.
            dst_filename: Path of the destination WAV file.
        """
        dst = scipy.io.wavfile.read(dst_filename)
        self.src_chp = []
        self.src_fft = []
        # Source chunk index chosen for each destination chunk; filled in by
        # process() and consumed by render().
        self.indices = []
        self.chp_size = chp_size
        self.chp_overlap = chp_overlap
        self.dst_chp = chop(dst, self.chp_size, self.chp_overlap)
        print(self.chp_overlap)
        print(len(self.dst_chp))
        # mfccify(self.dst_chp, dst[0]) is the alternative feature extractor.
        # list() makes sure len() works even if fftify returns an iterator.
        self.dst_fft = list(fftify(self.dst_chp))
        self.dst_chp = []  # only the spectra are needed from here on
        self.dst_size = len(dst[1])

    def add(self, src_filename):
        """Read ``src_filename`` and add its chunks and their FFTs to the bank."""
        src = scipy.io.wavfile.read(src_filename)
        src_chp = chop(src, self.chp_size, self.chp_overlap)
        self.src_chp += src_chp
        # mfccify(src_chp, src[0]) is the alternative feature extractor.
        self.src_fft.extend(fftify(src_chp))

    def _mix_chunk(self, out, pos, chunk):
        """Add ``chunk`` at half gain into ``out`` from ``pos``, clipped to its end."""
        count = min(self.chp_size, self.dst_size - pos)
        if count > 0:
            # Assign instead of using += so the float product is cast back to
            # the dtype of ``out``, as assigning one sample at a time does.
            out[pos:pos + count] = (
                out[pos:pos + count] + np.asarray(chunk[:count]) * 0.5
            )

    def process(self):
        """Match every destination chunk and write the result to disk.

        Progress (percent) is printed per chunk. The output file is rewritten
        after every tenth chunk and once more at the end, so the finished file
        always contains the last chunks. The chosen indices are stored in
        ``self.indices``.

        Raises:
            IndexError: If no source file has been added.
        """
        out = np.zeros(self.dst_size, dtype=self.src_chp[0].dtype)
        self.indices = []
        step = self.chp_size - self.chp_overlap
        total = len(self.dst_fft)
        pos = 0
        saved = True
        for i, seg in enumerate(self.dst_fft):
            # Index of the closest source chunk.
            ii = search(seg, self.src_fft)
            self.indices.append(ii)
            self._mix_chunk(out, pos, self.src_chp[ii])
            pos += step
            print((i / float(total)) * 100.0)
            saved = i % 10 == 0
            if saved:
                scipy.io.wavfile.write(self._OUT_FILENAME, self._OUT_RATE, out)
        if not saved:
            # The last periodic save missed the final chunks; write them too.
            scipy.io.wavfile.write(self._OUT_FILENAME, self._OUT_RATE, out)

    def render(self):
        """Return the output buffer rebuilt from ``self.indices``.

        Returns:
            A numpy array of ``dst_size`` samples holding the overlap-added
            source chunks, each at half gain.

        Raises:
            IndexError: If no source file has been added.
        """
        ret = np.zeros(self.dst_size, dtype=self.src_chp[0].dtype)
        step = self.chp_size - self.chp_overlap
        pos = 0
        for i in self.indices:
            self._mix_chunk(ret, pos, self.src_chp[i])
            pos += step
        return ret
def run(l):
    """Rebuild "pw2.wav" from chunks of "sailingbybit.wav".

    Args:
        l: Chunk length in samples; consecutive chunks overlap by 75% of it.
    """
    t = transponge(l, int(l * 0.75), "pw2.wav")
    # Other source files used previously: water.wav, cumbia.wav,
    # pista07.wav, full.wav.
    t.add("sailingbybit.wav")
    print("processing")
    t.process()


# Only start the job when executed as a script, not when imported.
if __name__ == "__main__":
    run(68)