This commit is contained in:
Dave Griffiths
2015-07-07 20:41:53 +01:00
commit 35b84c6da5
73 changed files with 11369 additions and 0 deletions

BIN
python/_magicsquares.so Executable file

Binary file not shown.

View File

@@ -0,0 +1 @@
from .base import *

Binary file not shown.

173
python/features/base.py Normal file
View File

@@ -0,0 +1,173 @@
# calculate filterbank features. Provides e.g. fbank and mfcc features for use in ASR applications
# Author: James Lyons 2012
import numpy
from features import sigproc
from scipy.fftpack import dct
# Python 3 compatibility: the ``xrange`` builtin only exists on Python 2, so
# alias it to ``range`` when the name cannot be resolved. Only NameError is
# caught so that unrelated failures are not silently swallowed.
try:
    xrange(1)
except NameError:
    xrange = range
def mfcc(signal,samplerate=16000,winlen=0.025,winstep=0.01,numcep=13,
         nfilt=26,nfft=512,lowfreq=0,highfreq=None,preemph=0.97,ceplifter=22,appendEnergy=True):
    """Compute MFCC features from an audio signal.

    The signal is converted to Mel-filterbank energies with :func:`fbank`,
    log-compressed, decorrelated with an orthonormal type-2 DCT, truncated to
    ``numcep`` coefficients and finally liftered.

    :param signal: the audio signal from which to compute features. Should be an N*1 array
    :param samplerate: the samplerate of the signal we are working with.
    :param winlen: the length of the analysis window in seconds. Default is 0.025s (25 milliseconds)
    :param winstep: the step between successive windows in seconds. Default is 0.01s (10 milliseconds)
    :param numcep: the number of cepstral coefficients to return, default 13
    :param nfilt: the number of filters in the filterbank, default 26.
    :param nfft: the FFT size. Default is 512.
    :param lowfreq: lowest band edge of mel filters. In Hz, default is 0.
    :param highfreq: highest band edge of mel filters. In Hz, default is samplerate/2
    :param preemph: apply preemphasis filter with preemph as coefficient. 0 is no filter. Default is 0.97.
    :param ceplifter: apply a lifter to final cepstral coefficients. 0 is no lifter. Default is 22.
    :param appendEnergy: if true, the zeroth cepstral coefficient is replaced with the log of the total frame energy.
    :returns: A numpy array of size (NUMFRAMES by numcep) containing features. Each row holds 1 feature vector.
    """
    band_energy, frame_energy = fbank(signal, samplerate, winlen, winstep, nfilt,
                                      nfft, lowfreq, highfreq, preemph)
    log_band_energy = numpy.log(band_energy)
    # Keep only the first ``numcep`` DCT coefficients of every frame.
    cepstra = dct(log_band_energy, type=2, axis=1, norm='ortho')[:, :numcep]
    cepstra = lifter(cepstra, ceplifter)
    if appendEnergy:
        # Column 0 is overwritten with the log of the total frame energy.
        cepstra[:, 0] = numpy.log(frame_energy)
    return cepstra
def fbank(signal,samplerate=16000,winlen=0.025,winstep=0.01,
          nfilt=26,nfft=512,lowfreq=0,highfreq=None,preemph=0.97):
    """Compute Mel-filterbank energy features from an audio signal.

    :param signal: the audio signal from which to compute features. Should be an N*1 array
    :param samplerate: the samplerate of the signal we are working with.
    :param winlen: the length of the analysis window in seconds. Default is 0.025s (25 milliseconds)
    :param winstep: the step between successive windows in seconds. Default is 0.01s (10 milliseconds)
    :param nfilt: the number of filters in the filterbank, default 26.
    :param nfft: the FFT size. Default is 512.
    :param lowfreq: lowest band edge of mel filters. In Hz, default is 0.
    :param highfreq: highest band edge of mel filters. In Hz, default is samplerate/2
    :param preemph: apply preemphasis filter with preemph as coefficient. 0 is no filter. Default is 0.97.
    :returns: 2 values. The first is a numpy array of size (NUMFRAMES by nfilt) containing features,
        one feature vector per row. The second is the total energy of each frame
        (the sum of that frame's power spectrum).
    """
    highfreq = highfreq or samplerate/2
    tiny = numpy.finfo(float).eps  # substituted for exact zeros so a later log() stays finite

    emphasized = sigproc.preemphasis(signal, preemph)
    frames = sigproc.framesig(emphasized, winlen*samplerate, winstep*samplerate)
    power_spectrum = sigproc.powspec(frames, nfft)

    # Total energy per frame.
    frame_energy = numpy.sum(power_spectrum, 1)
    frame_energy = numpy.where(frame_energy == 0, tiny, frame_energy)

    # Project the power spectrum onto the triangular Mel filters.
    filters = get_filterbanks(nfilt, nfft, samplerate, lowfreq, highfreq)
    band_energy = numpy.dot(power_spectrum, filters.T)
    band_energy = numpy.where(band_energy == 0, tiny, band_energy)
    return band_energy, frame_energy
def logfbank(signal,samplerate=16000,winlen=0.025,winstep=0.01,
             nfilt=26,nfft=512,lowfreq=0,highfreq=None,preemph=0.97):
    """Compute log Mel-filterbank energy features from an audio signal.

    This is the natural logarithm of the first value returned by :func:`fbank`;
    the per-frame total energy is discarded.

    :param signal: the audio signal from which to compute features. Should be an N*1 array
    :param samplerate: the samplerate of the signal we are working with.
    :param winlen: the length of the analysis window in seconds. Default is 0.025s (25 milliseconds)
    :param winstep: the step between successive windows in seconds. Default is 0.01s (10 milliseconds)
    :param nfilt: the number of filters in the filterbank, default 26.
    :param nfft: the FFT size. Default is 512.
    :param lowfreq: lowest band edge of mel filters. In Hz, default is 0.
    :param highfreq: highest band edge of mel filters. In Hz, default is samplerate/2
    :param preemph: apply preemphasis filter with preemph as coefficient. 0 is no filter. Default is 0.97.
    :returns: A numpy array of size (NUMFRAMES by nfilt) containing features. Each row holds 1 feature vector.
    """
    band_energy, _ = fbank(signal, samplerate, winlen, winstep, nfilt,
                           nfft, lowfreq, highfreq, preemph)
    return numpy.log(band_energy)
def ssc(signal,samplerate=16000,winlen=0.025,winstep=0.01,
        nfilt=26,nfft=512,lowfreq=0,highfreq=None,preemph=0.97):
    """Compute Spectral Subband Centroid features from an audio signal.

    For every Mel filter the centroid is the frequency-weighted filterbank
    energy divided by the plain filterbank energy.

    :param signal: the audio signal from which to compute features. Should be an N*1 array
    :param samplerate: the samplerate of the signal we are working with.
    :param winlen: the length of the analysis window in seconds. Default is 0.025s (25 milliseconds)
    :param winstep: the step between successive windows in seconds. Default is 0.01s (10 milliseconds)
    :param nfilt: the number of filters in the filterbank, default 26.
    :param nfft: the FFT size. Default is 512.
    :param lowfreq: lowest band edge of mel filters. In Hz, default is 0.
    :param highfreq: highest band edge of mel filters. In Hz, default is samplerate/2
    :param preemph: apply preemphasis filter with preemph as coefficient. 0 is no filter. Default is 0.97.
    :returns: A numpy array of size (NUMFRAMES by nfilt) containing features. Each row holds 1 feature vector.
    """
    highfreq = highfreq or samplerate/2
    emphasized = sigproc.preemphasis(signal, preemph)
    frames = sigproc.framesig(emphasized, winlen*samplerate, winstep*samplerate)
    spectrum = sigproc.powspec(frames, nfft)
    # Replace exact zeros so the division below never hits 0/0.
    spectrum = numpy.where(spectrum == 0, numpy.finfo(float).eps, spectrum)

    filters = get_filterbanks(nfilt, nfft, samplerate, lowfreq, highfreq)
    band_energy = numpy.dot(spectrum, filters.T)

    # One frequency weight per FFT bin, repeated for every frame.
    nframes = numpy.size(spectrum, 0)
    nbins = numpy.size(spectrum, 1)
    bin_freqs = numpy.linspace(1, samplerate/2, nbins)
    weights = numpy.tile(bin_freqs, (nframes, 1))

    return numpy.dot(spectrum*weights, filters.T) / band_energy
def hz2mel(hz):
    """Convert a frequency in Hertz to the Mel scale.

    :param hz: a value in Hz. May also be a numpy array, in which case the conversion is element-wise.
    :returns: the corresponding value in Mels, with the same shape as the input.
    """
    ratio = 1+hz/700.0
    return 2595 * numpy.log10(ratio)
def mel2hz(mel):
    """Convert a value on the Mel scale back to Hertz.

    :param mel: a value in Mels. May also be a numpy array, in which case the conversion is element-wise.
    :returns: the corresponding value in Hertz, with the same shape as the input.
    """
    exponent = mel/2595.0
    return 700*(10**exponent-1)
def get_filterbanks(nfilt=20,nfft=512,samplerate=16000,lowfreq=0,highfreq=None):
    """Compute a Mel-filterbank.

    The filters are stored in the rows, the columns correspond to FFT bins.

    :param nfilt: the number of filters in the filterbank, default 20.
    :param nfft: the FFT size. Default is 512.
    :param samplerate: the samplerate of the signal we are working with. Affects mel spacing.
    :param lowfreq: lowest band edge of mel filters, default 0 Hz
    :param highfreq: highest band edge of mel filters, default samplerate/2
    :returns: A numpy array of size nfilt * (nfft//2 + 1) containing the filterbank. Each row holds 1 filter.
    :raises AssertionError: if highfreq is greater than samplerate/2.
    """
    highfreq = highfreq or samplerate/2
    assert highfreq <= samplerate/2, "highfreq is greater than samplerate/2"

    # Band edges: nfilt+2 points evenly spaced on the Mel scale.
    lowmel = hz2mel(lowfreq)
    highmel = hz2mel(highfreq)
    melpoints = numpy.linspace(lowmel, highmel, nfilt+2)

    # The points are in Hz but the filters are defined over FFT bins, so
    # convert each edge frequency to its (floored) FFT bin number.
    edges = numpy.floor((nfft+1)*mel2hz(melpoints)/samplerate)

    # Floor division keeps the shape integral on Python 3, where ``nfft/2``
    # would be a float and numpy.zeros would reject it.
    filters = numpy.zeros([nfilt, int(nfft//2 + 1)])
    for j in xrange(0, nfilt):
        left, centre, right = int(edges[j]), int(edges[j+1]), int(edges[j+2])
        # Rising slope of triangle j.
        for i in xrange(left, centre):
            filters[j, i] = (i - edges[j])/(edges[j+1]-edges[j])
        # Falling slope of triangle j.
        for i in xrange(centre, right):
            filters[j, i] = (edges[j+2]-i)/(edges[j+2]-edges[j+1])
    return filters
def lifter(cepstra,L=22):
    """Apply a cepstral lifter to the matrix of cepstra.

    This has the effect of increasing the magnitude of the high frequency DCT
    coefficients. Coefficient ``n`` is scaled by ``1 + (L/2)*sin(pi*n/L)``.

    :param cepstra: the matrix of mel-cepstra, will be numframes * numcep in size.
    :param L: the liftering coefficient to use. Default is 22. L <= 0 disables lifter.
    :returns: the liftered cepstra (a new array), or ``cepstra`` itself when L <= 0.
    """
    if L <= 0:
        # values of L <= 0, do nothing
        return cepstra

    _, ncoeff = numpy.shape(cepstra)  # unpacking also enforces a 2-D input
    n = numpy.arange(ncoeff)
    # ``L/2.0`` forces true division: on Python 2 an odd integer L would
    # otherwise be truncated (e.g. 23/2 == 11) and distort the weights.
    lift = 1 + (L/2.0)*numpy.sin(numpy.pi*n/L)
    return lift*cepstra

BIN
python/features/base.pyc Normal file

Binary file not shown.

113
python/features/sigproc.py Normal file
View File

@@ -0,0 +1,113 @@
# This file includes routines for basic signal processing including framing and computing power spectra.
# Author: James Lyons 2012
import numpy
import math
def framesig(sig,frame_len,frame_step,winfunc=lambda x:numpy.ones((1,x))):
    """Frame a signal into overlapping frames.

    The signal is zero-padded at the end so that the last frame is complete.

    :param sig: the audio signal to frame.
    :param frame_len: length of each frame measured in samples (rounded to the nearest integer).
    :param frame_step: number of samples after the start of the previous frame that the next frame should begin
        (rounded to the nearest integer).
    :param winfunc: the analysis window to apply to each frame. By default no window is applied.
    :returns: an array of frames. Size is NUMFRAMES by frame_len.
    """
    nsamples = len(sig)
    frame_len = int(round(frame_len))
    frame_step = int(round(frame_step))

    # A signal no longer than one frame yields exactly one (padded) frame.
    numframes = 1
    if nsamples > frame_len:
        numframes = 1 + int(math.ceil((1.0*nsamples - frame_len)/frame_step))

    padded_len = int((numframes-1)*frame_step + frame_len)
    padding = numpy.zeros((padded_len - nsamples,))
    padded = numpy.concatenate((sig, padding))

    # indices[f, k] is the sample index of element k of frame f.
    within_frame = numpy.arange(0, frame_len)
    frame_starts = numpy.arange(0, numframes*frame_step, frame_step)
    indices = frame_starts[:, None] + within_frame[None, :]
    indices = numpy.array(indices, dtype=numpy.int32)

    window = numpy.tile(winfunc(frame_len), (numframes, 1))
    return padded[indices]*window
def deframesig(frames,siglen,frame_len,frame_step,winfunc=lambda x:numpy.ones((1,x))):
    """Does overlap-add procedure to undo the action of framesig.

    Frames are summed back at their original offsets and every sample is
    divided by the summed window weight that covered it.

    :param frames: the array of frames, NUMFRAMES by frame_len.
    :param siglen: the length of the desired signal, use 0 if unknown. Output will be truncated to siglen samples.
    :param frame_len: length of each frame measured in samples (rounded to the nearest integer).
    :param frame_step: number of samples after the start of the previous frame that the next frame should begin
        (rounded to the nearest integer).
    :param winfunc: the analysis window that was applied to each frame; it is only used to build the
        normalisation weights. By default no window is assumed.
    :returns: a 1-D signal.
    :raises AssertionError: if the second dimension of ``frames`` is not frame_len.
    """
    frames = numpy.asarray(frames)
    # round() returns a float on Python 2; lengths must be real integers to
    # be usable as array sizes and slice bounds.
    frame_len = int(round(frame_len))
    frame_step = int(round(frame_step))
    numframes = numpy.shape(frames)[0]
    assert numpy.shape(frames)[1] == frame_len, '"frames" matrix is wrong size, 2nd dim is not equal to frame_len'
    if numframes == 0:
        return numpy.zeros((0,))

    padlen = (numframes-1)*frame_step + frame_len
    if siglen <= 0: siglen = padlen

    # 1-D accumulators: the sample index must address the sample axis. (A
    # (1, padlen) buffer would be indexed along its length-1 first axis.)
    rec_signal = numpy.zeros((padlen,))
    window_correction = numpy.zeros((padlen,))
    win = numpy.ravel(winfunc(frame_len))  # accept both (x,) and (1, x) windows

    for i in range(0, numframes):
        start = i*frame_step
        stop = start + frame_len
        window_correction[start:stop] += win + 1e-15  # add a little bit so it is never zero
        rec_signal[start:stop] += frames[i, :]

    rec_signal = rec_signal/window_correction
    return rec_signal[0:siglen]
def magspec(frames,NFFT):
    """Compute the magnitude spectrum of each frame in frames.

    The spectrum is taken with a real-input FFT, so only the non-negative
    frequency bins are returned.

    :param frames: the array of frames. Each row is a frame.
    :param NFFT: the FFT length to use. If NFFT > frame_len, the frames are zero-padded.
    :returns: If frames is an NxD matrix, output will be Nx(NFFT//2+1). Each row will be the magnitude
        spectrum of the corresponding frame.
    """
    return numpy.absolute(numpy.fft.rfft(frames, NFFT))
def powspec(frames,NFFT):
    """Compute the power spectrum of each frame in frames.

    The power spectrum is the squared magnitude spectrum scaled by 1/NFFT.

    :param frames: the array of frames. Each row is a frame.
    :param NFFT: the FFT length to use. If NFFT > frame_len, the frames are zero-padded.
    :returns: an array with one row per frame and the same number of columns as :func:`magspec` returns.
        Each row will be the power spectrum of the corresponding frame.
    """
    magnitudes = magspec(frames, NFFT)
    return 1.0/NFFT * numpy.square(magnitudes)
def logpowspec(frames,NFFT,norm=1):
    """Compute the log power spectrum (in dB, 10*log10) of each frame in frames.

    :param frames: the array of frames. Each row is a frame.
    :param NFFT: the FFT length to use. If NFFT > frame_len, the frames are zero-padded.
    :param norm: If true (default 1), the maximum over all frames is subtracted, so the largest
        value in the result is 0.
    :returns: an array with one row per frame and the same number of columns as :func:`powspec` returns.
        Each row will be the log power spectrum of the corresponding frame.
    """
    power = powspec(frames, NFFT)
    # Floor tiny values so log10 never sees zero.
    power[power <= 1e-30] = 1e-30
    log_power = 10*numpy.log10(power)
    if not norm:
        return log_power
    return log_power - numpy.max(log_power)
def preemphasis(signal,coeff=0.95):
    """Perform preemphasis on the input signal.

    The first sample is kept as is; every later sample has ``coeff`` times the
    previous sample subtracted from it.

    :param signal: The signal to filter.
    :param coeff: The preemphasis coefficient. 0 is no filter, default is 0.95.
    :returns: the filtered signal.
    """
    first_sample = signal[0]
    differenced = signal[1:] - coeff*signal[:-1]
    return numpy.append(first_sample, differenced)

BIN
python/features/sigproc.pyc Normal file

Binary file not shown.

438
python/magicsquares.py Normal file
View File

@@ -0,0 +1,438 @@
# This file was automatically generated by SWIG (http://www.swig.org).
# Version 2.0.4
#
# Do not make changes to this file unless you know what you are doing--modify
# the SWIG interface file instead.
from sys import version_info
# Bind the compiled ``_magicsquares`` extension module to a module-level name.
# On Python >= 2.6 the extension is first searched for in this file's own
# directory; a plain ``import _magicsquares`` is used if that search fails or
# on older interpreters.
# NOTE(review): the ``imp`` module was removed in Python 3.12 - regenerate
# with a newer SWIG release rather than patching this file if that matters.
if version_info >= (2,6,0):
    def swig_import_helper():
        from os.path import dirname
        import imp
        fp = None
        try:
            fp, pathname, description = imp.find_module('_magicsquares', [dirname(__file__)])
        except ImportError:
            # Not found next to this file: fall back to the normal import path.
            import _magicsquares
            return _magicsquares
        if fp is not None:
            try:
                _mod = imp.load_module('_magicsquares', fp, pathname, description)
            finally:
                # Always close the file handle returned by find_module.
                fp.close()
            return _mod
    _magicsquares = swig_import_helper()
    del swig_import_helper
else:
    import _magicsquares
del version_info
# Alias the ``property`` builtin; used below to expose C++ members as
# Python properties on new-style classes.
try:
    _swig_property = property
except NameError:
    pass # Python < 2.2 doesn't have 'property'.
def _swig_setattr_nondynamic(self,class_type,name,value,static=1):
    # Attribute setter shared by every proxy class.
    #   self       -- proxy instance being modified
    #   class_type -- proxy class whose __swig_setmethods__ table is consulted
    #   name/value -- attribute name and the value to store
    #   static     -- when true, names without a registered setter are rejected
    # "thisown" is forwarded to self.this.own(value).
    if (name == "thisown"): return self.this.own(value)
    if (name == "this"):
        # A SwigPyObject handle is stored directly in the instance dict.
        if type(value).__name__ == 'SwigPyObject':
            self.__dict__[name] = value
            return
    # Prefer a setter registered in the class's __swig_setmethods__ table.
    method = class_type.__swig_setmethods__.get(name,None)
    if method: return method(self,value)
    if (not static):
        self.__dict__[name] = value
    else:
        raise AttributeError("You cannot add attributes to %s" % self)
def _swig_setattr(self,class_type,name,value):
    # Dynamic variant: static=0, so unknown names land in the instance dict.
    return _swig_setattr_nondynamic(self,class_type,name,value,0)
def _swig_getattr(self,class_type,name):
    # Attribute getter shared by every proxy class: "thisown" is answered by
    # self.this.own(); other names go through the class's __swig_getmethods__
    # table; anything else raises AttributeError(name).
    if (name == "thisown"): return self.this.own()
    method = class_type.__swig_getmethods__.get(name,None)
    if method: return method(self)
    raise AttributeError(name)
def _swig_repr(self):
    # Shared __repr__: "<module.Class; proxy of ... >". The "proxy of" part is
    # left empty when reading self.this fails for any reason.
    try: strthis = "proxy of " + self.this.__repr__()
    except: strthis = ""
    return "<%s.%s; %s >" % (self.__class__.__module__, self.__class__.__name__, strthis,)
# Pick the base class for all proxies: the builtin ``object`` when available
# (``_newclass`` = 1), otherwise an empty old-style class (``_newclass`` = 0).
try:
    _object = object
    _newclass = 1
except AttributeError:
    class _object : pass
    _newclass = 0
class SwigPyIterator(_object):
    # Proxy for the extension's iterator type. It cannot be constructed from
    # Python (__init__ raises); every method forwards to the matching
    # _magicsquares.SwigPyIterator_* function.
    __swig_setmethods__ = {}
    __setattr__ = lambda self, name, value: _swig_setattr(self, SwigPyIterator, name, value)
    __swig_getmethods__ = {}
    __getattr__ = lambda self, name: _swig_getattr(self, SwigPyIterator, name)
    def __init__(self, *args, **kwargs): raise AttributeError("No constructor defined - class is abstract")
    __repr__ = _swig_repr
    __swig_destroy__ = _magicsquares.delete_SwigPyIterator
    __del__ = lambda self : None;
    def value(self): return _magicsquares.SwigPyIterator_value(self)
    def incr(self, n = 1): return _magicsquares.SwigPyIterator_incr(self, n)
    def decr(self, n = 1): return _magicsquares.SwigPyIterator_decr(self, n)
    def distance(self, *args): return _magicsquares.SwigPyIterator_distance(self, *args)
    def equal(self, *args): return _magicsquares.SwigPyIterator_equal(self, *args)
    def copy(self): return _magicsquares.SwigPyIterator_copy(self)
    def next(self): return _magicsquares.SwigPyIterator_next(self)
    def __next__(self): return _magicsquares.SwigPyIterator___next__(self)
    def previous(self): return _magicsquares.SwigPyIterator_previous(self)
    def advance(self, *args): return _magicsquares.SwigPyIterator_advance(self, *args)
    def __eq__(self, *args): return _magicsquares.SwigPyIterator___eq__(self, *args)
    def __ne__(self, *args): return _magicsquares.SwigPyIterator___ne__(self, *args)
    def __iadd__(self, *args): return _magicsquares.SwigPyIterator___iadd__(self, *args)
    def __isub__(self, *args): return _magicsquares.SwigPyIterator___isub__(self, *args)
    def __add__(self, *args): return _magicsquares.SwigPyIterator___add__(self, *args)
    def __sub__(self, *args): return _magicsquares.SwigPyIterator___sub__(self, *args)
    def __iter__(self): return self
# Hand the proxy class to the extension module.
SwigPyIterator_swigregister = _magicsquares.SwigPyIterator_swigregister
SwigPyIterator_swigregister(SwigPyIterator)
class PCA(_object):
    # Proxy for the extension's PCA type; every method forwards to the
    # matching _magicsquares.PCA_* function.
    __swig_setmethods__ = {}
    __setattr__ = lambda self, name, value: _swig_setattr(self, PCA, name, value)
    __swig_getmethods__ = {}
    __getattr__ = lambda self, name: _swig_getattr(self, PCA, name)
    __repr__ = _swig_repr
    def __init__(self, *args):
        # Create the native object and attach its handle as self.this.
        this = _magicsquares.new_PCA(*args)
        try: self.this.append(this)
        except: self.this = this
    __swig_destroy__ = _magicsquares.delete_PCA
    __del__ = lambda self : None;
    def AddFeature(self, *args): return _magicsquares.PCA_AddFeature(self, *args)
    def Calculate(self): return _magicsquares.PCA_Calculate(self)
    def Compress(self, *args): return _magicsquares.PCA_Compress(self, *args)
    def Mult(self, *args): return _magicsquares.PCA_Mult(self, *args)
    def Project(self, *args): return _magicsquares.PCA_Project(self, *args)
    def Synth(self, *args): return _magicsquares.PCA_Synth(self, *args)
    # RunTests is exposed as a static method on new-style classes.
    __swig_getmethods__["RunTests"] = lambda x: _magicsquares.PCA_RunTests
    if _newclass:RunTests = staticmethod(_magicsquares.PCA_RunTests)
    def GetEigenValues(self): return _magicsquares.PCA_GetEigenValues(self)
    def GetEigenTransform(self): return _magicsquares.PCA_GetEigenTransform(self)
    def EigenTransform(self): return _magicsquares.PCA_EigenTransform(self)
    def GetFeatures(self): return _magicsquares.PCA_GetFeatures(self)
    def GetMean(self): return _magicsquares.PCA_GetMean(self)
    def GetFeatureSize(self): return _magicsquares.PCA_GetFeatureSize(self)
    def GetParamsSize(self): return _magicsquares.PCA_GetParamsSize(self)
    def Load(self, *args): return _magicsquares.PCA_Load(self, *args)
    def Save(self, *args): return _magicsquares.PCA_Save(self, *args)
# Hand the proxy class to the extension module.
PCA_swigregister = _magicsquares.PCA_swigregister
PCA_swigregister(PCA)
# Module-level alias for the static PCA.RunTests; the def is immediately
# rebound to the extension function itself.
def PCA_RunTests():
    return _magicsquares.PCA_RunTests()
PCA_RunTests = _magicsquares.PCA_RunTests
class Classifier(_object):
    # Proxy for the extension's abstract Classifier base; it cannot be
    # constructed from Python (__init__ raises). Methods forward to the
    # matching _magicsquares.Classifier_* function.
    __swig_setmethods__ = {}
    __setattr__ = lambda self, name, value: _swig_setattr(self, Classifier, name, value)
    __swig_getmethods__ = {}
    __getattr__ = lambda self, name: _swig_getattr(self, Classifier, name)
    def __init__(self, *args, **kwargs): raise AttributeError("No constructor defined - class is abstract")
    __repr__ = _swig_repr
    __swig_destroy__ = _magicsquares.delete_Classifier
    __del__ = lambda self : None;
    def AddFeature(self, *args): return _magicsquares.Classifier_AddFeature(self, *args)
    def Classify(self, *args): return _magicsquares.Classifier_Classify(self, *args)
    def GroupExists(self, *args): return _magicsquares.Classifier_GroupExists(self, *args)
    def GetGroupMean(self, *args): return _magicsquares.Classifier_GetGroupMean(self, *args)
# Hand the proxy class to the extension module.
Classifier_swigregister = _magicsquares.Classifier_swigregister
Classifier_swigregister(Classifier)
class LDAClassifier(Classifier):
    # Proxy for the extension's LDAClassifier. It inherits the Classifier
    # getter/setter tables, overrides Classify, and cannot be constructed from
    # Python (__init__ raises).
    __swig_setmethods__ = {}
    for _s in [Classifier]: __swig_setmethods__.update(getattr(_s,'__swig_setmethods__',{}))
    __setattr__ = lambda self, name, value: _swig_setattr(self, LDAClassifier, name, value)
    __swig_getmethods__ = {}
    for _s in [Classifier]: __swig_getmethods__.update(getattr(_s,'__swig_getmethods__',{}))
    __getattr__ = lambda self, name: _swig_getattr(self, LDAClassifier, name)
    def __init__(self, *args, **kwargs): raise AttributeError("No constructor defined - class is abstract")
    __repr__ = _swig_repr
    __swig_destroy__ = _magicsquares.delete_LDAClassifier
    __del__ = lambda self : None;
    def Classify(self, *args): return _magicsquares.LDAClassifier_Classify(self, *args)
# Hand the proxy class to the extension module.
LDAClassifier_swigregister = _magicsquares.LDAClassifier_swigregister
LDAClassifier_swigregister(LDAClassifier)
# Wrapper for the extension function of the same name; the def is immediately
# rebound to the extension function itself.
def matrix_inverse(*args):
    return _magicsquares.matrix_inverse(*args)
matrix_inverse = _magicsquares.matrix_inverse
class Image(_object):
    # Proxy for the extension's Image type; every method forwards to the
    # matching _magicsquares.Image_* function.
    __swig_setmethods__ = {}
    __setattr__ = lambda self, name, value: _swig_setattr(self, Image, name, value)
    __swig_getmethods__ = {}
    __getattr__ = lambda self, name: _swig_getattr(self, Image, name)
    __repr__ = _swig_repr
    def __init__(self, *args):
        # Create the native object and attach its handle as self.this.
        this = _magicsquares.new_Image(*args)
        try: self.this.append(this)
        except: self.this = this
    __swig_destroy__ = _magicsquares.delete_Image
    __del__ = lambda self : None;
    def Clear(self): return _magicsquares.Image_Clear(self)
    def Get(self, *args): return _magicsquares.Image_Get(self, *args)
    def __sub__(self, *args): return _magicsquares.Image___sub__(self, *args)
    def __add__(self, *args): return _magicsquares.Image___add__(self, *args)
    def PrintInfo(self): return _magicsquares.Image_PrintInfo(self)
    def Crop(self, *args): return _magicsquares.Image_Crop(self, *args)
    def Scale(self, *args): return _magicsquares.Image_Scale(self, *args)
    def SubImage(self, *args): return _magicsquares.Image_SubImage(self, *args)
    def Blit(self, *args): return _magicsquares.Image_Blit(self, *args)
    def SSD(self, *args): return _magicsquares.Image_SSD(self, *args)
    def SubMean(self): return _magicsquares.Image_SubMean(self)
    def LBP(self): return _magicsquares.Image_LBP(self)
    def GRAY2RGB(self): return _magicsquares.Image_GRAY2RGB(self)
    def RGB2GRAY(self): return _magicsquares.Image_RGB2GRAY(self)
    def BayerGB2RGB(self): return _magicsquares.Image_BayerGB2RGB(self)
    def Hist(self, *args): return _magicsquares.Image_Hist(self, *args)
    def ToFloatVector(self): return _magicsquares.Image_ToFloatVector(self)
    def NumElements(self): return _magicsquares.Image_NumElements(self)
    def Save(self, *args): return _magicsquares.Image_Save(self, *args)
    def GetBB(self, *args): return _magicsquares.Image_GetBB(self, *args)
    # The m_Image member is exposed through the getter/setter tables and, on
    # new-style classes, as a property.
    __swig_setmethods__["m_Image"] = _magicsquares.Image_m_Image_set
    __swig_getmethods__["m_Image"] = _magicsquares.Image_m_Image_get
    if _newclass:m_Image = _swig_property(_magicsquares.Image_m_Image_get, _magicsquares.Image_m_Image_set)
# Hand the proxy class to the extension module.
Image_swigregister = _magicsquares.Image_swigregister
Image_swigregister(Image)
class PCAClassifier(Classifier):
    # Proxy for the extension's PCAClassifier. It inherits the Classifier
    # getter/setter tables and is constructible from Python.
    __swig_setmethods__ = {}
    for _s in [Classifier]: __swig_setmethods__.update(getattr(_s,'__swig_setmethods__',{}))
    __setattr__ = lambda self, name, value: _swig_setattr(self, PCAClassifier, name, value)
    __swig_getmethods__ = {}
    for _s in [Classifier]: __swig_getmethods__.update(getattr(_s,'__swig_getmethods__',{}))
    __getattr__ = lambda self, name: _swig_getattr(self, PCAClassifier, name)
    __repr__ = _swig_repr
    def __init__(self, *args):
        # Create the native object and attach its handle as self.this.
        this = _magicsquares.new_PCAClassifier(*args)
        try: self.this.append(this)
        except: self.this = this
    __swig_destroy__ = _magicsquares.delete_PCAClassifier
    __del__ = lambda self : None;
    def AddFeature(self, *args): return _magicsquares.PCAClassifier_AddFeature(self, *args)
    def Classify(self, *args): return _magicsquares.PCAClassifier_Classify(self, *args)
    def GetPCA(self): return _magicsquares.PCAClassifier_GetPCA(self)
# Hand the proxy class to the extension module.
PCAClassifier_swigregister = _magicsquares.PCAClassifier_swigregister
PCAClassifier_swigregister(PCAClassifier)
# Wrapper for the extension function of the same name; the def is immediately
# rebound to the extension function itself.
def dsvd(*args):
    return _magicsquares.dsvd(*args)
dsvd = _magicsquares.dsvd
# Wrapper for the extension function of the same name; the def is immediately
# rebound to the extension function itself.
def SVD(*args):
    return _magicsquares.SVD(*args)
SVD = _magicsquares.SVD
# Wrapper for the extension function of the same name; the def is immediately
# rebound to the extension function itself.
def OpenFile(*args):
    return _magicsquares.OpenFile(*args)
OpenFile = _magicsquares.OpenFile
# Wrapper for the extension function of the same name; the def is immediately
# rebound to the extension function itself.
def CloseFile(*args):
    return _magicsquares.CloseFile(*args)
CloseFile = _magicsquares.CloseFile
# Wrapper for the extension function of the same name; the def is immediately
# rebound to the extension function itself.
def Glob(*args):
    return _magicsquares.Glob(*args)
Glob = _magicsquares.Glob
class Rect(_object):
    # Proxy for the extension's Rect type. Its four members x, y, w and h are
    # exposed through the getter/setter tables and, on new-style classes, as
    # properties.
    __swig_setmethods__ = {}
    __setattr__ = lambda self, name, value: _swig_setattr(self, Rect, name, value)
    __swig_getmethods__ = {}
    __getattr__ = lambda self, name: _swig_getattr(self, Rect, name)
    __repr__ = _swig_repr
    def __init__(self, *args):
        # Create the native object and attach its handle as self.this.
        this = _magicsquares.new_Rect(*args)
        try: self.this.append(this)
        except: self.this = this
    __swig_destroy__ = _magicsquares.delete_Rect
    __del__ = lambda self : None;
    __swig_setmethods__["x"] = _magicsquares.Rect_x_set
    __swig_getmethods__["x"] = _magicsquares.Rect_x_get
    if _newclass:x = _swig_property(_magicsquares.Rect_x_get, _magicsquares.Rect_x_set)
    __swig_setmethods__["y"] = _magicsquares.Rect_y_set
    __swig_getmethods__["y"] = _magicsquares.Rect_y_get
    if _newclass:y = _swig_property(_magicsquares.Rect_y_get, _magicsquares.Rect_y_set)
    __swig_setmethods__["w"] = _magicsquares.Rect_w_set
    __swig_getmethods__["w"] = _magicsquares.Rect_w_get
    if _newclass:w = _swig_property(_magicsquares.Rect_w_get, _magicsquares.Rect_w_set)
    __swig_setmethods__["h"] = _magicsquares.Rect_h_set
    __swig_getmethods__["h"] = _magicsquares.Rect_h_get
    if _newclass:h = _swig_property(_magicsquares.Rect_h_get, _magicsquares.Rect_h_set)
# Hand the proxy class to the extension module.
Rect_swigregister = _magicsquares.Rect_swigregister
Rect_swigregister(Rect)
class FaceFinder(_object):
    # Proxy for the extension's FaceFinder type; Find forwards to
    # _magicsquares.FaceFinder_Find.
    __swig_setmethods__ = {}
    __setattr__ = lambda self, name, value: _swig_setattr(self, FaceFinder, name, value)
    __swig_getmethods__ = {}
    __getattr__ = lambda self, name: _swig_getattr(self, FaceFinder, name)
    __repr__ = _swig_repr
    def __init__(self):
        # Create the native object and attach its handle as self.this.
        this = _magicsquares.new_FaceFinder()
        try: self.this.append(this)
        except: self.this = this
    __swig_destroy__ = _magicsquares.delete_FaceFinder
    __del__ = lambda self : None;
    def Find(self, *args): return _magicsquares.FaceFinder_Find(self, *args)
# Hand the proxy class to the extension module.
FaceFinder_swigregister = _magicsquares.FaceFinder_swigregister
FaceFinder_swigregister(FaceFinder)
class FloatVector(_object):
    # Proxy for the extension's FloatVector type; every method and operator
    # forwards to the matching _magicsquares.FloatVector_* function.
    __swig_setmethods__ = {}
    __setattr__ = lambda self, name, value: _swig_setattr(self, FloatVector, name, value)
    __swig_getmethods__ = {}
    __getattr__ = lambda self, name: _swig_getattr(self, FloatVector, name)
    __repr__ = _swig_repr
    __swig_destroy__ = _magicsquares.delete_FloatVector
    __del__ = lambda self : None;
    def __init__(self, *args):
        # Create the native object and attach its handle as self.this.
        this = _magicsquares.new_FloatVector(*args)
        try: self.this.append(this)
        except: self.this = this
    def Size(self): return _magicsquares.FloatVector_Size(self)
    def GetRawData(self): return _magicsquares.FloatVector_GetRawData(self)
    def GetRawDataConst(self): return _magicsquares.FloatVector_GetRawDataConst(self)
    def Print(self): return _magicsquares.FloatVector_Print(self)
    def SetAll(self, *args): return _magicsquares.FloatVector_SetAll(self, *args)
    def Zero(self): return _magicsquares.FloatVector_Zero(self)
    def IsInf(self): return _magicsquares.FloatVector_IsInf(self)
    def Mean(self): return _magicsquares.FloatVector_Mean(self)
    def DistanceFrom(self, *args): return _magicsquares.FloatVector_DistanceFrom(self, *args)
    def Magnitude(self): return _magicsquares.FloatVector_Magnitude(self)
    def Dot(self, *args): return _magicsquares.FloatVector_Dot(self, *args)
    def Normalised(self): return _magicsquares.FloatVector_Normalised(self)
    def __add__(self, *args): return _magicsquares.FloatVector___add__(self, *args)
    def __sub__(self, *args): return _magicsquares.FloatVector___sub__(self, *args)
    def __mul__(self, *args): return _magicsquares.FloatVector___mul__(self, *args)
    # NOTE(review): __div__/__idiv__ are the Python 2 operator hooks; Python 3
    # looks for __truediv__/__itruediv__, which are not defined here.
    def __div__(self, *args): return _magicsquares.FloatVector___div__(self, *args)
    def __iadd__(self, *args): return _magicsquares.FloatVector___iadd__(self, *args)
    def __isub__(self, *args): return _magicsquares.FloatVector___isub__(self, *args)
    def __imul__(self, *args): return _magicsquares.FloatVector___imul__(self, *args)
    def __idiv__(self, *args): return _magicsquares.FloatVector___idiv__(self, *args)
    def Save(self, *args): return _magicsquares.FloatVector_Save(self, *args)
    def Load(self, *args): return _magicsquares.FloatVector_Load(self, *args)
    # RunTests is exposed as a static method on new-style classes.
    __swig_getmethods__["RunTests"] = lambda x: _magicsquares.FloatVector_RunTests
    if _newclass:RunTests = staticmethod(_magicsquares.FloatVector_RunTests)
    def __getitem__(self, *args): return _magicsquares.FloatVector___getitem__(self, *args)
    def __setitem__(self, *args): return _magicsquares.FloatVector___setitem__(self, *args)
# Hand the proxy class to the extension module.
FloatVector_swigregister = _magicsquares.FloatVector_swigregister
FloatVector_swigregister(FloatVector)
# Module-level alias for the static FloatVector.RunTests; the def is
# immediately rebound to the extension function itself.
def FloatVector_RunTests():
    return _magicsquares.FloatVector_RunTests()
FloatVector_RunTests = _magicsquares.FloatVector_RunTests
class FloatMatrix(_object):
__swig_setmethods__ = {}
__setattr__ = lambda self, name, value: _swig_setattr(self, FloatMatrix, name, value)
__swig_getmethods__ = {}
__getattr__ = lambda self, name: _swig_getattr(self, FloatMatrix, name)
__repr__ = _swig_repr
__swig_destroy__ = _magicsquares.delete_FloatMatrix
__del__ = lambda self : None;
def __init__(self, *args):
this = _magicsquares.new_FloatMatrix(*args)
try: self.this.append(this)
except: self.this = this
def GetRows(self): return _magicsquares.FloatMatrix_GetRows(self)
def GetCols(self): return _magicsquares.FloatMatrix_GetCols(self)
def GetRawData(self): return _magicsquares.FloatMatrix_GetRawData(self)
def GetRawDataConst(self): return _magicsquares.FloatMatrix_GetRawDataConst(self)
def GetRowVector(self, *args): return _magicsquares.FloatMatrix_GetRowVector(self, *args)
def GetColVector(self, *args): return _magicsquares.FloatMatrix_GetColVector(self, *args)
def SetRowVector(self, *args): return _magicsquares.FloatMatrix_SetRowVector(self, *args)
def SetColVector(self, *args): return _magicsquares.FloatMatrix_SetColVector(self, *args)
def NormaliseRows(self): return _magicsquares.FloatMatrix_NormaliseRows(self)
def NormaliseCols(self): return _magicsquares.FloatMatrix_NormaliseCols(self)
def Print(self): return _magicsquares.FloatMatrix_Print(self)
def SetAll(self, *args): return _magicsquares.FloatMatrix_SetAll(self, *args)
def Zero(self): return _magicsquares.FloatMatrix_Zero(self)
def IsInf(self): return _magicsquares.FloatMatrix_IsInf(self)
def Transposed(self): return _magicsquares.FloatMatrix_Transposed(self)
def Inverted(self): return _magicsquares.FloatMatrix_Inverted(self)
def __add__(self, *args): return _magicsquares.FloatMatrix___add__(self, *args)
def __sub__(self, *args): return _magicsquares.FloatMatrix___sub__(self, *args)
def __mul__(self, *args): return _magicsquares.FloatMatrix___mul__(self, *args)
def VecMulTransposed(self, *args): return _magicsquares.FloatMatrix_VecMulTransposed(self, *args)
def __iadd__(self, *args): return _magicsquares.FloatMatrix___iadd__(self, *args)
def __isub__(self, *args): return _magicsquares.FloatMatrix___isub__(self, *args)
def __imul__(self, *args): return _magicsquares.FloatMatrix___imul__(self, *args)
def __eq__(self, *args): return _magicsquares.FloatMatrix___eq__(self, *args)
def SortRows(self, *args): return _magicsquares.FloatMatrix_SortRows(self, *args)
def SortCols(self, *args): return _magicsquares.FloatMatrix_SortCols(self, *args)
def CropRows(self, *args): return _magicsquares.FloatMatrix_CropRows(self, *args)
def CropCols(self, *args): return _magicsquares.FloatMatrix_CropCols(self, *args)
def Save(self, *args): return _magicsquares.FloatMatrix_Save(self, *args)
def Load(self, *args):
    """Forward ``self`` and ``args`` to the native ``FloatMatrix_Load`` binding."""
    return _magicsquares.FloatMatrix_Load(self, *args)
# Register RunTests in the getter table; the getter ignores its argument and
# hands back the native FloatMatrix_RunTests function itself.
__swig_getmethods__["RunTests"] = lambda x: _magicsquares.FloatMatrix_RunTests
# When _newclass is set, additionally expose it as a regular static method.
if _newclass:RunTests = staticmethod(_magicsquares.FloatMatrix_RunTests)
def Get(self, *args):
    """Forward ``self`` and ``args`` to the native ``FloatMatrix_Get`` binding."""
    return _magicsquares.FloatMatrix_Get(self, *args)
def Set(self, *args):
    """Forward ``self`` and ``args`` to the native ``FloatMatrix_Set`` binding."""
    return _magicsquares.FloatMatrix_Set(self, *args)
# Look up the native swigregister hook and pass it the Python proxy class.
FloatMatrix_swigregister = _magicsquares.FloatMatrix_swigregister
FloatMatrix_swigregister(FloatMatrix)
def FloatMatrix_RunTests():
    """Call the native ``FloatMatrix_RunTests`` function and return its result."""
    outcome = _magicsquares.FloatMatrix_RunTests()
    return outcome


# The module-level name is then rebound directly to the native function,
# replacing the Python wrapper defined just above.
FloatMatrix_RunTests = _magicsquares.FloatMatrix_RunTests
class RectVector(_object):
    """SWIG proxy class for the native ``RectVector`` type.

    Every method forwards to the ``_magicsquares.RectVector_*`` function of
    the same name and returns whatever that function returns.
    """
    __swig_setmethods__ = {}
    __setattr__ = lambda self, name, value: _swig_setattr(self, RectVector, name, value)
    __swig_getmethods__ = {}
    __getattr__ = lambda self, name: _swig_getattr(self, RectVector, name)
    __repr__ = _swig_repr

    # -- iteration, truthiness and length ---------------------------------
    def iterator(self):
        return _magicsquares.RectVector_iterator(self)

    def __iter__(self):
        return self.iterator()

    def __nonzero__(self):
        return _magicsquares.RectVector___nonzero__(self)

    def __bool__(self):
        return _magicsquares.RectVector___bool__(self)

    def __len__(self):
        return _magicsquares.RectVector___len__(self)

    # -- Python sequence protocol -----------------------------------------
    def pop(self):
        return _magicsquares.RectVector_pop(self)

    def __getslice__(self, *args):
        return _magicsquares.RectVector___getslice__(self, *args)

    def __setslice__(self, *args):
        return _magicsquares.RectVector___setslice__(self, *args)

    def __delslice__(self, *args):
        return _magicsquares.RectVector___delslice__(self, *args)

    def __delitem__(self, *args):
        return _magicsquares.RectVector___delitem__(self, *args)

    def __getitem__(self, *args):
        return _magicsquares.RectVector___getitem__(self, *args)

    def __setitem__(self, *args):
        return _magicsquares.RectVector___setitem__(self, *args)

    def append(self, *args):
        return _magicsquares.RectVector_append(self, *args)

    # -- container methods exposed by the native type ---------------------
    def empty(self):
        return _magicsquares.RectVector_empty(self)

    def size(self):
        return _magicsquares.RectVector_size(self)

    def clear(self):
        return _magicsquares.RectVector_clear(self)

    def swap(self, *args):
        return _magicsquares.RectVector_swap(self, *args)

    def get_allocator(self):
        return _magicsquares.RectVector_get_allocator(self)

    def begin(self):
        return _magicsquares.RectVector_begin(self)

    def end(self):
        return _magicsquares.RectVector_end(self)

    def rbegin(self):
        return _magicsquares.RectVector_rbegin(self)

    def rend(self):
        return _magicsquares.RectVector_rend(self)

    def pop_back(self):
        return _magicsquares.RectVector_pop_back(self)

    def erase(self, *args):
        return _magicsquares.RectVector_erase(self, *args)

    def __init__(self, *args):
        handle = _magicsquares.new_RectVector(*args)
        # append to an existing ``this`` if there is one, otherwise create it
        try:
            self.this.append(handle)
        except:
            self.this = handle

    def push_back(self, *args):
        return _magicsquares.RectVector_push_back(self, *args)

    def front(self):
        return _magicsquares.RectVector_front(self)

    def back(self):
        return _magicsquares.RectVector_back(self)

    def assign(self, *args):
        return _magicsquares.RectVector_assign(self, *args)

    def resize(self, *args):
        return _magicsquares.RectVector_resize(self, *args)

    def insert(self, *args):
        return _magicsquares.RectVector_insert(self, *args)

    def reserve(self, *args):
        return _magicsquares.RectVector_reserve(self, *args)

    def capacity(self):
        return _magicsquares.RectVector_capacity(self)

    __swig_destroy__ = _magicsquares.delete_RectVector
    __del__ = lambda self: None


# Look up the native swigregister hook and pass it the Python proxy class.
RectVector_swigregister = _magicsquares.RectVector_swigregister
RectVector_swigregister(RectVector)
# This file is compatible with both classic and new-style classes.

BIN
python/magicsquares.pyc Normal file

Binary file not shown.

324
python/samplebrain Executable file
View File

@ -0,0 +1,324 @@
#!/usr/bin/env python
# aggregate sound from mfcc or fft similarity of chunks
import numpy as np
import scipy.io.wavfile
from features import mfcc
from features import logfbank
from features import base
import copy
import os
import platform;
if int(platform.python_version_tuple()[0])>2:
from tkinter import *
from tkinter.filedialog import *
from tkinter.messagebox import *
else:
from Tkinter import *
from tkFileDialog import *
from tkMessageBox import *
# Default sound folders. Nothing else in this script reads them: every path
# comes from the file dialogs instead.
source_dir = "../sound/source/"
render_dir = "../sound/render/"
# Shown in the window title.
version = "0.0.2"
def msg(msg):
    """Console fallback for status messages: print `msg` to stdout.

    The name is rebound to the window's own ``msg`` method once the GUI exists.
    """
    print(msg)
    return None
def clear_msg():
    """Console fallback: there is no message area to clear, so do nothing."""
    return None
def render_stats():
    """Console fallback: there are no statistics labels to refresh, so do nothing."""
    return None
def fadeinout(s,slength,elength):
    """Return a copy of `s` with a linear fade-in and fade-out applied.

    :param s: indexable sample sequence (e.g. a 1-D numpy array). It is not modified.
    :param slength: number of leading samples ramped up from gain 0.
    :param elength: number of trailing samples ramped down to gain 0 at the last sample.
    :returns: the faded copy.

    Fade lengths longer than the signal are clamped to its length, so a chunk
    shorter than the fade no longer raises IndexError (fade-in) or wraps
    around through negative indices (fade-out).
    """
    s = copy.deepcopy(s)
    total = len(s)
    fade_in = min(slength, total)
    fade_out = min(elength, total)
    for i in range(fade_in):
        s[i] *= float(i) / fade_in
    last = total - 1
    for i in range(fade_out):
        s[last - i] *= float(i) / fade_out
    return s
def normalise(s):
    """Scale `s` in place so that its largest absolute sample becomes 10000.

    :param s: numpy array of samples; it is modified in place.
    :returns: the same array `s`. Empty or all-zero input is returned untouched.

    Integer arrays are supported: the scaled values are rounded and written
    back through slice assignment, because an in-place ``/=`` with a float
    is rejected for integer dtypes on current numpy.
    """
    if len(s) == 0:
        return s
    # measure the peak in float64: negating int16 -32768 would overflow
    peak = float(np.max(np.abs(np.asarray(s, dtype=np.float64))))
    if peak > 0:
        scaled = s * (10000.0 / peak)
        if np.issubdtype(s.dtype, np.integer):
            scaled = np.rint(scaled)
        # slice assignment casts back to the array's own dtype
        s[...] = scaled
    return s
def chop(wav,size,overlap,rand,norm):
    """Split a wav into overlapping, faded blocks.

    :param wav: (samplerate, samples) pair; only ``wav[1]`` is used.
    :param size: block length in samples.
    :param overlap: samples shared by consecutive blocks (the step is size-overlap).
    :param rand: unused.
    :param norm: if true, each block is normalised before it is faded.
    :returns: list of ``[0, block]`` pairs.
    :raises ValueError: if the step is not positive while there is data to chop
        (this used to loop forever).

    NOTE(review): a block ending exactly at the end of the samples is skipped
    by the strict ``<`` below; kept as-is so block counts do not change.
    """
    samples = wav[1]
    step = size - overlap
    if step <= 0 and size < len(samples):
        raise ValueError("overlap must be smaller than the window size")
    ret = []
    pos = 0
    while pos + size < len(samples):
        block = samples[pos:pos+size]
        if norm:
            # normalise() works in place: hand it a private copy so that
            # overlapping blocks and the loaded wav are not rescaled repeatedly
            block = normalise(np.array(block))
        ret.append([0, fadeinout(block, 500, 500)])
        pos += step
    return ret
def fftify(chopped):
    """Return a list holding the FFT of every block in `chopped`.

    :param chopped: sequence of ``[tag, samples]`` pairs as produced by chop().
    :returns: a real list (not a lazy ``map`` object), so callers can take
        ``len()`` of it and iterate it more than once on Python 3.
    """
    return [np.fft.fft(block[1]) for block in chopped]
def mfccify(chopped,rate):
    """Return a list with ``logfbank(samples, rate)`` for every block in `chopped`.

    :param chopped: sequence of ``[tag, samples]`` pairs.
    :param rate: sample rate passed on to logfbank.
    """
    return [logfbank(block[1], rate) for block in chopped]
def fftdiff(a,b):
    """Return the sum of the absolute element-wise differences of `a` and `b`.

    The sum is accumulated in ``np.longdouble``. That is the same type as
    ``np.float128`` where the latter exists, but it is also defined on
    platforms that do not provide a ``float128`` alias.
    """
    return np.abs(a-b).sum(dtype=np.longdouble)
def diffify(a,b):
    """Return the list of ``fftdiff(x, y)`` for the paired items of `a` and `b`.

    A list is returned on Python 2 and 3 alike (``map`` is lazy on Python 3).
    Pairing stops at the shorter input.
    """
    return [fftdiff(x, y) for x, y in zip(a, b)]
def search(fft,bank):
    """Return the index of the entry in `bank` closest to `fft`.

    :param fft: feature array of the block to match.
    :param bank: sequence of feature arrays to search.
    :returns: index of the smallest ``fftdiff`` distance, or -1 if `bank` is
        empty. The chosen index is also reported through ``msg``.
    """
    # start from infinity rather than a large constant so that any finite
    # distance wins
    closest = float("inf")
    ret = -1
    for i,tfft in enumerate(bank):
        dist = fftdiff(fft,tfft)
        if dist<closest:
            ret = i
            closest = dist
    msg(str(ret))
    return ret
#unit_test()
class transponge():
    """Rebuilds a target sound from the closest-matching blocks of the source sounds.

    Blocks are compared by raw FFT or by log filterbank features (see
    :meth:`prepare`); :meth:`process` mixes the winners into a wav file.
    """
    def __init__(self):
        self.src=[]        # list of [filename, (rate, samples)]
        self.dst=[0,[]]    # (rate, samples) of the target
        self.src_chp=[]    # source blocks as produced by chop()
        self.dst_chp=[]
        self.src_fft=[]    # one feature array per source block
        self.dst_fft=[]    # one feature array per target block

    def set_target(self,dst_filename):
        """Load `dst_filename` as the target sound."""
        self.dst = scipy.io.wavfile.read(dst_filename)
        msg("succesfully loaded "+dst_filename+" as the target")

    def add_source(self,src_filename):
        """Load `src_filename` as a source and return its ``[filename, wav]`` entry."""
        ret = [src_filename,scipy.io.wavfile.read(src_filename)]
        self.src.append(ret)
        msg("succesfully loaded "+src_filename+" as a source")
        msg("(now have "+str(len(self.src))+" sounds in brain...)")
        return ret

    def delete_source(self,src_filename):
        """Drop every source that was loaded from `src_filename`."""
        # keep self.src a real list: filter() is lazy on Python 3, which would
        # break the later append() and len() calls
        self.src = [i for i in self.src if i[0]!=src_filename]

    def prepare(self,chp_size,chp_overlap,mfcc,norm):
        """Chop the target and all sources into blocks and compute their features.

        :param chp_size: block length in samples.
        :param chp_overlap: overlap between consecutive blocks in samples.
        :param mfcc: true to compare log filterbank features, false for raw FFT.
        :param norm: true to normalise every block.
        """
        self.chp_size = chp_size
        self.chp_overlap = chp_overlap
        self.src_chp=[]
        self.dst_chp=[]
        self.src_fft=[]
        self.dst_fft=[]
        msg("chopping up target wav...")
        self.dst_chp = chop(self.dst,self.chp_size,self.chp_overlap,0,norm)
        msg("number of target blocks: "+str(len(self.dst_chp)))
        if mfcc:
            self.dst_fft = mfccify(self.dst_chp,self.dst[0])
        else:
            # list() so that len(self.dst_fft) works whatever fftify returns
            self.dst_fft = list(fftify(self.dst_chp))
        render_stats()
        self.dst_chp = [] # clear
        self.dst_size = len(self.dst[1])
        for src in self.src:
            name = src[0]
            src = src[1]
            chopped=chop(src,self.chp_size,self.chp_overlap,0,norm)
            msg(name+" has "+str(len(chopped))+" blocks")
            self.src_chp+=chopped
            if mfcc:
                self.src_fft+=mfccify(chopped,src[0])
            else:
                self.src_fft+=fftify(chopped)
            render_stats()

    def process(self,filename):
        """Match every target block to a source block and write the mix to `filename`.

        NOTE(review): the output is always written at 44100 Hz, whatever the
        rate of the loaded target is -- confirm that this is intended.
        """
        out = np.zeros(self.dst_size,dtype=self.src_chp[0][1].dtype)
        pos = 0
        step = self.chp_size-self.chp_overlap
        total = float(len(self.dst_fft))
        for i,seg in enumerate(self.dst_fft):
            # index of the closest source block
            ii = search(seg,self.src_fft)
            clear_msg()
            # mix the block in at quarter gain, clipped to the output length
            n = min(self.chp_size,self.dst_size-pos)
            if n>0:
                block = self.src_chp[ii][1]
                out[pos:pos+n] = out[pos:pos+n]+(block[:n]*0.25)
            pos+=step
            msg(str((i/total)*100.0)[:5]+"%")
            # periodic save so partial results can be auditioned
            if i%10==0: scipy.io.wavfile.write(filename,44100,out)
        # final save: the periodic one misses the blocks after the last multiple of 10
        scipy.io.wavfile.write(filename,44100,out)
class win:
    """Tk front end: load sounds into the brain, set the options and run a render."""
    def __init__(self):
        self.sponge = transponge()

        # create window
        self.root = Tk()
        self.root.title("sample brain "+version)
        top = Frame(self.root)
        top.pack(fill=BOTH)

        # statistics panel
        f=Frame(top)
        f.pack(side=LEFT,fill=NONE)
        Label(f,text="statistix (last run)").pack()
        self.stats_brain_blocks = StringVar()
        Label(f,textvariable=self.stats_brain_blocks).pack()
        self.stats_brain_blocks.set("brain blocks: not run yet...")
        self.stats_target_blocks = StringVar()
        Label(f,textvariable=self.stats_target_blocks).pack()
        self.stats_target_blocks.set("target blocks: not run yet...")

        # buttons and options
        f=Frame(top)
        f.pack(side=LEFT)
        Button(f, text="add to brain", command=self.load_source).grid(row=0, column=0, sticky="we")
        Button(f, text="set the target", command=self.load_target).grid(row=0, column=1, sticky="we")
        Button(f, text="save as", command=self.save_as).grid(row=0, column=2, sticky="we")
        self.output_filename = "brain_out.wav"
        self.run_button = Button(f, text="run", command=self.run)
        self.run_button.grid(row=0, column=3, sticky="we")
        self.mfcc_var = IntVar()
        cb=Checkbutton(f, text="use mfcc", variable=self.mfcc_var, command=self.mfcc)
        cb.grid(row=1, column=0)
        cb.select()
        self.norm_var = IntVar()
        cb=Checkbutton(f, text="normalise", variable=self.norm_var, command=self.norm)
        cb.grid(row=1, column=1)
        rf = Frame(f)
        rf.grid(row=1, column=3)
        Label(rf, text="overlap").grid(row=0,column=0)
        self.overlap_entry = Entry(rf, width=5)
        self.overlap_entry.grid(row=1, column=0)
        self.overlap_entry.delete(0, END)
        self.overlap_entry.insert(0, "0.75")
        rf = Frame(f)
        rf.grid(row=1, column=4)
        Label(rf, text="window size").grid(row=0,column=0)
        self.window_entry = Entry(rf, width=5)
        self.window_entry.grid(row=1, column=0)
        self.window_entry.delete(0, END)
        self.window_entry.insert(0, "3000")

        # target name and list of loaded sources
        self.target_name = StringVar()
        Label(top,textvariable=self.target_name).pack()
        self.target_name.set("no target yet...")
        Label(top,text="brain contents:").pack()
        self.samples=Frame(top)
        self.samples.pack(fill=NONE,side=RIGHT)

        # message area
        self.debug = Text(self.root, font = "Helvetica 24 bold", height=10, width=60)
        self.debug.pack()
        self.debug.insert(END, "ready...\n")

    def build_sample_gui(self,name,size):
        """Add a row for source `name` (`size` in seconds) with a delete button."""
        f = Frame(self.samples)
        f.pack()
        Button(f, text="x", command=lambda: self.delete_sample(name)).pack(side=LEFT)
        Label(f, text=os.path.basename(name)+" ("+str(size)[:5]+" secs)").pack(side=RIGHT)

    def delete_sample(self,name):
        """Remove source `name` from the brain and rebuild the source list."""
        self.sponge.delete_source(name)
        msg("deleted "+name+" from my brain")
        for widget in self.samples.winfo_children():
            widget.destroy()
        for i in self.sponge.src:
            self.build_sample_gui(i[0],len(i[1][1])/float(i[1][0]))

    def msg(self,msg):
        """Insert `msg` at the top of the message area and refresh the window."""
        # str() so that non-string values (e.g. exceptions) can be shown too
        self.debug.insert(0.0, str(msg)+"\n")
        self.root.update()

    def clear_msg(self):
        """Empty the message area and refresh the window."""
        self.debug.delete(0.0, END)
        self.root.update()

    def load_target(self):
        """Ask for a wav file and make it the target."""
        filename = askopenfilename(title = "load target wav")
        if filename!="":
            self.sponge.set_target(filename)
            self.target_name.set("target: "+os.path.basename(filename))

    def load_source(self):
        """Ask for a wav file and add it to the brain."""
        filename = askopenfilename(title = "load source wav into brain")
        if filename!="":
            i=self.sponge.add_source(filename)
            self.build_sample_gui(i[0],len(i[1][1])/float(i[1][0]))

    def save_as(self):
        """Ask where the rendered wav should be written."""
        filename = asksaveasfilename(title = "set the output wav")
        if filename!="":
            self.output_filename = filename

    def render_stats(self):
        """Refresh the block-count labels from the sponge's current state."""
        self.stats_brain_blocks.set("brain blocks: "+str(len(self.sponge.src_chp)))
        self.stats_target_blocks.set("target blocks: "+str(len(self.sponge.dst_fft)))

    def run(self):
        """Read the options from the entries, then prepare and render."""
        try:
            window_size = float(self.window_entry.get())
            overlap = float(self.overlap_entry.get())
        except ValueError:
            msg("window size and overlap must be numbers")
            return
        if int(window_size)<=0 or int(window_size*overlap)>=int(window_size):
            # blocks advance by size-overlap; a non-positive step never finishes
            msg("window size must be above 0 and overlap below 1")
            return
        if self.norm_var.get()==1:
            msg("normalising: on")
        else:
            msg("normalising: off")
        self.sponge.prepare(int(window_size),int(window_size*overlap),
                            self.mfcc_var.get()==1,
                            self.norm_var.get()==1)
        msg("processing...")
        self.sponge.process(self.output_filename)
        msg("done, saved in "+self.output_filename)

    def mfcc(self):
        """Checkbutton callback; the value is read from ``mfcc_var`` in run()."""
        pass

    def norm(self):
        """Checkbutton callback; the value is read from ``norm_var`` in run()."""
        pass
# Build the GUI and route the module-level reporting hooks through it.
w = win()
msg = w.msg
clear_msg = w.clear_msg
render_stats = w.render_stats
try:
    w.root.mainloop()
# "except X as e" parses on Python 2.6+ and 3; the old comma form made the
# whole file a syntax error on Python 3
except Exception as e:
    msg(str(e))

198
python/transponge-eigen.py Normal file
View File

@ -0,0 +1,198 @@
# wierd eigensound approach - project into learned pca space
# fft -> matrix -> ifft -> sound
from magicsquares import *
import numpy as np
import scipy.io.wavfile
import math
# Folder the input wavs are read from (used by run()).
source_dir = "../sound/source/"
# Folder the rendered wavs are written to (used by the transponge class).
render_dir = "../sound/render/"
def fadeinout(s,slength,elength):
    """Apply a linear fade-in and fade-out to `s` in place and return it.

    :param s: indexable, mutable sample sequence (e.g. a 1-D numpy array).
    :param slength: number of leading samples ramped up from gain 0.
    :param elength: number of trailing samples ramped down to gain 0 at the last sample.
    :returns: `s` itself.

    Fade lengths longer than the signal are clamped to its length, so a short
    signal no longer raises IndexError.
    """
    total = len(s)
    fade_in = min(slength, total)
    fade_out = min(elength, total)
    for i in range(fade_in):
        s[i] *= float(i) / fade_in
    last = total - 1
    for i in range(fade_out):
        s[last - i] *= float(i) / fade_out
    return s
def normalise(s):
    """Scale `s` in place so that its largest absolute sample becomes 10000.

    :param s: numpy array of samples; it is modified in place.
    :returns: the same array `s`. Empty or all-zero input is returned untouched.

    Integer arrays are supported: the scaled values are rounded and written
    back through slice assignment, because an in-place ``/=`` with a float
    is rejected for integer dtypes on current numpy.
    """
    if len(s) == 0:
        return s
    # measure the peak in float64: negating int16 -32768 would overflow
    peak = float(np.max(np.abs(np.asarray(s, dtype=np.float64))))
    if peak > 0:
        scaled = s * (10000.0 / peak)
        if np.issubdtype(s.dtype, np.integer):
            scaled = np.rint(scaled)
        # slice assignment casts back to the array's own dtype
        s[...] = scaled
    return s
def arr_to_fv(a):
    """Copy the items of sequence `a` into a new FloatVector, converting each to float."""
    vec = FloatVector(len(a))
    for idx in range(vec.Size()):
        vec[idx] = float(a[idx])
    return vec
def fv_to_arr(a):
    """Copy `a` (anything with ``Size()`` and integer indexing) into a float32 numpy array."""
    count = a.Size()
    arr = np.zeros(count, dtype=np.float32)
    for idx in range(count):
        arr[idx] = a[idx]
    return arr
def build_pca(filename,s):
    """Feed the vectors in `s` to a new PCA, calculate it, save it and return it.

    :param filename: path the PCA is saved to.
    :param s: non-empty sequence of equally long feature vectors.
    :returns: the PCA object.
    """
    pca = PCA(len(s[0]))
    total = float(len(s))
    for i,v in enumerate(s):
        print(i/total*100.0)
        pca.AddFeature(arr_to_fv(v))
    print("making eigenmatrix")
    pca.Calculate()
    print("saving eigenmatrix")
    f = OpenFile(filename, "wb")
    try:
        pca.Save(f)
    finally:
        # close the handle even if saving fails
        CloseFile(f)
    return pca
def load_pca(filename):
    """Load a PCA saved by build_pca from `filename`, compress it and return it.

    ``Compress(2,40)`` is applied after loading with these fixed arguments.
    """
    pca = PCA(1)
    f = OpenFile(filename, "rb")
    try:
        pca.Load(f)
    finally:
        # close the handle even if loading fails
        CloseFile(f)
    pca.Compress(2,40)
    return pca
def synth_sound(s,pca):
    """Project `s` with ``pca.Project`` and return ``pca.Synth`` of the projection.

    :param s: sequence accepted by arr_to_fv.
    :param pca: object providing ``Project`` and ``Synth``.
    """
    projected = pca.Project(arr_to_fv(s))
    # (a disabled experiment used to zero some of the projected components here)
    #for i in range(0,p.Size()):
    # if i<50 and i>5: p[i]=0
    # if i>100: p[i]=0
    return pca.Synth(projected)
def project_eigen_sound(pca,row,gain):
    """Return row `row` of the PCA's eigen transform, multiplied by `gain`, as a numpy array."""
    transform = pca.GetEigenTransform()
    scaled_row = transform.GetRowVector(row)*gain
    return fv_to_arr(scaled_row)
def fftify(chopped):
    """Return a list with the real part of each block's FFT as float32.

    :param chopped: sequence of sample arrays.
    :returns: a real list (not a lazy ``map`` object). Only the real part of
        the spectrum is kept, as before, but it is now taken explicitly
        instead of through an implicit complex-to-float cast.
    """
    return [np.fft.fft(block).real.astype(np.float32) for block in chopped]
def chop(wav,size,overlap):
    """Split a wav into overlapping blocks.

    :param wav: (samplerate, samples) pair; only ``wav[1]`` is used.
    :param size: block length in samples.
    :param overlap: samples shared by consecutive blocks (the step is size-overlap).
    :returns: list of slices of the samples.
    :raises ValueError: if the step is not positive while there is data to chop
        (this used to loop forever).
    """
    samples = wav[1]
    step = size - overlap
    if step <= 0 and size < len(samples):
        raise ValueError("overlap must be smaller than the window size")
    ret = []
    pos = 0
    while pos + size < len(samples):
        ret.append(samples[pos:pos+size])
        pos += step
    return ret
class transponge():
    """Pushes the target's FFT blocks through a PCA and resynthesises them."""
    def __init__(self,chp_size,chp_overlap,dst_filename):
        """Load the target wav and chop it into FFT blocks.

        :param chp_size: block length in samples.
        :param chp_overlap: overlap between consecutive blocks in samples.
        :param dst_filename: path of the target wav.
        """
        dst = scipy.io.wavfile.read(dst_filename)
        self.src_chp=[]
        self.src_fft=[]
        self.chp_size = chp_size
        self.chp_overlap = chp_overlap
        self.dst_chp = chop(dst,self.chp_size,self.chp_overlap)
        # list() so the features can be iterated more than once on Python 3
        self.dst_fft = list(fftify(self.dst_chp))
        print(self.chp_overlap)
        print (len(self.dst_chp))
        self.dst_size = len(dst[1])

    def add(self,src_filename):
        """Chop `src_filename` and add its blocks to the training set."""
        src = scipy.io.wavfile.read(src_filename)
        src_chp=chop(src,self.chp_size,self.chp_overlap)
        self.src_fft+=fftify(src_chp)
        self.src_chp+=src_chp
        print("src now: "+str(len(self.src_chp)))

    def learn(self,filename):
        """Build a PCA from the added blocks and save it to `filename`."""
        scipy.io.wavfile.write(render_dir+"input.wav",44100,np.concatenate(self.src_fft))
        self.pca = build_pca(filename,self.src_fft)

    def load(self,filename):
        """Load a previously saved PCA from `filename`."""
        self.pca = load_pca(filename)

    def process(self):
        """Resynthesise every target block through the PCA and write the result."""
        out = np.zeros(self.dst_size,dtype=np.int16)
        pos = 0
        step = self.chp_size-self.chp_overlap
        target = render_dir+"pca-out-amen-wierd.wav"
        for i,seg in enumerate(self.dst_fft):
            fft = fv_to_arr(synth_sound(seg,self.pca))
            resynth = fadeinout(np.fft.ifft(fft),50,50)
            print(resynth[55])
            # ifft output is complex; only its real part can go into the
            # int16 buffer
            n = min(self.chp_size,self.dst_size-pos)
            if n>0:
                out[pos:pos+n] = out[pos:pos+n]+resynth.real[:n]
            pos+=step
            print((i/float(len(self.dst_chp)))*100.0)
            if i%10==0: scipy.io.wavfile.write(target,44100,out)
        # final save: the periodic one misses the blocks after the last multiple of 10
        scipy.io.wavfile.write(target,44100,out)

    def process_eigensound(self,row):
        """Render the first 50 eigen rows, ten copies each, to eigensound.wav.

        NOTE(review): the `row` argument is not used; the loop below always
        covers rows 0..49 -- confirm whether a single row was intended.
        """
        print(self.pca.GetEigenValues()[0])
        out = []
        for r in range(0,50):
            for i in range(0,10):
                #gain = (i/float(100)-0.5)*2
                #gain = 1/self.pca.GetEigenValues()[r]
                gain =100000
                #print("gain:"+str(gain))
                n = project_eigen_sound(self.pca,r,gain)
                # keep the real part explicitly instead of an implicit complex cast
                pcm = np.fft.ifft(n).real.astype(np.float32)
                pcm*=3
                print(pcm[0])
                out.append(pcm)
        scipy.io.wavfile.write(render_dir+"eigensound.wav",44100,np.concatenate(out).astype(np.int16))
def run(l):
    """Resynthesise pw2.wav through the saved amen-fft.pca using blocks of `l` samples.

    The commented lines are alternative training sounds and PCA files.
    """
    sponge = transponge(l,int(l*0.75),source_dir+"pw2.wav")
    # sponge.add(source_dir+"full.wav")
    # sponge.add(source_dir+"808.wav")
    # sponge.add(source_dir+"amen_brother.wav")
    # sponge.add(source_dir+"sailingbybit.wav")
    # sponge.add(source_dir+"dreambit.wav")
    #sponge.add(source_dir+"pw2.wav")
    #print("learning")
    #sponge.learn("amen-fft.pca")
    #print("loading")
    #sponge.load("sailingby-fft.pca")
    # sponge.load("acid-fft.pca")
    #sponge.load("full-fft.pca")
    # sponge.load("808-fft.pca")
    sponge.load("amen-fft.pca")
    #sponge.process_eigensound(3)
    # print("processing")
    sponge.process()


run(512)
# Disabled experiment kept as a bare string literal; it is never executed.
'''import matplotlib.pyplot as plt
sound = scipy.io.wavfile.read(source_dir+"808.wav")[1]
fourier = np.fft.fft(sound)
n = len(sound)
timestep = 0.01
freq = np.fft.fftfreq(n, d=timestep)
freq = freq.astype(np.float32)
for i in range(0,1000):
print(freq[i])
scipy.io.wavfile.write(render_dir+"fft.wav", 44100, freq)
'''

261
python/transponge-gui.py Normal file
View File

@ -0,0 +1,261 @@
# aggregate sound from mfcc or fft similarity of chunks
import numpy as np
import scipy.io.wavfile
from features import mfcc
from features import logfbank
from features import base
import copy
import platform;
if int(platform.python_version_tuple()[0])>2:
from tkinter import *
from tkinter.filedialog import *
from tkinter.messagebox import *
else:
from Tkinter import *
from tkFileDialog import *
from tkMessageBox import *
# Default source folder; nothing else in this script reads it (files are
# picked through the file dialogs).
source_dir = "../sound/source/"
# Folder the rendered wav is written to (see transponge.process).
render_dir = "../sound/render/"
# Shown in the window title.
version = "0.0.1"
def msg(msg):
    """Console fallback for status messages: print `msg` to stdout.

    The name is rebound to the window's own ``msg`` method once the GUI exists.
    """
    print(msg)
    return None
def clear_msg():
    """Console fallback: there is no message area to clear, so do nothing."""
    return None
def fadeinout(s,slength,elength):
    """Return a copy of `s` with a linear fade-in and fade-out applied.

    :param s: indexable sample sequence (e.g. a 1-D numpy array). It is not modified.
    :param slength: number of leading samples ramped up from gain 0.
    :param elength: number of trailing samples ramped down to gain 0 at the last sample.
    :returns: the faded copy.

    Fade lengths longer than the signal are clamped to its length, so a chunk
    shorter than the fade no longer raises IndexError.
    """
    s = copy.deepcopy(s)
    total = len(s)
    fade_in = min(slength, total)
    fade_out = min(elength, total)
    for i in range(fade_in):
        s[i] *= float(i) / fade_in
    last = total - 1
    for i in range(fade_out):
        s[last - i] *= float(i) / fade_out
    return s
def normalise(s):
    """Scale `s` in place so that its largest absolute sample becomes 10000.

    :param s: numpy array of samples; it is modified in place.
    :returns: the same array `s`. Empty or all-zero input is returned untouched.

    Integer arrays are supported: the scaled values are rounded and written
    back through slice assignment, because an in-place ``/=`` with a float
    is rejected for integer dtypes on current numpy.
    """
    if len(s) == 0:
        return s
    # measure the peak in float64: negating int16 -32768 would overflow
    peak = float(np.max(np.abs(np.asarray(s, dtype=np.float64))))
    if peak > 0:
        scaled = s * (10000.0 / peak)
        if np.issubdtype(s.dtype, np.integer):
            scaled = np.rint(scaled)
        # slice assignment casts back to the array's own dtype
        s[...] = scaled
    return s
def chop(wav,size,overlap,rand):
    """Split a wav into overlapping, faded blocks.

    :param wav: (samplerate, samples) pair; only ``wav[1]`` is used.
    :param size: block length in samples.
    :param overlap: samples shared by consecutive blocks (the step is size-overlap).
    :param rand: unused.
    :returns: list of ``[0, block]`` pairs.
    :raises ValueError: if the step is not positive while there is data to chop
        (this used to loop forever).
    """
    samples = wav[1]
    step = size - overlap
    if step <= 0 and size < len(samples):
        raise ValueError("overlap must be smaller than the window size")
    ret = []
    pos = 0
    while pos + size < len(samples):
        ret.append([0, fadeinout(samples[pos:pos+size], 500, 500)])
        pos += step
    return ret
def fftify(chopped):
    """Return a list holding the FFT of every block in `chopped`.

    :param chopped: sequence of ``[tag, samples]`` pairs as produced by chop().
    :returns: a real list (not a lazy ``map`` object), so callers can take
        ``len()`` of it and iterate it more than once on Python 3.
    """
    return [np.fft.fft(block[1]) for block in chopped]
def mfccify(chopped,rate):
    """Return a list with ``logfbank(samples, rate)`` for every block in `chopped`.

    :param chopped: sequence of ``[tag, samples]`` pairs.
    :param rate: sample rate passed on to logfbank.
    """
    return [logfbank(block[1], rate) for block in chopped]
def fftdiff(a,b):
    """Return the sum of the absolute element-wise differences of `a` and `b`.

    The sum is accumulated in ``np.longdouble``. That is the same type as
    ``np.float128`` where the latter exists, but it is also defined on
    platforms that do not provide a ``float128`` alias.
    """
    return np.abs(a-b).sum(dtype=np.longdouble)
def diffify(a,b):
    """Return the list of ``fftdiff(x, y)`` for the paired items of `a` and `b`.

    A list is returned on Python 2 and 3 alike (``map`` is lazy on Python 3).
    Pairing stops at the shorter input.
    """
    return [fftdiff(x, y) for x, y in zip(a, b)]
def search(fft,bank):
    """Return the index of the entry in `bank` closest to `fft`.

    :param fft: feature array of the block to match.
    :param bank: sequence of feature arrays to search.
    :returns: index of the smallest ``fftdiff`` distance, or -1 if `bank` is
        empty. The chosen index is also reported through ``msg``.
    """
    # start from infinity rather than a large constant so that any finite
    # distance wins
    closest = float("inf")
    ret = -1
    for i,tfft in enumerate(bank):
        dist = fftdiff(fft,tfft)
        if dist<closest:
            ret = i
            closest = dist
    msg(str(ret))
    return ret
#unit_test()
class transponge():
    """Rebuilds a target sound from the closest-matching blocks of the source sounds."""
    def __init__(self):
        self.src=[]        # list of (rate, samples) wavs
        self.dst=[0,[]]    # (rate, samples) of the target
        self.src_chp=[]    # source blocks as produced by chop()
        self.dst_chp=[]
        self.src_fft=[]    # one feature array per source block
        self.dst_fft=[]    # one feature array per target block

    def set_target(self,dst_filename):
        """Load `dst_filename` as the target sound."""
        self.dst = scipy.io.wavfile.read(dst_filename)
        msg("succesfully loaded "+dst_filename+" as the target")

    def add_source(self,src_filename):
        """Load `src_filename` as an additional source sound."""
        self.src.append(scipy.io.wavfile.read(src_filename))
        msg("succesfully loaded "+src_filename+" as a source")
        msg("(now have "+str(len(self.src))+" sounds in brain...)")

    def prepare(self,chp_size,chp_overlap,mfcc):
        """Chop the target and all sources into blocks and compute their features.

        :param chp_size: block length in samples.
        :param chp_overlap: overlap between consecutive blocks in samples.
        :param mfcc: true to compare log filterbank features, false for raw FFT.
        """
        self.chp_size = chp_size
        self.chp_overlap = chp_overlap
        # start from scratch so that a second run does not duplicate the
        # source blocks collected by the first one
        self.src_chp=[]
        self.src_fft=[]
        msg("chopping up target wav...")
        self.dst_chp = chop(self.dst,self.chp_size,self.chp_overlap,0)
        msg("number of target blocks: "+str(len(self.dst_chp)))
        msg("processing target blocks")
        if mfcc:
            self.dst_fft = mfccify(self.dst_chp,self.dst[0])
        else:
            # list() so that len(self.dst_fft) works whatever fftify returns
            self.dst_fft = list(fftify(self.dst_chp))
        self.dst_chp = [] # clear
        self.dst_size = len(self.dst[1])
        for i,src in enumerate(self.src):
            msg("chopping up source wav no. "+str(i+1))
            chopped=chop(src,self.chp_size,self.chp_overlap,0)
            # these are source blocks (the label used to say "target")
            msg("number of source blocks: "+str(len(chopped)))
            self.src_chp+=chopped
            if mfcc:
                self.src_fft+=mfccify(chopped,src[0])
            else:
                self.src_fft+=fftify(chopped)
            # report the source total (this used to print the target count)
            msg("total number of source blocks now: "+str(len(self.src_fft)))

    def process(self,filename):
        """Match every target block to a source block and write the mix to render_dir+`filename`.

        NOTE(review): the output is always written at 44100 Hz, whatever the
        rate of the loaded target is -- confirm that this is intended.
        """
        out = np.zeros(self.dst_size,dtype=self.src_chp[0][1].dtype)
        pos = 0
        step = self.chp_size-self.chp_overlap
        total = float(len(self.dst_fft))
        for i,seg in enumerate(self.dst_fft):
            # index of the closest source block
            ii = search(seg,self.src_fft)
            clear_msg()
            # mix the block in at quarter gain, clipped to the output length
            n = min(self.chp_size,self.dst_size-pos)
            if n>0:
                block = self.src_chp[ii][1]
                out[pos:pos+n] = out[pos:pos+n]+(block[:n]*0.25)
            pos+=step
            msg(str((i/total)*100.0)[:5]+"%")
            if i%10==0: scipy.io.wavfile.write(render_dir+filename,44100,out)
        # final save: the periodic one misses the blocks after the last multiple of 10
        scipy.io.wavfile.write(render_dir+filename,44100,out)
class win:
    """Tk front end: load sounds, set window size and overlap, run a render."""
    def __init__(self):
        self.sponge = transponge()

        # create window
        self.root = Tk()
        self.root.title("transpongerator brain "+version)
        f=Frame(self.root)
        f.pack(fill=NONE)
        Button(f, text="load a source", command=self.load_source).grid(row=0, column=0, sticky="we")
        Button(f, text="load the target", command=self.load_target).grid(row=0, column=1, sticky="we")
        self.run_button = Button(f, text="run", command=self.run)
        self.run_button.grid(row=0, column=2, sticky="we")
        self.mfcc_var = IntVar()
        cb=Checkbutton(f, text="use mfcc", variable=self.mfcc_var, command=self.mfcc)
        cb.grid(row=1, column=0)
        cb.select()

        # overlap entry
        rf = Frame(f)
        rf.grid(row=1, column=3)
        Label(rf, text="overlap").grid(row=0,column=0)
        self.overlap_entry = Entry(rf, width=5)
        self.overlap_entry.grid(row=1, column=0)
        self.overlap_entry.bind("<Return>", self.on_overlap)
        self.overlap_entry.delete(0, END)
        self.overlap_entry.insert(0, "0.75")
        self.overlap=0.75

        # window size entry
        rf = Frame(f)
        rf.grid(row=1, column=4)
        Label(rf, text="window size").grid(row=0,column=0)
        self.window_entry = Entry(rf, width=5)
        self.window_entry.grid(row=1, column=0)
        self.window_entry.bind("<Return>", self.on_window_size)
        self.window_entry.delete(0, END)
        self.window_entry.insert(0, "3000")
        self.window_size=3000

        # message area
        self.debug = Text(self.root, font = "Helvetica 24 bold", height=10, width=60)
        self.debug.pack()
        self.debug.insert(END, "ready...\n")

    def msg(self,msg):
        """Insert `msg` at the top of the message area and refresh the window."""
        # str() so that non-string values (e.g. exceptions) can be shown too
        self.debug.insert(0.0, str(msg)+"\n")
        self.root.update()

    def clear_msg(self):
        """Empty the message area and refresh the window."""
        self.debug.delete(0.0, END)
        self.root.update()

    def load_target(self):
        """Ask for a wav file and make it the target."""
        filename = askopenfilename(title = "load target wav")
        if filename!="":
            self.sponge.set_target(filename)

    def load_source(self):
        """Ask for a wav file and add it to the brain."""
        filename = askopenfilename(title = "load source wav into brain")
        if filename!="":
            self.sponge.add_source(filename)

    def on_window_size(self,event):
        """Store the window-size entry; keep the old value if it is not a number."""
        try:
            self.window_size = float(self.window_entry.get())
        except ValueError:
            self.msg("window size must be a number, keeping "+str(self.window_size))

    def on_overlap(self,event):
        """Store the overlap entry; keep the old value if it is not a number."""
        try:
            self.overlap = float(self.overlap_entry.get())
        except ValueError:
            self.msg("overlap must be a number, keeping "+str(self.overlap))

    def run(self):
        """Prepare and render with the current settings."""
        # pick up values typed into the entries even if Return was never pressed
        self.on_window_size(None)
        self.on_overlap(None)
        if int(self.window_size)<=0 or int(self.window_size*self.overlap)>=int(self.window_size):
            # blocks advance by size-overlap; a non-positive step never finishes
            self.msg("window size must be above 0 and overlap below 1")
            return
        self.msg("preparing: windowsize: "+str(self.window_size))
        self.msg("overlap: "+str(self.overlap))
        self.sponge.prepare(int(self.window_size),int(self.window_size*self.overlap),self.mfcc_var.get()==1)
        msg("processing...")
        self.sponge.process("brain_out.wav")
        msg("done, saved in brain_out.wav")

    def mfcc(self):
        """Checkbutton callback; the value is read from ``mfcc_var`` in run()."""
        pass
# Build the GUI and route the module-level reporting hooks through it.
w = win()
msg = w.msg
clear_msg = w.clear_msg
try:
    w.root.mainloop()
# "except X as e" parses on Python 2.6+ and 3; the old comma form made the
# whole file a syntax error on Python 3
except Exception as e:
    msg(str(e))

133
python/transponge-mfcc.py Normal file
View File

@ -0,0 +1,133 @@
# aggregate sound from mfcc or fft similarity of chunks
import numpy as np
import scipy.io.wavfile
from features import mfcc
from features import logfbank
from features import base
import copy
# Folder the input wavs are read from (used by run()).
source_dir = "../sound/source/"
# Folder the rendered wav is written to (used by transponge.process).
render_dir = "../sound/render/"
def fadeinout(s,slength,elength):
    """Return a copy of `s` with a linear fade-in and fade-out applied.

    :param s: indexable sample sequence (e.g. a 1-D numpy array). It is not modified.
    :param slength: number of leading samples ramped up from gain 0.
    :param elength: number of trailing samples ramped down to gain 0 at the last sample.
    :returns: the faded copy.

    Fade lengths longer than the signal are clamped to its length, so a chunk
    shorter than the fade no longer raises IndexError.
    """
    s = copy.deepcopy(s)
    total = len(s)
    fade_in = min(slength, total)
    fade_out = min(elength, total)
    for i in range(fade_in):
        s[i] *= float(i) / fade_in
    last = total - 1
    for i in range(fade_out):
        s[last - i] *= float(i) / fade_out
    return s
def normalise(s):
    """Scale `s` in place so that its largest absolute sample becomes 10000.

    :param s: numpy array of samples; it is modified in place.
    :returns: the same array `s`. Empty or all-zero input is returned untouched.

    Integer arrays are supported: the scaled values are rounded and written
    back through slice assignment, because an in-place ``/=`` with a float
    is rejected for integer dtypes on current numpy.
    """
    if len(s) == 0:
        return s
    # measure the peak in float64: negating int16 -32768 would overflow
    peak = float(np.max(np.abs(np.asarray(s, dtype=np.float64))))
    if peak > 0:
        scaled = s * (10000.0 / peak)
        if np.issubdtype(s.dtype, np.integer):
            scaled = np.rint(scaled)
        # slice assignment casts back to the array's own dtype
        s[...] = scaled
    return s
def chop(wav,size,overlap,rand):
    """Split a wav into overlapping, faded blocks.

    :param wav: (samplerate, samples) pair; only ``wav[1]`` is used.
    :param size: block length in samples.
    :param overlap: samples shared by consecutive blocks (the step is size-overlap).
    :param rand: unused.
    :returns: list of ``[0, block]`` pairs.
    :raises ValueError: if the step is not positive while there is data to chop
        (this used to loop forever).
    """
    samples = wav[1]
    step = size - overlap
    if step <= 0 and size < len(samples):
        raise ValueError("overlap must be smaller than the window size")
    ret = []
    pos = 0
    while pos + size < len(samples):
        ret.append([0, fadeinout(samples[pos:pos+size], 500, 500)])
        pos += step
    return ret
def fftify(chopped):
    """Return a list holding the FFT of every block in `chopped`.

    :param chopped: sequence of ``[tag, samples]`` pairs as produced by chop().
    :returns: a real list on Python 2 and 3 alike (``map`` is lazy on Python 3).
    """
    return [np.fft.fft(block[1]) for block in chopped]
def mfccify(chopped,rate):
    """Return a list with ``logfbank(samples, rate)`` for every block in `chopped`.

    :param chopped: sequence of ``[tag, samples]`` pairs.
    :param rate: sample rate passed on to logfbank.
    """
    return [logfbank(block[1], rate) for block in chopped]
def fftdiff(a,b):
    """Return the sum of the absolute element-wise differences of `a` and `b`.

    The sum is accumulated in ``np.longdouble``. That is the same type as
    ``np.float128`` where the latter exists, but it is also defined on
    platforms that do not provide a ``float128`` alias.
    """
    return np.abs(a-b).sum(dtype=np.longdouble)
def diffify(a,b):
    """Return the list of ``fftdiff(x, y)`` for the paired items of `a` and `b`.

    A list is returned on Python 2 and 3 alike (``map`` is lazy on Python 3).
    Pairing stops at the shorter input.
    """
    return [fftdiff(x, y) for x, y in zip(a, b)]
def search(fft,bank):
    """Return the index of the entry in `bank` closest to `fft`.

    :param fft: feature array of the block to match.
    :param bank: sequence of feature arrays to search.
    :returns: index of the smallest ``fftdiff`` distance, or -1 if `bank` is
        empty. The chosen index is also printed.
    """
    # start from infinity rather than a large constant so that any finite
    # distance wins
    closest = float("inf")
    ret = -1
    for i,tfft in enumerate(bank):
        dist = fftdiff(fft,tfft)
        if dist<closest:
            ret = i
            closest = dist
    print(ret)
    return ret
def unit_test():
    """Print a few fftdiff and fadeinout results for manual inspection."""
    print(fftdiff(np.array([0,0,0,0]),np.array([1,1,1,1])))
    #assert(fftdiff(np.array([0,0,0,0]),np.array([1,1,1,1]))==1)
    print(fftdiff(np.array([-100,-1000,0,0]),np.array([-1,-1,-1,-1])))
    # fadeinout takes both a fade-in and a fade-out length; the old call
    # passed only one and raised TypeError
    print(fadeinout(np.array([10,10,10,10,10,10,10]),3,3))
#unit_test()
class transponge():
    """Rebuilds a target sound from the source blocks with the closest log filterbank features."""
    def __init__(self,chp_size,chp_overlap,dst_filename):
        """Load the target wav, chop it and compute its block features.

        :param chp_size: block length in samples.
        :param chp_overlap: overlap between consecutive blocks in samples.
        :param dst_filename: path of the target wav.
        """
        dst = scipy.io.wavfile.read(dst_filename)
        self.src_chp=[]
        self.src_fft=[]
        self.chp_size = chp_size
        self.chp_overlap = chp_overlap
        self.dst_chp = chop(dst,self.chp_size,self.chp_overlap,0)
        print("number of target blocks: "+str(len(self.dst_chp)))
        self.dst_fft = mfccify(self.dst_chp,dst[0])
        #self.dst_fft = fftify(self.dst_chp)
        self.dst_chp = [] # clear
        self.dst_size = len(dst[1])

    def add(self,src_filename):
        """Chop `src_filename` and add its blocks and features to the brain."""
        src = scipy.io.wavfile.read(src_filename)
        print("adding "+src_filename)
        src_chp=chop(src,self.chp_size,self.chp_overlap,0)
        self.src_chp+=src_chp
        self.src_fft+=mfccify(src_chp,src[0])
        # report the source total (this used to print the target count)
        print("number of source blocks now: "+str(len(self.src_fft)))
        #self.src_fft+=fftify(src_chp)

    def process(self):
        """Match every target block to a source block and write the mix to render_dir."""
        out = np.zeros(self.dst_size,dtype=self.src_chp[0][1].dtype)
        pos = 0
        step = self.chp_size-self.chp_overlap
        target = render_dir+"pwr-acid-mfccnonorm1500.wav"
        for i,seg in enumerate(self.dst_fft):
            # index of the closest source block
            ii = search(seg,self.src_fft)
            # mix the block in at quarter gain, clipped to the output length
            n = min(self.chp_size,self.dst_size-pos)
            if n>0:
                block = self.src_chp[ii][1]
                out[pos:pos+n] = out[pos:pos+n]+(block[:n]*0.25)
            pos+=step
            print((i/float(len(self.dst_fft)))*100.0)
            if i%10==0: scipy.io.wavfile.write(target,44100,out)
        # final save: the periodic one misses the blocks after the last multiple of 10
        scipy.io.wavfile.write(target,44100,out)
def run(l):
    """Rebuild pw-right.wav from the listed source wavs using blocks of `l` samples.

    The commented lines are alternative sources.
    """
    sponge = transponge(l,int(l*0.75),source_dir+"pw-right.wav")
    sponge.add(source_dir+"totalsine.wav")
    # sponge.add(source_dir+"water.wav")
    # sponge.add(source_dir+"cumbia.wav")
    # sponge.add(source_dir+"pista07.wav")
    # sponge.add(source_dir+"sailingbybit.wav")
    sponge.add(source_dir+"808.wav")
    sponge.add(source_dir+"joey.wav")
    # sponge.add("full.wav")
    print("processing")
    sponge.process()


run(1500)

118
python/transponge.py Normal file
View File

@ -0,0 +1,118 @@
import numpy as np
import scipy.io.wavfile
def fadeinout(s,slength,elength):
    """Apply a linear fade-in and fade-out to `s` in place and return it.

    :param s: indexable, mutable sample sequence (e.g. a 1-D numpy array).
    :param slength: number of leading samples ramped up from gain 0.
    :param elength: number of trailing samples ramped down to gain 0 at the last sample.
    :returns: `s` itself.

    Fade lengths longer than the signal are clamped to its length, so a short
    signal no longer raises IndexError.
    """
    total = len(s)
    fade_in = min(slength, total)
    fade_out = min(elength, total)
    for i in range(fade_in):
        s[i] *= float(i) / fade_in
    last = total - 1
    for i in range(fade_out):
        s[last - i] *= float(i) / fade_out
    return s
def normalise(s):
    """Scale `s` in place so that its largest absolute sample becomes 10000.

    :param s: numpy array of samples; it is modified in place.
    :returns: the same array `s`. Empty or all-zero input is returned untouched.

    The peak is the largest absolute value, so a signal whose negative
    excursion is the larger one is scaled correctly too (only the positive
    maximum was considered before). Integer arrays are supported: the scaled
    values are rounded and written back through slice assignment.
    """
    if len(s) == 0:
        return s
    # measure the peak in float64: abs() of int16 -32768 would overflow
    peak = float(np.max(np.abs(np.asarray(s, dtype=np.float64))))
    if peak > 0:
        scaled = s * (10000.0 / peak)
        if np.issubdtype(s.dtype, np.integer):
            scaled = np.rint(scaled)
        # slice assignment casts back to the array's own dtype
        s[...] = scaled
    return s
def chop(wav,size,overlap):
    """Split a wav into overlapping blocks, each normalised and faded.

    :param wav: (samplerate, samples) pair; only ``wav[1]`` is used.
    :param size: block length in samples.
    :param overlap: samples shared by consecutive blocks (the step is size-overlap).
    :returns: list of sample arrays.
    :raises ValueError: if the step is not positive while there is data to chop
        (this used to loop forever).
    """
    samples = wav[1]
    step = size - overlap
    if step <= 0 and size < len(samples):
        raise ValueError("overlap must be smaller than the window size")
    ret = []
    pos = 0
    while pos + size < len(samples):
        # normalise() and fadeinout() work in place: give them a private copy,
        # otherwise overlapping blocks would see samples that earlier blocks
        # already rescaled and faded
        block = np.array(samples[pos:pos+size])
        ret.append(fadeinout(normalise(block), 30, 100))
        pos += step
    return ret
def fftify(chopped):
    """Return a list holding the FFT of every sample array in `chopped`.

    A real list is returned on Python 2 and 3 alike (``map`` is lazy on Python 3).
    """
    return [np.fft.fft(block) for block in chopped]
def fftdiff(a,b):
    """Return the sum of the absolute element-wise differences of `a` and `b`.

    The sum is accumulated in ``np.longdouble``. That is the same type as
    ``np.float128`` where the latter exists, but it is also defined on
    platforms that do not provide a ``float128`` alias.
    """
    return np.abs(a-b).sum(dtype=np.longdouble)
def diffify(a,b):
    """Return the list of ``fftdiff(x, y)`` for the paired items of `a` and `b`.

    A list is returned on Python 2 and 3 alike (``map`` is lazy on Python 3).
    Pairing stops at the shorter input.
    """
    return [fftdiff(x, y) for x, y in zip(a, b)]
def search(fft,bank):
    """Return the index of the entry in `bank` closest to `fft`.

    :param fft: feature array of the block to match.
    :param bank: sequence of feature arrays to search.
    :returns: index of the smallest ``fftdiff`` distance, or -1 if `bank` is
        empty. The chosen index is also printed.
    """
    # start from infinity rather than a large constant so that any finite
    # distance wins
    closest = float("inf")
    ret = -1
    for i,tfft in enumerate(bank):
        dist = fftdiff(fft,tfft)
        if dist<closest:
            ret = i
            closest = dist
    print(ret)
    return ret
def unit_test():
    """Print a few fftdiff and fadeinout results for manual inspection."""
    print(fftdiff(np.array([0,0,0,0]),np.array([1,1,1,1])))
    #assert(fftdiff(np.array([0,0,0,0]),np.array([1,1,1,1]))==1)
    print(fftdiff(np.array([-100,-1000,0,0]),np.array([-1,-1,-1,-1])))
    # fadeinout takes both a fade-in and a fade-out length; the old call
    # passed only one and raised TypeError
    print(fadeinout(np.array([10,10,10,10,10,10,10]),3,3))
#unit_test()
class transponge():
    """Rebuilds a target sound from the source blocks with the closest FFT."""
    def __init__(self,chp_size,chp_overlap,dst_filename):
        """Load the target wav, chop it and compute the FFT of every block.

        :param chp_size: block length in samples.
        :param chp_overlap: overlap between consecutive blocks in samples.
        :param dst_filename: path of the target wav.
        """
        dst = scipy.io.wavfile.read(dst_filename)
        self.src_chp=[]
        self.src_fft=[]
        # source index chosen for each target block; filled by process()
        # and read by render()
        self.indices=[]
        self.chp_size = chp_size
        self.chp_overlap = chp_overlap
        self.dst_chp = chop(dst,self.chp_size,self.chp_overlap)
        print(self.chp_overlap)
        print (len(self.dst_chp))
        # list() so the features can be iterated and measured on Python 3
        self.dst_fft = list(fftify(self.dst_chp))
        self.dst_chp = [] # clear
        self.dst_size = len(dst[1])

    def add(self,src_filename):
        """Chop `src_filename` and add its blocks and FFTs to the brain."""
        src = scipy.io.wavfile.read(src_filename)
        src_chp=chop(src,self.chp_size,self.chp_overlap)
        self.src_chp+=src_chp
        self.src_fft+=fftify(src_chp)

    def _mix(self,out,pos,block):
        """Add `block` at half gain into `out` from `pos`, clipped to the output length."""
        n = min(self.chp_size,self.dst_size-pos)
        if n>0:
            out[pos:pos+n] = out[pos:pos+n]+block[:n]*0.5

    def process(self):
        """Match every target block to a source block and write the mix to outl.wav."""
        out = np.zeros(self.dst_size,dtype=self.src_chp[0].dtype)
        pos = 0
        self.indices = []
        for i,seg in enumerate(self.dst_fft):
            # collect indices of closest sections
            ii = search(seg,self.src_fft)
            self.indices.append(ii)
            self._mix(out,pos,self.src_chp[ii])
            pos+=(self.chp_size-self.chp_overlap)
            print((i/float(len(self.dst_fft)))*100.0)
            if i%10==0: scipy.io.wavfile.write("outl.wav",44100,out)
        # final save: the periodic one misses the blocks after the last multiple of 10
        scipy.io.wavfile.write("outl.wav",44100,out)

    def render(self):
        """Return the mix described by ``self.indices`` as an array.

        ``self.indices`` is filled by process(); it was never set before, so
        this method used to raise AttributeError.
        """
        ret = np.zeros(self.dst_size,dtype=self.src_chp[0].dtype)
        pos = 0
        for i in self.indices:
            self._mix(ret,pos,self.src_chp[i])
            pos+=(self.chp_size-self.chp_overlap)
        return ret
def run(l):
    """Rebuild pw-left.wav from the listed source wavs using blocks of `l` samples."""
    sponge = transponge(l,int(l*0.75),"pw-left.wav")
    sponge.add("water.wav")
    sponge.add("cumbia.wav")
    sponge.add("pista07.wav")
    sponge.add("sailingbybit.wav")
    # sponge.add("full.wav")
    print("processing")
    sponge.process()


run(3000)