mirror of
https://gitlab.com/then-try-this/samplebrain.git
synced 2025-05-09 17:17:20 +00:00
removed some old code
This commit is contained in:
parent
eec610c06b
commit
1a0546ed53
@ -1 +0,0 @@
|
||||
from .base import *
|
Binary file not shown.
@ -1,173 +0,0 @@
|
||||
# calculate filterbank features. Provides e.g. fbank and mfcc features for use in ASR applications
|
||||
# Author: James Lyons 2012
|
||||
import numpy
|
||||
from features import sigproc
|
||||
from scipy.fftpack import dct
|
||||
|
||||
# make it python3.x compatible
|
||||
try:
|
||||
xrange(1)
|
||||
except:
|
||||
xrange=range
|
||||
|
||||
def mfcc(signal,samplerate=16000,winlen=0.025,winstep=0.01,numcep=13,
|
||||
nfilt=26,nfft=512,lowfreq=0,highfreq=None,preemph=0.97,ceplifter=22,appendEnergy=True):
|
||||
"""Compute MFCC features from an audio signal.
|
||||
|
||||
:param signal: the audio signal from which to compute features. Should be an N*1 array
|
||||
:param samplerate: the samplerate of the signal we are working with.
|
||||
:param winlen: the length of the analysis window in seconds. Default is 0.025s (25 milliseconds)
|
||||
:param winstep: the step between successive windows in seconds. Default is 0.01s (10 milliseconds)
|
||||
:param numcep: the number of cepstrum to return, default 13
|
||||
:param nfilt: the number of filters in the filterbank, default 26.
|
||||
:param nfft: the FFT size. Default is 512.
|
||||
:param lowfreq: lowest band edge of mel filters. In Hz, default is 0.
|
||||
:param highfreq: highest band edge of mel filters. In Hz, default is samplerate/2
|
||||
:param preemph: apply preemphasis filter with preemph as coefficient. 0 is no filter. Default is 0.97.
|
||||
:param ceplifter: apply a lifter to final cepstral coefficients. 0 is no lifter. Default is 22.
|
||||
:param appendEnergy: if this is true, the zeroth cepstral coefficient is replaced with the log of the total frame energy.
|
||||
:returns: A numpy array of size (NUMFRAMES by numcep) containing features. Each row holds 1 feature vector.
|
||||
"""
|
||||
feat,energy = fbank(signal,samplerate,winlen,winstep,nfilt,nfft,lowfreq,highfreq,preemph)
|
||||
feat = numpy.log(feat)
|
||||
feat = dct(feat, type=2, axis=1, norm='ortho')[:,:numcep]
|
||||
feat = lifter(feat,ceplifter)
|
||||
if appendEnergy: feat[:,0] = numpy.log(energy) # replace first cepstral coefficient with log of frame energy
|
||||
return feat
|
||||
|
||||
def fbank(signal,samplerate=16000,winlen=0.025,winstep=0.01,
|
||||
nfilt=26,nfft=512,lowfreq=0,highfreq=None,preemph=0.97):
|
||||
"""Compute Mel-filterbank energy features from an audio signal.
|
||||
|
||||
:param signal: the audio signal from which to compute features. Should be an N*1 array
|
||||
:param samplerate: the samplerate of the signal we are working with.
|
||||
:param winlen: the length of the analysis window in seconds. Default is 0.025s (25 milliseconds)
|
||||
:param winstep: the step between successive windows in seconds. Default is 0.01s (10 milliseconds)
|
||||
:param nfilt: the number of filters in the filterbank, default 26.
|
||||
:param nfft: the FFT size. Default is 512.
|
||||
:param lowfreq: lowest band edge of mel filters. In Hz, default is 0.
|
||||
:param highfreq: highest band edge of mel filters. In Hz, default is samplerate/2
|
||||
:param preemph: apply preemphasis filter with preemph as coefficient. 0 is no filter. Default is 0.97.
|
||||
:returns: 2 values. The first is a numpy array of size (NUMFRAMES by nfilt) containing features. Each row holds 1 feature vector. The
|
||||
second return value is the energy in each frame (total energy, unwindowed)
|
||||
"""
|
||||
highfreq= highfreq or samplerate/2
|
||||
signal = sigproc.preemphasis(signal,preemph)
|
||||
frames = sigproc.framesig(signal, winlen*samplerate, winstep*samplerate)
|
||||
pspec = sigproc.powspec(frames,nfft)
|
||||
energy = numpy.sum(pspec,1) # this stores the total energy in each frame
|
||||
energy = numpy.where(energy == 0,numpy.finfo(float).eps,energy) # if energy is zero, we get problems with log
|
||||
|
||||
fb = get_filterbanks(nfilt,nfft,samplerate,lowfreq,highfreq)
|
||||
feat = numpy.dot(pspec,fb.T) # compute the filterbank energies
|
||||
feat = numpy.where(feat == 0,numpy.finfo(float).eps,feat) # if feat is zero, we get problems with log
|
||||
|
||||
return feat,energy
|
||||
|
||||
def logfbank(signal,samplerate=16000,winlen=0.025,winstep=0.01,
|
||||
nfilt=26,nfft=512,lowfreq=0,highfreq=None,preemph=0.97):
|
||||
"""Compute log Mel-filterbank energy features from an audio signal.
|
||||
|
||||
:param signal: the audio signal from which to compute features. Should be an N*1 array
|
||||
:param samplerate: the samplerate of the signal we are working with.
|
||||
:param winlen: the length of the analysis window in seconds. Default is 0.025s (25 milliseconds)
|
||||
:param winstep: the step between successive windows in seconds. Default is 0.01s (10 milliseconds)
|
||||
:param nfilt: the number of filters in the filterbank, default 26.
|
||||
:param nfft: the FFT size. Default is 512.
|
||||
:param lowfreq: lowest band edge of mel filters. In Hz, default is 0.
|
||||
:param highfreq: highest band edge of mel filters. In Hz, default is samplerate/2
|
||||
:param preemph: apply preemphasis filter with preemph as coefficient. 0 is no filter. Default is 0.97.
|
||||
:returns: A numpy array of size (NUMFRAMES by nfilt) containing features. Each row holds 1 feature vector.
|
||||
"""
|
||||
feat,energy = fbank(signal,samplerate,winlen,winstep,nfilt,nfft,lowfreq,highfreq,preemph)
|
||||
return numpy.log(feat)
|
||||
|
||||
def ssc(signal,samplerate=16000,winlen=0.025,winstep=0.01,
|
||||
nfilt=26,nfft=512,lowfreq=0,highfreq=None,preemph=0.97):
|
||||
"""Compute Spectral Subband Centroid features from an audio signal.
|
||||
|
||||
:param signal: the audio signal from which to compute features. Should be an N*1 array
|
||||
:param samplerate: the samplerate of the signal we are working with.
|
||||
:param winlen: the length of the analysis window in seconds. Default is 0.025s (25 milliseconds)
|
||||
:param winstep: the step between successive windows in seconds. Default is 0.01s (10 milliseconds)
|
||||
:param nfilt: the number of filters in the filterbank, default 26.
|
||||
:param nfft: the FFT size. Default is 512.
|
||||
:param lowfreq: lowest band edge of mel filters. In Hz, default is 0.
|
||||
:param highfreq: highest band edge of mel filters. In Hz, default is samplerate/2
|
||||
:param preemph: apply preemphasis filter with preemph as coefficient. 0 is no filter. Default is 0.97.
|
||||
:returns: A numpy array of size (NUMFRAMES by nfilt) containing features. Each row holds 1 feature vector.
|
||||
"""
|
||||
highfreq= highfreq or samplerate/2
|
||||
signal = sigproc.preemphasis(signal,preemph)
|
||||
frames = sigproc.framesig(signal, winlen*samplerate, winstep*samplerate)
|
||||
pspec = sigproc.powspec(frames,nfft)
|
||||
pspec = numpy.where(pspec == 0,numpy.finfo(float).eps,pspec) # if things are all zeros we get problems
|
||||
|
||||
fb = get_filterbanks(nfilt,nfft,samplerate,lowfreq,highfreq)
|
||||
feat = numpy.dot(pspec,fb.T) # compute the filterbank energies
|
||||
R = numpy.tile(numpy.linspace(1,samplerate/2,numpy.size(pspec,1)),(numpy.size(pspec,0),1))
|
||||
|
||||
return numpy.dot(pspec*R,fb.T) / feat
|
||||
|
||||
def hz2mel(hz):
|
||||
"""Convert a value in Hertz to Mels
|
||||
|
||||
:param hz: a value in Hz. This can also be a numpy array, conversion proceeds element-wise.
|
||||
:returns: a value in Mels. If an array was passed in, an identical sized array is returned.
|
||||
"""
|
||||
return 2595 * numpy.log10(1+hz/700.0)
|
||||
|
||||
def mel2hz(mel):
|
||||
"""Convert a value in Mels to Hertz
|
||||
|
||||
:param mel: a value in Mels. This can also be a numpy array, conversion proceeds element-wise.
|
||||
:returns: a value in Hertz. If an array was passed in, an identical sized array is returned.
|
||||
"""
|
||||
return 700*(10**(mel/2595.0)-1)
|
||||
|
||||
def get_filterbanks(nfilt=20,nfft=512,samplerate=16000,lowfreq=0,highfreq=None):
|
||||
"""Compute a Mel-filterbank. The filters are stored in the rows, the columns correspond
|
||||
to fft bins. The filters are returned as an array of size nfilt * (nfft/2 + 1)
|
||||
|
||||
:param nfilt: the number of filters in the filterbank, default 20.
|
||||
:param nfft: the FFT size. Default is 512.
|
||||
:param samplerate: the samplerate of the signal we are working with. Affects mel spacing.
|
||||
:param lowfreq: lowest band edge of mel filters, default 0 Hz
|
||||
:param highfreq: highest band edge of mel filters, default samplerate/2
|
||||
:returns: A numpy array of size nfilt * (nfft/2 + 1) containing filterbank. Each row holds 1 filter.
|
||||
"""
|
||||
highfreq= highfreq or samplerate/2
|
||||
assert highfreq <= samplerate/2, "highfreq is greater than samplerate/2"
|
||||
|
||||
# compute points evenly spaced in mels
|
||||
lowmel = hz2mel(lowfreq)
|
||||
highmel = hz2mel(highfreq)
|
||||
melpoints = numpy.linspace(lowmel,highmel,nfilt+2)
|
||||
# our points are in Hz, but we use fft bins, so we have to convert
|
||||
# from Hz to fft bin number
|
||||
bin = numpy.floor((nfft+1)*mel2hz(melpoints)/samplerate)
|
||||
|
||||
fbank = numpy.zeros([nfilt,nfft/2+1])
|
||||
for j in xrange(0,nfilt):
|
||||
for i in xrange(int(bin[j]),int(bin[j+1])):
|
||||
fbank[j,i] = (i - bin[j])/(bin[j+1]-bin[j])
|
||||
for i in xrange(int(bin[j+1]),int(bin[j+2])):
|
||||
fbank[j,i] = (bin[j+2]-i)/(bin[j+2]-bin[j+1])
|
||||
return fbank
|
||||
|
||||
def lifter(cepstra,L=22):
|
||||
"""Apply a cepstral lifter the the matrix of cepstra. This has the effect of increasing the
|
||||
magnitude of the high frequency DCT coeffs.
|
||||
|
||||
:param cepstra: the matrix of mel-cepstra, will be numframes * numcep in size.
|
||||
:param L: the liftering coefficient to use. Default is 22. L <= 0 disables lifter.
|
||||
"""
|
||||
if L > 0:
|
||||
nframes,ncoeff = numpy.shape(cepstra)
|
||||
n = numpy.arange(ncoeff)
|
||||
lift = 1+ (L/2)*numpy.sin(numpy.pi*n/L)
|
||||
return lift*cepstra
|
||||
else:
|
||||
# values of L <= 0, do nothing
|
||||
return cepstra
|
||||
|
Binary file not shown.
@ -1,113 +0,0 @@
|
||||
# This file includes routines for basic signal processing including framing and computing power spectra.
|
||||
# Author: James Lyons 2012
|
||||
|
||||
import numpy
|
||||
import math
|
||||
|
||||
def framesig(sig,frame_len,frame_step,winfunc=lambda x:numpy.ones((1,x))):
|
||||
"""Frame a signal into overlapping frames.
|
||||
|
||||
:param sig: the audio signal to frame.
|
||||
:param frame_len: length of each frame measured in samples.
|
||||
:param frame_step: number of samples after the start of the previous frame that the next frame should begin.
|
||||
:param winfunc: the analysis window to apply to each frame. By default no window is applied.
|
||||
:returns: an array of frames. Size is NUMFRAMES by frame_len.
|
||||
"""
|
||||
slen = len(sig)
|
||||
frame_len = int(round(frame_len))
|
||||
frame_step = int(round(frame_step))
|
||||
if slen <= frame_len:
|
||||
numframes = 1
|
||||
else:
|
||||
numframes = 1 + int(math.ceil((1.0*slen - frame_len)/frame_step))
|
||||
|
||||
padlen = int((numframes-1)*frame_step + frame_len)
|
||||
|
||||
zeros = numpy.zeros((padlen - slen,))
|
||||
padsignal = numpy.concatenate((sig,zeros))
|
||||
|
||||
indices = numpy.tile(numpy.arange(0,frame_len),(numframes,1)) + numpy.tile(numpy.arange(0,numframes*frame_step,frame_step),(frame_len,1)).T
|
||||
indices = numpy.array(indices,dtype=numpy.int32)
|
||||
frames = padsignal[indices]
|
||||
win = numpy.tile(winfunc(frame_len),(numframes,1))
|
||||
return frames*win
|
||||
|
||||
|
||||
def deframesig(frames,siglen,frame_len,frame_step,winfunc=lambda x:numpy.ones((1,x))):
|
||||
"""Does overlap-add procedure to undo the action of framesig.
|
||||
|
||||
:param frames: the array of frames.
|
||||
:param siglen: the length of the desired signal, use 0 if unknown. Output will be truncated to siglen samples.
|
||||
:param frame_len: length of each frame measured in samples.
|
||||
:param frame_step: number of samples after the start of the previous frame that the next frame should begin.
|
||||
:param winfunc: the analysis window to apply to each frame. By default no window is applied.
|
||||
:returns: a 1-D signal.
|
||||
"""
|
||||
frame_len = round(frame_len)
|
||||
frame_step = round(frame_step)
|
||||
numframes = numpy.shape(frames)[0]
|
||||
assert numpy.shape(frames)[1] == frame_len, '"frames" matrix is wrong size, 2nd dim is not equal to frame_len'
|
||||
|
||||
indices = numpy.tile(numpy.arange(0,frame_len),(numframes,1)) + numpy.tile(numpy.arange(0,numframes*frame_step,frame_step),(frame_len,1)).T
|
||||
indices = numpy.array(indices,dtype=numpy.int32)
|
||||
padlen = (numframes-1)*frame_step + frame_len
|
||||
|
||||
if siglen <= 0: siglen = padlen
|
||||
|
||||
rec_signal = numpy.zeros((1,padlen))
|
||||
window_correction = numpy.zeros((1,padlen))
|
||||
win = winfunc(frame_len)
|
||||
|
||||
for i in range(0,numframes):
|
||||
window_correction[indices[i,:]] = window_correction[indices[i,:]] + win + 1e-15 #add a little bit so it is never zero
|
||||
rec_signal[indices[i,:]] = rec_signal[indices[i,:]] + frames[i,:]
|
||||
|
||||
rec_signal = rec_signal/window_correction
|
||||
return rec_signal[0:siglen]
|
||||
|
||||
def magspec(frames,NFFT):
|
||||
"""Compute the magnitude spectrum of each frame in frames. If frames is an NxD matrix, output will be NxNFFT.
|
||||
|
||||
:param frames: the array of frames. Each row is a frame.
|
||||
:param NFFT: the FFT length to use. If NFFT > frame_len, the frames are zero-padded.
|
||||
:returns: If frames is an NxD matrix, output will be NxNFFT. Each row will be the magnitude spectrum of the corresponding frame.
|
||||
"""
|
||||
complex_spec = numpy.fft.rfft(frames,NFFT)
|
||||
return numpy.absolute(complex_spec)
|
||||
|
||||
def powspec(frames,NFFT):
|
||||
"""Compute the power spectrum of each frame in frames. If frames is an NxD matrix, output will be NxNFFT.
|
||||
|
||||
:param frames: the array of frames. Each row is a frame.
|
||||
:param NFFT: the FFT length to use. If NFFT > frame_len, the frames are zero-padded.
|
||||
:returns: If frames is an NxD matrix, output will be NxNFFT. Each row will be the power spectrum of the corresponding frame.
|
||||
"""
|
||||
return 1.0/NFFT * numpy.square(magspec(frames,NFFT))
|
||||
|
||||
def logpowspec(frames,NFFT,norm=1):
|
||||
"""Compute the log power spectrum of each frame in frames. If frames is an NxD matrix, output will be NxNFFT.
|
||||
|
||||
:param frames: the array of frames. Each row is a frame.
|
||||
:param NFFT: the FFT length to use. If NFFT > frame_len, the frames are zero-padded.
|
||||
:param norm: If norm=1, the log power spectrum is normalised so that the max value (across all frames) is 1.
|
||||
:returns: If frames is an NxD matrix, output will be NxNFFT. Each row will be the log power spectrum of the corresponding frame.
|
||||
"""
|
||||
ps = powspec(frames,NFFT);
|
||||
ps[ps<=1e-30] = 1e-30
|
||||
lps = 10*numpy.log10(ps)
|
||||
if norm:
|
||||
return lps - numpy.max(lps)
|
||||
else:
|
||||
return lps
|
||||
|
||||
def preemphasis(signal,coeff=0.95):
|
||||
"""perform preemphasis on the input signal.
|
||||
|
||||
:param signal: The signal to filter.
|
||||
:param coeff: The preemphasis coefficient. 0 is no filter, default is 0.95.
|
||||
:returns: the filtered signal.
|
||||
"""
|
||||
return numpy.append(signal[0],signal[1:]-coeff*signal[:-1])
|
||||
|
||||
|
||||
|
Binary file not shown.
Loading…
x
Reference in New Issue
Block a user