We use a synthetic audio signal generation process as a driver for examining performance. Starting with an initial slow implementation, we update the method step by step to improve performance while keeping the same output.
# Load the IPython extensions this notebook relies on
# line_profiler: per-line CPU timing (provides the %lprun magic used below)
%load_ext line_profiler
# memory_profiler: memory usage profiling magics
%load_ext memory_profiler
# Cython support
%load_ext Cython
# autoreload: reload modules before executing user code
%load_ext autoreload
# mode 2: reload all modules on every execution
%autoreload 2
# setup backend for matplotlib plots: render figures inline in the notebook
%matplotlib inline
# Imports
import pandas as pd
import numpy as np
import sympy as sp
import matplotlib.pyplot as plt
import math
import random
# Sampling configuration shared by every generator below.
# Sampling rate: number of samples in one second.
samples_per_second = 1000
# Total signal duration in seconds (one hour).
sample_time = 60 * 60

# Seed both random number generators so repeated runs are reproducible:
# the pure-Python generators draw from `random`, the vectorized ones from
# `np.random`.
random.seed(1)
np.random.seed(1)
# Quick data view
def graph(data, seconds):
    """Plot a ``[time, signal]`` pair from 0 up to ``seconds`` on the x axis.

    Args:
        data: Two-item sequence; ``data[0]`` supplies the x values and
            ``data[1]`` the y values.
        seconds: Upper limit of the x axis (the lower limit is 0).
    """
    _, axis = plt.subplots()
    axis.plot(data[0], data[1])
    axis.set_xlim(0, seconds)
    # Fixed vertical range so that successive plots are directly comparable.
    axis.set_ylim(-3, 3)
    axis.set_xlabel('time')
    axis.set_ylabel('signal')
    axis.grid(True)
    plt.show()
# Really slow generation of synthetic data
def reallySlowGenerateTimeSeriesData(seconds, samples_per_second):
    """Generate synthetic data one sample at a time (deliberately slow baseline).

    Every sample is a 10 Hz sine evaluated at its timestamp plus uniform
    noise drawn with ``random.random()`` and rescaled to the range [-1, 1).

    Args:
        seconds: Whole number of seconds to generate.
        samples_per_second: Whole number of samples per second.

    Returns:
        ``[time, signal]``: two Python lists holding
        ``seconds * samples_per_second`` floats each. Timestamps start at 0
        and advance by ``1 / samples_per_second``.
    """
    time = []
    signal = []
    # generate signal
    for s in range(seconds):
        for sps in range(samples_per_second):
            # Derive the timestamp from the sample index instead of repeatedly
            # adding 1/samples_per_second: the series then starts at 0 (like
            # the faster generators) and rounding error does not accumulate.
            timestamp = (s * samples_per_second + sps) / samples_per_second
            noise = random.random()
            scaled_noise = -1 + (noise * 2)
            sample = math.sin(2 * math.pi * 10 * timestamp) + scaled_noise
            time.append(timestamp)
            signal.append(sample)
    # return time and signal
    return [time, signal]
# Line-profile the baseline while generating the full-length data set; the result is kept in data1
%lprun -f reallySlowGenerateTimeSeriesData data1 = reallySlowGenerateTimeSeriesData(sample_time,samples_per_second)
# Plot the first 10 seconds, then zoom in on the first second
graph(data1,10)
graph(data1,1)
# Try removing one of the for loops
def slightlyFasterGenerateTimeSeriesData(seconds, samples_per_second):
    """Generate synthetic data with a NumPy time axis and a Python sample loop.

    The time axis is built in one step; the signal is still computed sample
    by sample with ``math.sin`` and ``random.random()``.

    Args:
        seconds: Duration to generate, in seconds.
        samples_per_second: Number of samples per second.

    Returns:
        ``[time, signal]`` where ``time`` is a float NumPy array starting at 0
        and ``signal`` is a Python list with one float per timestamp.
    """
    # generate time
    # Count samples with an integer-step arange and scale afterwards:
    # np.arange with a fractional step derives its length from a floating
    # point division and can return one sample too many for some rates.
    time = np.arange(seconds * samples_per_second) / samples_per_second
    # generate signal
    signal = []
    for t in time:
        noise = random.random()
        scaled_noise = -1 + (noise * 2)
        sample = math.sin(2 * math.pi * 10 * t) + scaled_noise
        signal.append(sample)
    # return time and signal
    return [time, signal]
# Line-profile the single-loop version on the full-length data set; the result is kept in data2
%lprun -f slightlyFasterGenerateTimeSeriesData data2 = slightlyFasterGenerateTimeSeriesData(sample_time,samples_per_second)
# Plot the first 10 seconds, then zoom in on the first second
graph(data2,10)
graph(data2,1)
# replace python math lib with numpy, process the whole array of data instead of a row by row
def reallyFastGenerateTimeSeriesData(seconds, samples_per_second):
    """Generate synthetic data with fully vectorized NumPy operations.

    The signal is a 10 Hz sine evaluated over the whole time axis at once,
    plus uniform noise drawn with ``np.random.random``.

    Args:
        seconds: Duration to generate, in seconds.
        samples_per_second: Number of samples per second.

    Returns:
        ``[time, signal]``: two float NumPy arrays of equal length; ``time``
        starts at 0 and advances by ``1 / samples_per_second``.
    """
    # generate time
    # Integer-step arange scaled afterwards: np.arange with a fractional
    # step can return one sample too many because its length comes from a
    # floating point division.
    time = np.arange(seconds * samples_per_second) / samples_per_second
    # generate signal
    noise = -2 * np.random.random(len(time)) + 1
    signal = np.sin(2 * np.pi * 10 * time) + noise
    # return time and signal
    return [time, signal]
# Line-profile the vectorized version on the full-length data set; the result is kept in data3
%lprun -f reallyFastGenerateTimeSeriesData data3 = reallyFastGenerateTimeSeriesData(sample_time,samples_per_second)
# Plot the first 10 seconds, then zoom in on the first second
graph(data3,10)
graph(data3,1)
import math
import random
def pandasReallySlowGenerateTimeSeriesData(seconds, samples_per_second):
    """Generate synthetic data with pandas, computing the signal row by row.

    The time axis is stored in a DataFrame and the signal is produced by
    calling a scalar Python function on every timestamp via ``Series.apply``.

    Args:
        seconds: Duration to generate, in seconds.
        samples_per_second: Number of samples per second.

    Returns:
        ``[time, signal]``: the DataFrame's ``'time'`` and ``'signal'``
        columns as pandas Series.
    """
    # generate time
    # Integer-step arange scaled afterwards: np.arange with a fractional
    # step can return one sample too many because its length comes from a
    # floating point division.
    time = np.arange(seconds * samples_per_second) / samples_per_second
    # create pandas
    df = pd.DataFrame(data=time, columns=['time'])

    def generateSignal(t):
        """Return one 10 Hz sine sample at time ``t`` plus noise in [-1, 1)."""
        noise = random.random()
        scaled_noise = -1 + (noise * 2)
        return math.sin(2 * math.pi * 10 * t) + scaled_noise

    # generate signal: the helper is passed directly, no wrapping lambda needed
    df['signal'] = df['time'].apply(generateSignal)
    # return time and signal
    return [df['time'], df['signal']]
# Line-profile the row-by-row pandas version on the full-length data set; the result is kept in data4
%lprun -f pandasReallySlowGenerateTimeSeriesData data4 = pandasReallySlowGenerateTimeSeriesData(sample_time,samples_per_second)
# Plot the first 10 seconds, then zoom in on the first second
graph(data4,10)
graph(data4,1)
def pandasFasterSlowGenerateTimeSeriesData(seconds, samples_per_second):
    """Generate synthetic data with pandas, vectorized over the Series.

    The whole ``'time'`` column (a pandas Series) is handed to the NumPy
    functions in a single call instead of being processed row by row.

    Args:
        seconds: Duration to generate, in seconds.
        samples_per_second: Number of samples per second.

    Returns:
        ``[time, signal]``: the DataFrame's ``'time'`` and ``'signal'``
        columns as pandas Series.
    """
    # generate time
    # Integer-step arange scaled afterwards: np.arange with a fractional
    # step can return one sample too many because its length comes from a
    # floating point division.
    time = np.arange(seconds * samples_per_second) / samples_per_second
    # create pandas
    df = pd.DataFrame(data=time, columns=['time'])

    def generateSignal(t):
        """Return a 10 Hz sine over ``t`` plus uniform noise, one value per entry."""
        noise = -2 * np.random.random(len(t)) + 1
        return np.sin(2 * np.pi * 10 * t) + noise

    # generate signal (operates on the Series itself)
    df['signal'] = generateSignal(df['time'])
    # return time and signal
    return [df['time'], df['signal']]
# Line-profile the Series-vectorized pandas version on the full-length data set; the result is kept in data5
%lprun -f pandasFasterSlowGenerateTimeSeriesData data5 = pandasFasterSlowGenerateTimeSeriesData(sample_time,samples_per_second)
# Plot the first 10 seconds, then zoom in on the first second
graph(data5,10)
graph(data5,1)
def pandasNumpyFastSlowGenerateTimeSeriesData(seconds, samples_per_second):
    """Generate synthetic data with pandas, vectorized over the raw NumPy array.

    Same as the Series-based variant, except that the signal is computed
    from ``df['time'].values`` (the underlying NumPy array) rather than from
    the Series object.

    Args:
        seconds: Duration to generate, in seconds.
        samples_per_second: Number of samples per second.

    Returns:
        ``[time, signal]``: the DataFrame's ``'time'`` and ``'signal'``
        columns as pandas Series.
    """
    # generate time
    # Integer-step arange scaled afterwards: np.arange with a fractional
    # step can return one sample too many because its length comes from a
    # floating point division.
    time = np.arange(seconds * samples_per_second) / samples_per_second
    # create pandas
    df = pd.DataFrame(data=time, columns=['time'])

    def generateSignal(t):
        """Return a 10 Hz sine over ``t`` plus uniform noise, one value per entry."""
        noise = -2 * np.random.random(len(t)) + 1
        return np.sin(2 * np.pi * 10 * t) + noise

    # generate signal (operates on the plain NumPy array behind the column)
    df['signal'] = generateSignal(df['time'].values)
    # return time and signal
    return [df['time'], df['signal']]
# Line-profile the array-vectorized pandas version on the full-length data set; the result is kept in data6
%lprun -f pandasNumpyFastSlowGenerateTimeSeriesData data6 = pandasNumpyFastSlowGenerateTimeSeriesData(sample_time,samples_per_second)
# Plot the first 10 seconds, then zoom in on the first second
graph(data6,10)
graph(data6,1)
# Sanity check: print the number of time values and signal values
# produced by each of the six generator variants.
for report_template, dataset in (
    ("Data1[0]: {}, Data1[1]: {}", data1),
    ("Data2[0]: {}, Data2[1]: {}", data2),
    ("Data3[0]: {}, Data3[1]: {}", data3),
    ("Data4[0]: {}, Data4[1]: {}", data4),
    ("Data5[0]: {}, Data5[1]: {}", data5),
    ("Data6[0]: {}, Data6[1]: {}", data6),
):
    print(report_template.format(len(dataset[0]), len(dataset[1])))
# Generate synthetic signal - TODO: improve random
def generateSignal(time):
    """Return a noisy 10 Hz sine evaluated at every entry of ``time``.

    Args:
        time: Array of timestamps; one output value is produced per entry.

    Returns:
        ``sin(2*pi*10*time)`` plus one uniform noise value per entry, the
        noise being ``np.random.random`` draws rescaled as ``1 - 2*r``.
    """
    sample_count = len(time)
    uniform_noise = 1 - 2 * np.random.random(sample_count)
    phase = 2 * np.pi * 10 * time
    return np.sin(phase) + uniform_noise
# generate time
def generateTime(seconds, samples_per_second):
    """Generate evenly spaced timestamps starting at 0.

    Args:
        seconds: Duration to cover, in seconds.
        samples_per_second: Number of samples per second.

    Returns:
        Float NumPy array ``[0, 1/sps, 2/sps, ...]`` holding
        ``seconds * samples_per_second`` entries for whole-number inputs;
        every value is strictly less than ``seconds``.
    """
    # Count samples with an integer-step arange and scale afterwards.
    # np.arange(0, seconds, 1/samples_per_second) derives its length from a
    # floating point division and can return one sample too many (for
    # example 50 points for 1 second at 49 samples per second).
    return np.arange(seconds * samples_per_second) / samples_per_second
def processTimeSeriesData(seconds, samples_per_second):
    """Build a DataFrame holding a time axis and its synthetic signal.

    Args:
        seconds: Forwarded to ``generateTime``.
        samples_per_second: Forwarded to ``generateTime``.

    Returns:
        DataFrame with a ``'time'`` column from ``generateTime`` and a
        ``'signal'`` column computed by ``generateSignal`` on that column's
        underlying array.
    """
    timestamps = generateTime(seconds, samples_per_second)
    # Start the frame with the time axis only ...
    frame = pd.DataFrame(data=timestamps, columns=['time'])
    # ... then derive the signal from the raw values of that column.
    time_values = frame['time'].values
    frame['signal'] = generateSignal(time_values)
    return frame
# Line-profile the final pipeline on the full-length data set; the resulting DataFrame is kept in dataFrame
%lprun -f processTimeSeriesData dataFrame = processTimeSeriesData(sample_time,samples_per_second)
# Plot signal against time for the first 10 seconds, then for the first second
ax = dataFrame.plot(kind='line',x='time',y='signal',xlim=(0,10),ylim=(-3,3),grid=True)
ax = dataFrame.plot(kind='line',x='time',y='signal',xlim=(0,1),ylim=(-3,3),grid=True)
This time series carries no real information yet: it is only a constant 10 Hz sine wave plus uniform random noise.
# Let's see how the signal looks in frequency domain
# Two stacked axes: raw signal over time on top, spectrogram below
fig, (ax1, ax2) = plt.subplots(nrows=2)
ax1.plot(dataFrame['time'], dataFrame['signal'])
# Fs is the sampling rate; consecutive FFT segments overlap by 900 samples.
# NOTE(review): NFFT reuses the duration constant sample_time as the FFT
# segment length - confirm this is intended.
im = ax2.specgram(dataFrame['signal'], NFFT=sample_time, Fs=samples_per_second, noverlap=900)
plt.show()
# add info into the signal - Add C Note at 261.63Hz
def addTones(time, signal, frequency=261.63, start=29*60, end=31*60):
    """Return a copy of ``signal`` with a pure sine tone written into a time window.

    Samples whose timestamp lies in ``[start, end]`` (both ends inclusive)
    are replaced by ``sin(2*pi*frequency*t)``; all other samples are copied
    unchanged. The input ``signal`` is not modified.

    Args:
        time: Timestamps in seconds, one per sample of ``signal``.
        signal: Signal values to copy and modify.
        frequency: Tone frequency in Hz. Defaults to 261.63 (the C note
            used by this notebook).
        start: First timestamp of the window, in seconds. Defaults to
            minute 29.
        end: Last timestamp of the window, in seconds. Defaults to
            minute 31.

    Returns:
        New NumPy array with the same length as ``signal``.
    """
    # Accept any array-like time axis; comparisons below need an ndarray.
    time = np.asarray(time)
    new_signal = np.copy(signal)
    # Build the window mask once and reuse it for selection and assignment.
    in_window = (time >= start) & (time <= end)
    new_signal[in_window] = np.sin(2 * np.pi * frequency * time[in_window])
    return new_signal
# Store the tone-carrying copy of the signal in a new column; 'signal' itself is left untouched
dataFrame['signal_new'] = addTones(dataFrame['time'].values,dataFrame['signal'].values)
# Same two-panel view as before (time domain on top, spectrogram below), now for the modified signal
fig, (ax1, ax2) = plt.subplots(nrows=2)
ax1.plot(dataFrame['time'], dataFrame['signal_new'])
im = ax2.specgram(dataFrame['signal_new'], NFFT=sample_time, Fs=samples_per_second, noverlap=900)
plt.show()
# Zoom in on minutes 28 to 32, which contain the window where the tone was written
ax = dataFrame.plot(kind='line',x='time',y='signal_new',xlim=(28*60,32*60),ylim=(-3,3),grid=True)
from IPython.display import Audio
# Audio widget for the samples between minute 28.98 and minute 29.02 (around the start of the tone),
# played back at the generation sampling rate; kept as the last expression of the cell
Audio(dataFrame[(dataFrame.time <= 29.02*60) & (dataFrame.time >= 28.98*60)]['signal_new'],rate=samples_per_second)