• Neural network (1-17-17-1) - input (1) time index and output (1) amplitude
Visual illustration of the network configuration.
Complete Code
// Learning Sound Deep Neural Network
//   input  (1)     : time (time index for input)
//   hidden (17,17) : two hidden layers
//   output (1)     : amplitude
await xinitialize({
  layers: [1, 17, 17, 1],
  build: 'cpu',
  learningrate: 0.1,
});
// Status panel: describes the demo and exposes the two read-outs
// (#counter and #error) that are looked up by id elsewhere in the script.
let div = document.createElement('div');
div.innerHTML = ` <br><br> Load Training Sound (hello.mp3) and Start Training Neural Network <br><br> Number of training iterations:<br> <div id="counter">Iteration:</div> Error between ideal and trained version: <div id="error">Error:</div> <br> `;
document.body.appendChild(div);

/**
 * Appends a button to the page body.
 * @param {Function} f - handler invoked (with no arguments) on click
 * @param {string} n - markup used as the button label
 */
let but = (f, n) => {
  const button = document.createElement('button');
  button.innerHTML = n;
  // Wrap the handler so the click event object is never forwarded to it.
  button.onclick = () => {
    f();
  };
  document.body.appendChild(button);
};

// Control buttons, added in display order.
[
  [getData, 'Start'],
  [stopTraining, 'Stop'],
  [playOriginalSound, 'Play Original Sound'],
  [playTrainedSound, 'Play Trained Sound'],
].forEach(([handler, label]) => but(handler, label));
// Shared state for the demo; everything starts empty until 'Start' is pressed.

// Audio context used for playback (created in getData).
let audioCtx = null;

// Buffer holding the reference sound the network is trained on.
let sndData = null;

// Buffer whose samples are overwritten with the network's output for preview.
let sndTrainedData = null;

// Number of completed training passes.
let iteration = 0;

// Cleared by stopTraining() to halt the training loop.
let training = true;
/**
 * Requests that training stop by clearing the shared `training` flag.
 * The flag is checked at the start of each training pass.
 */
function stopTraining() {
  training = false;
}
async function getData() { if(window.webkitAudioContext) { audioCtx = new window.webkitAudioContext(); } else { audioCtx = new window.AudioContext(); }
training = true; source = audioCtx.createBufferSource(); request = new XMLHttpRequest();
// start training the neural network await iterate(); },
function(e){"Error with decoding audio data" + e.error}); } request.send(); }
/**
 * Plays an audio buffer through the shared audio context.
 *
 * Does nothing (other than logging a warning) when the audio context has not
 * been created yet or no buffer is supplied, e.g. when a "Play" button is
 * pressed before the sound has been loaded.
 *
 * @param {AudioBuffer} sdata - buffer to play
 */
function playSoundData(sdata) {
  if (audioCtx == null || sdata == null) {
    console.warn('playSoundData: no audio loaded yet - press Start first');
    return;
  }

  // A buffer source node is single-use, so a fresh one is created per playback.
  const source = audioCtx.createBufferSource();

  // set the buffer in the AudioBufferSourceNode
  source.buffer = sdata;

  // connect the AudioBufferSourceNode to the
  // destination so you can hear the sound
  source.connect(audioCtx.destination);

  // start the source playing
  source.start();
}
/**
 * Reads one sample from the first channel of an audio buffer.
 *
 * @param {AudioBuffer} dataBuffer - buffer to read from
 * @param {number} t - sample index
 * @returns {number} the sample stored at index `t` of channel 0
 */
function getAmplitude(dataBuffer, t) {
  return dataBuffer.getChannelData(0)[t];
}
/**
 * Plays back the unmodified reference sound held in `sndData`.
 */
function playOriginalSound() {
  const original = sndData;
  playSoundData(original);
}
/**
 * Previews what the network has learned: queries the network once per sample,
 * writes the results into channel 0 of `sndTrainedData`, then plays that buffer.
 *
 * Does nothing (other than logging a warning) when `sndTrainedData` has not
 * been set yet, e.g. when the button is pressed before the sound is loaded.
 *
 * @returns {Promise<void>} resolves once playback has been started
 */
async function playTrainedSound() {
  if (sndTrainedData == null) {
    console.warn('playTrainedSound: no audio loaded yet - press Start first');
    return;
  }

  // Fetch the channel once; the samples are overwritten in place.
  const bdata = sndTrainedData.getChannelData(0);
  const noSamples = bdata.length;

  for (let tt = 0; tt < noSamples; tt += 1) {
    // Network input is the time index scaled to 0..1.
    const amp = await xactivate([tt / noSamples]);

    // Map the network output from 0..1 back to the -1..1 sample range.
    const val = amp[0] * 2.0 - 1.0;
    console.assert(val >= -1 && val <= 1);
    bdata[tt] = val;
  }

  playSoundData(sndTrainedData);
}
// iteration starts once the data has been loaded
/**
 * Runs one training pass over every sample of `sndData`, updates the on-page
 * iteration/error read-outs, and schedules the next pass.
 *
 * Each sample is presented as (time index scaled to 0..1) -> (amplitude
 * rescaled from -1..1 to 0..1). The target is the rescaled amplitude so that
 * it matches the 0..1 output range asserted here and the `* 2 - 1` mapping
 * applied when the trained sound is played back.
 *
 * Returns immediately, without rescheduling, once `training` is false.
 *
 * @returns {Promise<void>}
 */
async function iterate() {
  if (!training) return;

  iteration++;

  // Read the channel once per pass instead of once per sample.
  const samples = sndData.getChannelData(0);
  const noSamples = samples.length;

  console.log('noSamples:', noSamples);

  // Step size shrinks as the iteration count grows; constant within a pass.
  const dynamicRate = 1.0 / (100.0 + iteration);

  let totalError = 0;
  for (let tt = 0; tt < noSamples; tt += 1) {
    const amp = samples[tt]; // -1 to 1
    const ampNormalized = (amp + 1.0) * 0.5; // 0 to 1

    const val = await xactivate([tt / noSamples]);
    console.assert(val[0] >= 0);
    console.assert(val[0] <= 1);

    // NOTE(review): xpropagate is assumed to take (target values, learning
    // rate) - confirm against the network library.
    await xpropagate([ampNormalized], dynamicRate);

    totalError += Math.abs(val[0] - ampNormalized);
  }

  document.getElementById('counter').textContent = `Iteration: ${iteration}`;
  document.getElementById('error').textContent = `Error: ${totalError}`;

  requestAnimationFrame(iterate);
}
Things to Try
• Try other test sounds (keep them small) - saying other words
• Try sounds with more complex audio (e.g., other languages with tones, like Chinese)
• Provide two sounds of the same length to the neural network, together with an extra input number that can be varied between 0 and 1 once the network has been trained
• Try filtering the sound signal; give a sound input with an echo and an output sound without an echo