Learning How to Capture and Record Audio in HTML5

getUserMedia, the API defined by WebRTC, is the main method for accessing media capture devices such as webcams and microphones in web pages. Compatible desktop browsers include Chrome, Firefox, Opera, and Edge. This article shares resources for learning how to capture and record audio in HTML5.


How to capture audio from a microphone in HTML5?

Here is a code snippet adapted from MDN (Mozilla Developer Network):

var p = navigator.mediaDevices.getUserMedia({ audio: true, video: true });

p.then(function(mediaStream) {
  var video = document.querySelector('video');
  // Attach the stream to the video element.
  // (srcObject replaces the deprecated createObjectURL(stream) approach.)
  if ('srcObject' in video) {
    video.srcObject = mediaStream;
  } else {
    video.src = window.URL.createObjectURL(mediaStream);
  }
  video.onloadedmetadata = function(e) {
    // Do something with the video here.
    video.play();
  };
});

p.catch(function(err) { console.log(err.name); }); // always check for errors at the end

With these constraints, the returned MediaStream contains both a video track and an audio track.
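If you only need one kind of track, you can inspect or extract tracks from the stream. A minimal sketch, continuing from the promise p above:

p.then(function(mediaStream) {
  console.log(mediaStream.getVideoTracks().length); // 1 video track
  console.log(mediaStream.getAudioTracks().length); // 1 audio track

  // Build an audio-only stream from the audio track, e.g. for recording
  var audioOnly = new MediaStream(mediaStream.getAudioTracks());
});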

Browser compatibility: see the getUserMedia support tables for desktop and mobile browsers on MDN.

How to record an audio stream and upload it to a remote server?

Interfaces for Recording Audio

The MediaRecorder API is the ideal way to record media streams, but browser support is still limited; a minimal sketch follows the compatibility note below.

Browser compatibility: see the MediaRecorder support tables for desktop and mobile browsers on MDN.
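Where MediaRecorder is available, recording takes only a few lines. A minimal sketch (the audio-only constraint and the five-second cutoff are illustrative choices, not part of the API):

navigator.mediaDevices.getUserMedia({ audio: true }).then(function(stream) {
  var recorder = new MediaRecorder(stream);
  var chunks = [];

  recorder.ondataavailable = function(e) {
    chunks.push(e.data); // encoded chunks arrive while recording
  };

  recorder.onstop = function() {
    // Combine the chunks into a single Blob; the container/codec is browser-chosen
    var blob = new Blob(chunks, { type: recorder.mimeType });
    // ... upload the blob or offer it for download
  };

  recorder.start();
  setTimeout(function() { recorder.stop(); }, 5000); // record for five seconds
});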

Alternatively, we can record audio with AudioNodes, the Web Audio API building blocks that represent audio sources, the audio destination, and intermediate processing modules.

The basic steps of recording audio in HTML5

  1. Capture media stream with getUserMedia.
  2. Create a MediaStreamAudioSourceNode with createMediaStreamSource.

     // Create an AudioContext first (missing from the original snippet)
     var audioCtx = new (window.AudioContext || window.webkitAudioContext)();
     var video = document.querySelector('video');
     var source;

     if (navigator.mediaDevices && navigator.mediaDevices.getUserMedia) {
        console.log('getUserMedia supported.');
        navigator.mediaDevices.getUserMedia(
           // constraints: audio and video for this app
           { audio: true, video: true }
        ).then(function(stream) {
           // Success: show the captured video, muted to avoid feedback
           if ('srcObject' in video) {
              video.srcObject = stream;
           } else {
              video.src = window.URL.createObjectURL(stream);
           }
           video.onloadedmetadata = function(e) {
              video.play();
              video.muted = true; // a boolean, not the string 'true'
           };

           // Create a MediaStreamAudioSourceNode
           // and feed the captured stream into it
           source = audioCtx.createMediaStreamSource(stream);
        }).catch(function(err) {
           console.log('The following gUM error occurred: ' + err);
        });
     } else {
        console.log('getUserMedia not supported on your browser!');
     }
    
  3. Connect the AudioNodes. Process the audio with a ScriptProcessorNode, using its onaudioprocess event to access the captured audio buffers.

     var scriptNode = audioCtx.createScriptProcessor(4096, 1, 1);

     scriptNode.onaudioprocess = function(audioProcessingEvent) {
       // The input buffer holds the audio captured from the microphone
       var inputBuffer = audioProcessingEvent.inputBuffer;

       // Loop through the input channels (in this case there is only one)
       for (var channel = 0; channel < inputBuffer.numberOfChannels; channel++) {
         var inputData = inputBuffer.getChannelData(channel);
         // inputData is a Float32Array of raw samples; copy or encode it here
       }
     };

     source.connect(scriptNode);
     scriptNode.connect(audioCtx.destination);
    
  4. Encode the audio buffer (e.g. to WAV) and send it to the server via XMLHttpRequest or WebSockets; see the upload sketch after this list.
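For example, once the samples are encoded into a Blob (a WAV encoder is shown in the next section), uploading with XMLHttpRequest is straightforward. A minimal sketch, assuming a hypothetical /upload endpoint on your server:

function uploadAudio(blob) {
  var formData = new FormData();
  formData.append('audio', blob, 'recording.wav'); // field name and filename are up to you

  var xhr = new XMLHttpRequest();
  xhr.open('POST', '/upload'); // hypothetical endpoint
  xhr.onload = function() {
    console.log('Upload finished with status ' + xhr.status);
  };
  xhr.send(formData);
}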

JavaScript Libraries

I found two useful JavaScript libraries for audio recording on GitHub: RecordRTC and Recorderjs.

RecordRTC is powerful, supporting both video and audio recording (try the online demo), whereas Recorderjs is simpler and supports audio recording only. Let's walk through the code logic of Recorderjs.

Use the onaudioprocess event to receive audio buffers:

this.context = source.context;
this.node = (this.context.createScriptProcessor || this.context.createJavaScriptNode).call(
    this.context, this.config.bufferLen, this.config.numChannels, this.config.numChannels);

this.node.onaudioprocess = function (e) {
    if (!_this.recording) return;

    var buffer = [];
    for (var channel = 0; channel < _this.config.numChannels; channel++) {
        buffer.push(e.inputBuffer.getChannelData(channel));
    }
    _this.worker.postMessage({
        command: 'record',
        buffer: buffer
    });
};

source.connect(this.node);
this.node.connect(this.context.destination); // this should not be necessary

Cache the received audio buffers in the worker:

function record(inputBuffer) {
    for (var channel = 0; channel < numChannels; channel++) {
        recBuffers[channel].push(inputBuffer[channel]);
    }
    recLength += inputBuffer[0].length;
}

Encode the audio buffer to WAV format:

function encodeWAV(samples) {
    var buffer = new ArrayBuffer(44 + samples.length * 2);
    var view = new DataView(buffer);

    /* RIFF identifier */
    writeString(view, 0, 'RIFF');
    /* RIFF chunk length */
    view.setUint32(4, 36 + samples.length * 2, true);
    /* RIFF type */
    writeString(view, 8, 'WAVE');
    /* format chunk identifier */
    writeString(view, 12, 'fmt ');
    /* format chunk length */
    view.setUint32(16, 16, true);
    /* sample format (raw PCM) */
    view.setUint16(20, 1, true);
    /* channel count */
    view.setUint16(22, numChannels, true);
    /* sample rate */
    view.setUint32(24, sampleRate, true);
    /* byte rate (sample rate * block align; the original hard-coded * 4, which assumes stereo) */
    view.setUint32(28, sampleRate * numChannels * 2, true);
    /* block align (channel count * bytes per sample) */
    view.setUint16(32, numChannels * 2, true);
    /* bits per sample */
    view.setUint16(34, 16, true);
    /* data chunk identifier */
    writeString(view, 36, 'data');
    /* data chunk length */
    view.setUint32(40, samples.length * 2, true);

    floatTo16BitPCM(view, 44, samples);

    return view;
}
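The two helpers used above are small. For reference, this is essentially how Recorderjs implements them:

function writeString(view, offset, string) {
    for (var i = 0; i < string.length; i++) {
        view.setUint8(offset + i, string.charCodeAt(i));
    }
}

function floatTo16BitPCM(output, offset, input) {
    for (var i = 0; i < input.length; i++, offset += 2) {
        // Clamp each sample to [-1, 1], then scale to a signed 16-bit integer
        var s = Math.max(-1, Math.min(1, input[i]));
        output.setInt16(offset, s < 0 ? s * 0x8000 : s * 0x7FFF, true);
    }
}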

Merge the audio buffers and export the WAV data:

function exportWAV(type) {
    var buffers = [];
    for (var channel = 0; channel < numChannels; channel++) {
        buffers.push(mergeBuffers(recBuffers[channel], recLength));
    }
    var interleaved;
    if (numChannels === 2) {
        interleaved = interleave(buffers[0], buffers[1]);
    } else {
        interleaved = buffers[0];
    }
    var dataview = encodeWAV(interleaved);
    var audioBlob = new Blob([dataview], { type: type });
}
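(mergeBuffers and interleave, like record and exportWAV, live in the library's Web Worker; they simply flatten the cached per-channel arrays and interleave the two channels for stereo.)

Putting it all together, using Recorderjs from application code looks roughly like this (the five-second cutoff and the uploadAudio call from the earlier sketch are illustrative):

var audioCtx = new (window.AudioContext || window.webkitAudioContext)();

navigator.mediaDevices.getUserMedia({ audio: true }).then(function(stream) {
  var source = audioCtx.createMediaStreamSource(stream);
  var rec = new Recorder(source, { numChannels: 1 });

  rec.record(); // start caching audio buffers

  setTimeout(function() {
    rec.stop();
    // Export the cached buffers as a WAV blob and send it to the server
    rec.exportWAV(function(blob) {
      uploadAudio(blob);
    });
  }, 5000);
});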
