I’m trying to capture audio data from a YouTube video using the Web Audio API. My goal is to save the raw audio in PCM format while the video is playing. However, I’m noticing inconsistencies in the captured audio data across different runs for the same video. While most of the audio data remains consistent, certain sections (e.g., the beginning, middle, or end) show differences, even when no changes are made to the video playback.
Interestingly, when these audio files are played, there is no perceptible difference in the listening experience. However, the binary data of the captured audio differs across runs, and this is causing issues for my use case where consistency in raw data is critical.
What I’m Doing:
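I’m using `videoElement.captureStream()` together with an AudioContext: a MediaStreamAudioSourceNode feeds the captured stream into an AudioWorkletNode, which forwards the raw samples to the content script.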
Environment
Browser: Chrome
Audio API: Web Audio API
PCM Format: Float32, mono channel
I have developed a browser extension with simple start and stop buttons for recording. Additionally, the recording automatically stops when the video finishes playing.
content.js
// Recording state shared by the start/stop handlers of this content script.
let audioContext;
let audioWorkletNode;
let audioChunks = [];
let isRecording = false;

// Dispatch extension messages to the recorder. Messages whose type is neither
// 'start-recording' nor 'stop-recording' are ignored.
chrome.runtime.onMessage.addListener((message) => {
  switch (message.type) {
    case 'start-recording':
      startRecording();
      break;
    case 'stop-recording':
      stopRecording();
      console.log('Recording stopped.');
      break;
    default:
      break;
  }
});
/**
 * Starts capturing the audio of the page's first <video> element.
 *
 * The element's captured MediaStream is routed through an AudioWorkletNode
 * ('audio-processor'); every 'chunk' message the node posts is appended to the
 * module-level `audioChunks` array until stopRecording() is called.
 *
 * Failures (no video element, no stream, worklet load errors, ...) are logged
 * with console.error and are not rethrown.
 *
 * @returns {Promise<void>} Settles once recording has started, was skipped
 *   because one is already running, or the failure has been logged.
 */
async function startRecording() {
  // A second start would build another AudioContext whose worklet also pushes
  // into `audioChunks`, interleaving two streams and leaking the first context.
  if (isRecording) {
    console.warn('Recording already in progress.');
    return;
  }

  try {
    // Select the video element and validate it before touching any state, so
    // monitorVideo() is never handed null.
    const videoElement = document.querySelector('video');
    if (!videoElement) {
      throw new Error('No video element found!');
    }

    audioChunks = [];
    // monitorVideo is defined elsewhere in the extension; presumably it stops
    // the recording when playback ends (verify against its definition).
    monitorVideo(videoElement);

    // Capture the audio stream
    const stream = videoElement.captureStream();
    if (!stream) {
      throw new Error('Failed to capture audio stream!');
    }

    // NOTE(review): the context is created with default options and the capture
    // happens in real time, so byte-identical output across runs is presumably
    // not guaranteed; confirm if bit-exact data is a hard requirement.
    audioContext = new AudioContext();

    // Load the processor module, then create the node that runs it.
    await audioContext.audioWorklet.addModule(chrome.runtime.getURL('processor.js'));
    audioWorkletNode = new AudioWorkletNode(audioContext, 'audio-processor');

    // stream -> worklet -> destination
    const source = audioContext.createMediaStreamSource(stream);
    source.connect(audioWorkletNode).connect(audioContext.destination);

    // Collect audio chunks posted by the AudioWorkletProcessor
    audioWorkletNode.port.onmessage = (event) => {
      if (event.data.type === 'chunk') {
        audioChunks.push(event.data.chunk);
      }
    };

    // Start the AudioContext
    await audioContext.resume();
    isRecording = true;
    console.log('Recording started...');
  } catch (error) {
    console.error('Failed to start recording:', error);
  }
}
/**
 * Stops the active recording, tears down the audio graph and saves whatever
 * was captured.
 *
 * Only warns when no recording is in progress. When the recording produced no
 * chunks, nothing is saved and a warning is logged instead of a success message.
 */
function stopRecording() {
  if (!isRecording) {
    console.warn('No recording in progress.');
    return;
  }
  isRecording = false;

  // Stop the audio context
  if (audioWorkletNode) {
    audioWorkletNode.disconnect();
    // close() returns a promise; handle a rejection here so it does not surface
    // as an unhandled promise rejection.
    audioContext.close().catch((error) => {
      console.error('Failed to close AudioContext:', error);
    });
  }

  if (audioChunks.length === 0) {
    console.warn('Recording stopped, but no audio was captured.');
    return;
  }

  savePCMFile(audioChunks);
  console.log('Recording stopped and saved');
}
/**
 * Concatenates the captured chunks into one Float32Array and triggers a
 * download of its raw bytes as 'audio.pcm'.
 *
 * @param {Float32Array[]} pcmBuffer - Captured chunks, in capture order.
 */
function savePCMFile(pcmBuffer) {
  // Delay before the object URL is released, so the download started by
  // click() is not cut short by an immediate revoke.
  const REVOKE_DELAY_MS = 10000;

  // Flatten the chunks into one Float32Array
  const totalLength = pcmBuffer.reduce((sum, chunk) => sum + chunk.length, 0);
  const combinedArray = new Float32Array(totalLength);
  let offset = 0;
  for (const chunk of pcmBuffer) {
    combinedArray.set(chunk, offset);
    offset += chunk.length;
  }

  // Create a Blob from the combined Float32Array
  const blob = new Blob([combinedArray.buffer], { type: 'application/octet-stream' });
  const url = URL.createObjectURL(blob);

  // Trigger download
  const a = document.createElement('a');
  a.href = url;
  a.download = 'audio.pcm';
  a.click();

  // An object URL keeps its Blob alive until it is revoked; release it so
  // repeated recordings do not accumulate whole-recording buffers.
  setTimeout(() => URL.revokeObjectURL(url), REVOKE_DELAY_MS);
}
audio-processor.js
/**
 * AudioWorklet processor that forwards audio to the main thread.
 *
 * On every process() call it copies the first channel of the first input and
 * posts it through its message port as { type: 'chunk', chunk: Float32Array }.
 * Any further channels or inputs are ignored, and nothing is written to the
 * outputs.
 */
class AudioProcessor extends AudioWorkletProcessor {
  /**
   * @param {Float32Array[][]} inputs - inputs[i][c] holds the samples of
   *   channel c of input i.
   * @returns {boolean} Always true, to keep the processor running.
   */
  process(inputs) {
    const firstInput = inputs[0];
    if (firstInput && firstInput.length > 0) {
      // Copy the first channel so the message owns its own buffer.
      const chunk = firstInput[0].slice(0);
      // Transfer the copy's buffer instead of letting postMessage clone it
      // again; the copy is not used here after being sent.
      this.port.postMessage({ type: 'chunk', chunk }, [chunk.buffer]);
    }
    // Return true to keep processing
    return true;
  }
}

registerProcessor('audio-processor', AudioProcessor);