Add dist/ and lib/ folders needed by Realtime translation example (#1769)

erikakettleson-openai 2025-04-15 08:06:32 -07:00 committed by GitHub
parent dbf884f931
commit acfa8abce9
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
26 changed files with 1877 additions and 2 deletions


@@ -7,8 +7,7 @@ This cookbook demonstrates how to use OpenAI's [ Realtime API](https://platform.
 A real-world use case for this demo is a multilingual, conversational translation where a speaker talks into the speaker app and listeners hear translations in their selected native language via the listener app. Imagine a conference room with a speaker talking in English and a participant with headphones in choosing to listen to a Tagalog translation. Due to the current turn-based nature of audio models, the speaker must pause briefly to allow the model to process and translate speech. However, as models become faster and more efficient, this latency will decrease significantly and the translation will become more seamless.
-Let's explore the main functionalities and code snippets that illustrate how the app works. You can find the code in the [accompanying repo](https://github.com/openai/openai-cookbook/tree/main/examples/voice_solutions/one_way_translation_using_realtime_api/README.md
-) if you want to run the app locally.
+Let's explore the main functionalities and code snippets that illustrate how the app works. You can find the code in the [accompanying repo](https://github.com/openai/openai-cookbook/tree/main/examples/voice_solutions/one_way_translation_using_realtime_api) if you want to run the app locally.
 ## High Level Architecture Overview


@@ -0,0 +1,84 @@
import { WebSocketServer } from 'ws';
import { RealtimeClient } from '@openai/realtime-api-beta';
export class RealtimeRelay {
constructor(apiKey) {
this.apiKey = apiKey;
this.sockets = new WeakMap();
this.wss = null;
}
listen(port) {
this.wss = new WebSocketServer({ port });
this.wss.on('connection', this.connectionHandler.bind(this));
this.log(`Listening on ws://localhost:${port}`);
}
async connectionHandler(ws, req) {
if (!req.url) {
this.log('No URL provided, closing connection.');
ws.close();
return;
}
const url = new URL(req.url, `http://${req.headers.host}`);
const pathname = url.pathname;
if (pathname !== '/') {
this.log(`Invalid pathname: "${pathname}"`);
ws.close();
return;
}
// Instantiate new client
this.log(`Connecting with key "${this.apiKey.slice(0, 3)}..."`);
const client = new RealtimeClient({ apiKey: this.apiKey });
// Relay: OpenAI Realtime API Event -> Browser Event
client.realtime.on('server.*', (event) => {
this.log(`Relaying "${event.type}" to Client`);
ws.send(JSON.stringify(event));
});
client.realtime.on('close', () => ws.close());
// Relay: Browser Event -> OpenAI Realtime API Event
// We need to queue data waiting for the OpenAI connection
const messageQueue = [];
const messageHandler = (data) => {
try {
const event = JSON.parse(data);
this.log(`Relaying "${event.type}" to OpenAI`);
client.realtime.send(event.type, event);
} catch (e) {
console.error(e.message);
this.log(`Error parsing event from client: ${data}`);
}
};
ws.on('message', (data) => {
if (!client.isConnected()) {
messageQueue.push(data);
} else {
messageHandler(data);
}
});
ws.on('close', () => client.disconnect());
// Connect to OpenAI Realtime API
try {
this.log(`Connecting to OpenAI...`);
await client.connect();
} catch (e) {
this.log(`Error connecting to OpenAI: ${e.message}`);
ws.close();
return;
}
this.log(`Connected to OpenAI successfully!`);
while (messageQueue.length) {
messageHandler(messageQueue.shift());
}
}
log(...args) {
console.log(`[RealtimeRelay]`, ...args);
}
}
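
For reference, a minimal sketch of how this relay class is typically started from a Node entry point — not part of the committed files; the `./relay.js` import path, the `OPENAI_API_KEY` environment variable, and port `8081` are assumptions:

```js
// Hypothetical entry point (sketch, not in this commit).
import { RealtimeRelay } from './relay.js'; // assumed path to the class above

const apiKey = process.env.OPENAI_API_KEY; // assumed environment variable
if (!apiKey) {
  console.error('Please set the OPENAI_API_KEY environment variable.');
  process.exit(1);
}

const relay = new RealtimeRelay(apiKey);
relay.listen(8081); // browser clients connect to ws://localhost:8081
```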


@@ -0,0 +1,6 @@
import { AudioAnalysis } from './lib/analysis/audio_analysis.js';
import { WavPacker } from './lib/wav_packer.js';
import { WavStreamPlayer } from './lib/wav_stream_player.js';
import { WavRecorder } from './lib/wav_recorder.js';
export { AudioAnalysis, WavPacker, WavStreamPlayer, WavRecorder };
//# sourceMappingURL=index.d.ts.map


@@ -0,0 +1 @@
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../index.js"],"names":[],"mappings":"8BAC8B,kCAAkC;0BADtC,qBAAqB;gCAEf,4BAA4B;4BAChC,uBAAuB"}


@@ -0,0 +1,70 @@
/**
* Output of AudioAnalysis for the frequency domain of the audio
* @typedef {Object} AudioAnalysisOutputType
* @property {Float32Array} values Amplitude of this frequency between {0, 1} inclusive
* @property {number[]} frequencies Raw frequency bucket values
* @property {string[]} labels Labels for the frequency bucket values
*/
/**
* Analyzes audio for visual output
* @class
*/
export class AudioAnalysis {
/**
* Retrieves frequency domain data from an AnalyserNode adjusted to a decibel range
* returns human-readable formatting and labels
* @param {AnalyserNode} analyser
* @param {number} sampleRate
* @param {Float32Array} [fftResult]
* @param {"frequency"|"music"|"voice"} [analysisType]
* @param {number} [minDecibels] default -100
* @param {number} [maxDecibels] default -30
* @returns {AudioAnalysisOutputType}
*/
static getFrequencies(analyser: AnalyserNode, sampleRate: number, fftResult?: Float32Array, analysisType?: "frequency" | "music" | "voice", minDecibels?: number, maxDecibels?: number): AudioAnalysisOutputType;
/**
* Creates a new AudioAnalysis instance for an HTMLAudioElement
* @param {HTMLAudioElement} audioElement
* @param {AudioBuffer|null} [audioBuffer] If provided, will cache all frequency domain data from the buffer
* @returns {AudioAnalysis}
*/
constructor(audioElement: HTMLAudioElement, audioBuffer?: AudioBuffer | null);
fftResults: any[];
audio: HTMLAudioElement;
context: any;
analyser: any;
sampleRate: any;
audioBuffer: any;
/**
* Gets the current frequency domain data from the playing audio track
* @param {"frequency"|"music"|"voice"} [analysisType]
* @param {number} [minDecibels] default -100
* @param {number} [maxDecibels] default -30
* @returns {AudioAnalysisOutputType}
*/
getFrequencies(analysisType?: "frequency" | "music" | "voice", minDecibels?: number, maxDecibels?: number): AudioAnalysisOutputType;
/**
* Resume the internal AudioContext if it was suspended due to the lack of
* user interaction when the AudioAnalysis was instantiated.
* @returns {Promise<true>}
*/
resumeIfSuspended(): Promise<true>;
}
/**
* Output of AudioAnalysis for the frequency domain of the audio
*/
export type AudioAnalysisOutputType = {
/**
* Amplitude of this frequency between {0, 1} inclusive
*/
values: Float32Array;
/**
* Raw frequency bucket values
*/
frequencies: number[];
/**
* Labels for the frequency bucket values
*/
labels: string[];
};
//# sourceMappingURL=audio_analysis.d.ts.map


@@ -0,0 +1 @@
{"version":3,"file":"audio_analysis.d.ts","sourceRoot":"","sources":["../../../lib/analysis/audio_analysis.js"],"names":[],"mappings":"AAOA;;;;;;GAMG;AAEH;;;GAGG;AACH;IACE;;;;;;;;;;OAUG;IACH,gCARW,YAAY,cACZ,MAAM,cACN,YAAY,iBACZ,WAAW,GAAC,OAAO,GAAC,OAAO,gBAC3B,MAAM,gBACN,MAAM,GACJ,uBAAuB,CAwDnC;IAED;;;;;OAKG;IACH,0BAJW,gBAAgB,gBAChB,WAAW,GAAC,IAAI,EAkE1B;IA9DC,kBAAoB;IA2ClB,wBAAyB;IACzB,aAAkC;IAClC,cAAwB;IACxB,gBAA4B;IAC5B,iBAA8B;IAiBlC;;;;;;OAMG;IACH,8BALW,WAAW,GAAC,OAAO,GAAC,OAAO,gBAC3B,MAAM,gBACN,MAAM,GACJ,uBAAuB,CAwBnC;IAED;;;;OAIG;IACH,qBAFa,OAAO,CAAC,IAAI,CAAC,CAOzB;CACF;;;;;;;;YA9La,YAAY;;;;iBACZ,MAAM,EAAE;;;;YACR,MAAM,EAAE"}


@@ -0,0 +1,9 @@
/**
* All note frequencies from 1st to 8th octave
* in format "A#8" (A#, 8th octave)
*/
export const noteFrequencies: any[];
export const noteFrequencyLabels: any[];
export const voiceFrequencies: any[];
export const voiceFrequencyLabels: any[];
//# sourceMappingURL=constants.d.ts.map


@@ -0,0 +1 @@
{"version":3,"file":"constants.d.ts","sourceRoot":"","sources":["../../../lib/analysis/constants.js"],"names":[],"mappings":"AA6BA;;;GAGG;AACH,oCAAkC;AAClC,wCAAsC;AActC,qCAKG;AACH,yCAKG"}


@@ -0,0 +1,58 @@
/**
* Raw wav audio file contents
* @typedef {Object} WavPackerAudioType
* @property {Blob} blob
* @property {string} url
* @property {number} channelCount
* @property {number} sampleRate
* @property {number} duration
*/
/**
* Utility class for assembling PCM16 "audio/wav" data
* @class
*/
export class WavPacker {
/**
* Converts Float32Array of amplitude data to ArrayBuffer in Int16Array format
* @param {Float32Array} float32Array
* @returns {ArrayBuffer}
*/
static floatTo16BitPCM(float32Array: Float32Array): ArrayBuffer;
/**
* Concatenates two ArrayBuffers
* @param {ArrayBuffer} leftBuffer
* @param {ArrayBuffer} rightBuffer
* @returns {ArrayBuffer}
*/
static mergeBuffers(leftBuffer: ArrayBuffer, rightBuffer: ArrayBuffer): ArrayBuffer;
/**
* Packs data into an Int16 format
* @private
* @param {number} size 0 = 1x Int16, 1 = 2x Int16
* @param {number} arg value to pack
* @returns
*/
private _packData;
/**
* Packs audio into "audio/wav" Blob
* @param {number} sampleRate
* @param {{bitsPerSample: number, channels: Array<Float32Array>, data: Int16Array}} audio
* @returns {WavPackerAudioType}
*/
pack(sampleRate: number, audio: {
bitsPerSample: number;
channels: Array<Float32Array>;
data: Int16Array;
}): WavPackerAudioType;
}
/**
* Raw wav audio file contents
*/
export type WavPackerAudioType = {
blob: Blob;
url: string;
channelCount: number;
sampleRate: number;
duration: number;
};
//# sourceMappingURL=wav_packer.d.ts.map


@@ -0,0 +1 @@
{"version":3,"file":"wav_packer.d.ts","sourceRoot":"","sources":["../../lib/wav_packer.js"],"names":[],"mappings":"AAAA;;;;;;;;GAQG;AAEH;;;GAGG;AACH;IACE;;;;OAIG;IACH,qCAHW,YAAY,GACV,WAAW,CAWvB;IAED;;;;;OAKG;IACH,gCAJW,WAAW,eACX,WAAW,GACT,WAAW,CASvB;IAED;;;;;;OAMG;IACH,kBAKC;IAED;;;;;OAKG;IACH,iBAJW,MAAM,SACN;QAAC,aAAa,EAAE,MAAM,CAAC;QAAC,QAAQ,EAAE,KAAK,CAAC,YAAY,CAAC,CAAC;QAAC,IAAI,EAAE,UAAU,CAAA;KAAC,GACtE,kBAAkB,CA6C9B;CACF;;;;;UA3Ga,IAAI;SACJ,MAAM;kBACN,MAAM;gBACN,MAAM;cACN,MAAM"}


@@ -0,0 +1,167 @@
/**
* Decodes audio into a wav file
* @typedef {Object} DecodedAudioType
* @property {Blob} blob
* @property {string} url
* @property {Float32Array} values
* @property {AudioBuffer} audioBuffer
*/
/**
* Records live stream of user audio as PCM16 "audio/wav" data
* @class
*/
export class WavRecorder {
/**
* Decodes audio data from multiple formats to a Blob, url, Float32Array and AudioBuffer
* @param {Blob|Float32Array|Int16Array|ArrayBuffer|number[]} audioData
* @param {number} sampleRate
* @param {number} fromSampleRate
* @returns {Promise<DecodedAudioType>}
*/
static decode(audioData: Blob | Float32Array | Int16Array | ArrayBuffer | number[], sampleRate?: number, fromSampleRate?: number): Promise<DecodedAudioType>;
/**
* Create a new WavRecorder instance
* @param {{sampleRate?: number, outputToSpeakers?: boolean, debug?: boolean}} [options]
* @returns {WavRecorder}
*/
constructor({ sampleRate, outputToSpeakers, debug, }?: {
sampleRate?: number;
outputToSpeakers?: boolean;
debug?: boolean;
});
scriptSrc: any;
sampleRate: number;
outputToSpeakers: boolean;
debug: boolean;
_deviceChangeCallback: () => Promise<void>;
_devices: any[];
stream: any;
processor: any;
source: any;
node: any;
recording: boolean;
_lastEventId: number;
eventReceipts: {};
eventTimeout: number;
_chunkProcessor: () => void;
_chunkProcessorBuffer: {
raw: ArrayBuffer;
mono: ArrayBuffer;
};
/**
* Logs data in debug mode
* @param {...any} arguments
* @returns {true}
*/
log(...args: any[]): true;
/**
* Retrieves the current sampleRate for the recorder
* @returns {number}
*/
getSampleRate(): number;
/**
* Retrieves the current status of the recording
* @returns {"ended"|"paused"|"recording"}
*/
getStatus(): "ended" | "paused" | "recording";
/**
* Sends an event to the AudioWorklet
* @private
* @param {string} name
* @param {{[key: string]: any}} data
* @param {AudioWorkletNode} [_processor]
* @returns {Promise<{[key: string]: any}>}
*/
private _event;
/**
* Sets device change callback, remove if callback provided is `null`
* @param {(Array<MediaDeviceInfo & {default: boolean}>): void|null} callback
* @returns {true}
*/
listenForDeviceChange(callback: any): true;
/**
* Manually request permission to use the microphone
* @returns {Promise<true>}
*/
requestPermission(): Promise<true>;
/**
* List all eligible devices for recording, will request permission to use microphone
* @returns {Promise<Array<MediaDeviceInfo & {default: boolean}>>}
*/
listDevices(): Promise<Array<MediaDeviceInfo & {
default: boolean;
}>>;
/**
* Begins a recording session and requests microphone permissions if not already granted
* Microphone recording indicator will appear on browser tab but status will be "paused"
* @param {string} [deviceId] if no device provided, default device will be used
* @returns {Promise<true>}
*/
begin(deviceId?: string): Promise<true>;
analyser: any;
/**
* Gets the current frequency domain data from the recording track
* @param {"frequency"|"music"|"voice"} [analysisType]
* @param {number} [minDecibels] default -100
* @param {number} [maxDecibels] default -30
* @returns {import('./analysis/audio_analysis.js').AudioAnalysisOutputType}
*/
getFrequencies(analysisType?: "frequency" | "music" | "voice", minDecibels?: number, maxDecibels?: number): import("./analysis/audio_analysis.js").AudioAnalysisOutputType;
/**
* Pauses the recording
* Keeps microphone stream open but halts storage of audio
* @returns {Promise<true>}
*/
pause(): Promise<true>;
/**
* Start recording stream and storing to memory from the connected audio source
* @param {(data: { mono: Int16Array; raw: Int16Array }) => any} [chunkProcessor]
* @param {number} [chunkSize] chunkProcessor will not be triggered until this size threshold met in mono audio
* @returns {Promise<true>}
*/
record(chunkProcessor?: (data: {
mono: Int16Array;
raw: Int16Array;
}) => any, chunkSize?: number): Promise<true>;
_chunkProcessorSize: number;
/**
* Clears the audio buffer, empties stored recording
* @returns {Promise<true>}
*/
clear(): Promise<true>;
/**
* Reads the current audio stream data
* @returns {Promise<{meanValues: Float32Array, channels: Array<Float32Array>}>}
*/
read(): Promise<{
meanValues: Float32Array;
channels: Array<Float32Array>;
}>;
/**
* Saves the current audio stream to a file
* @param {boolean} [force] Force saving while still recording
* @returns {Promise<import('./wav_packer.js').WavPackerAudioType>}
*/
save(force?: boolean): Promise<import("./wav_packer.js").WavPackerAudioType>;
/**
* Ends the current recording session and saves the result
* @returns {Promise<import('./wav_packer.js').WavPackerAudioType>}
*/
end(): Promise<import("./wav_packer.js").WavPackerAudioType>;
/**
* Performs a full cleanup of WavRecorder instance
* Stops actively listening via microphone and removes existing listeners
* @returns {Promise<true>}
*/
quit(): Promise<true>;
}
/**
* Decodes audio into a wav file
*/
export type DecodedAudioType = {
blob: Blob;
url: string;
values: Float32Array;
audioBuffer: AudioBuffer;
};
//# sourceMappingURL=wav_recorder.d.ts.map


@@ -0,0 +1 @@
{"version":3,"file":"wav_recorder.d.ts","sourceRoot":"","sources":["../../lib/wav_recorder.js"],"names":[],"mappings":"AAIA;;;;;;;GAOG;AAEH;;;GAGG;AACH;IAsCE;;;;;;OAMG;IACH,yBALW,IAAI,GAAC,YAAY,GAAC,UAAU,GAAC,WAAW,GAAC,MAAM,EAAE,eACjD,MAAM,mBACN,MAAM,GACJ,OAAO,CAAC,gBAAgB,CAAC,CAqErC;IA/GD;;;;OAIG;IACH,uDAHW;QAAC,UAAU,CAAC,EAAE,MAAM,CAAC;QAAC,gBAAgB,CAAC,EAAE,OAAO,CAAC;QAAC,KAAK,CAAC,EAAE,OAAO,CAAA;KAAC,EAiC5E;IAxBC,eAAkC;IAElC,mBAA4B;IAC5B,0BAAwC;IACxC,eAAoB;IACpB,2CAAiC;IACjC,gBAAkB;IAElB,YAAkB;IAClB,eAAqB;IACrB,YAAkB;IAClB,UAAgB;IAChB,mBAAsB;IAEtB,qBAAqB;IACrB,kBAAuB;IACvB,qBAAwB;IAExB,4BAA+B;IAE/B;;;MAGC;IA+EH;;;;OAIG;IACH,qBAFa,IAAI,CAOhB;IAED;;;OAGG;IACH,iBAFa,MAAM,CAIlB;IAED;;;OAGG;IACH,aAFa,OAAO,GAAC,QAAQ,GAAC,WAAW,CAUxC;IAED;;;;;;;OAOG;IACH,eAqBC;IAED;;;;OAIG;IACH,sCAFa,IAAI,CAmChB;IAED;;;OAGG;IACH,qBAFa,OAAO,CAAC,IAAI,CAAC,CAoBzB;IAED;;;OAGG;IACH,eAFa,OAAO,CAAC,KAAK,CAAC,eAAe,GAAG;QAAC,OAAO,EAAE,OAAO,CAAA;KAAC,CAAC,CAAC,CA8BhE;IAED;;;;;OAKG;IACH,iBAHW,MAAM,GACJ,OAAO,CAAC,IAAI,CAAC,CAkFzB;IAHC,cAAwB;IAK1B;;;;;;OAMG;IACH,8BALW,WAAW,GAAC,OAAO,GAAC,OAAO,gBAC3B,MAAM,gBACN,MAAM,GACJ,OAAO,8BAA8B,EAAE,uBAAuB,CAkB1E;IAED;;;;OAIG;IACH,SAFa,OAAO,CAAC,IAAI,CAAC,CAezB;IAED;;;;;OAKG;IACH,wBAJW,CAAC,IAAI,EAAE;QAAE,IAAI,EAAE,UAAU,CAAC;QAAC,GAAG,EAAE,UAAU,CAAA;KAAE,KAAK,GAAG,cACpD,MAAM,GACJ,OAAO,CAAC,IAAI,CAAC,CAoBzB;IATC,4BAAoC;IAWtC;;;OAGG;IACH,SAFa,OAAO,CAAC,IAAI,CAAC,CAQzB;IAED;;;OAGG;IACH,QAFa,OAAO,CAAC;QAAC,UAAU,EAAE,YAAY,CAAC;QAAC,QAAQ,EAAE,KAAK,CAAC,YAAY,CAAC,CAAA;KAAC,CAAC,CAS9E;IAED;;;;OAIG;IACH,aAHW,OAAO,GACL,OAAO,CAAC,OAAO,iBAAiB,EAAE,kBAAkB,CAAC,CAgBjE;IAED;;;OAGG;IACH,OAFa,OAAO,CAAC,OAAO,iBAAiB,EAAE,kBAAkB,CAAC,CA8BjE;IAED;;;;OAIG;IACH,QAFa,OAAO,CAAC,IAAI,CAAC,CAQzB;CACF;;;;;UA1hBa,IAAI;SACJ,MAAM;YACN,YAAY;iBACZ,WAAW"}


@@ -0,0 +1,69 @@
/**
* Plays audio streams received in raw PCM16 chunks from the browser
* @class
*/
export class WavStreamPlayer {
/**
* Creates a new WavStreamPlayer instance
* @param {{sampleRate?: number}} options
* @returns {WavStreamPlayer}
*/
constructor({ sampleRate }?: {
sampleRate?: number;
});
scriptSrc: any;
sampleRate: number;
context: any;
stream: any;
analyser: any;
trackSampleOffsets: {};
interruptedTrackIds: {};
/**
* Connects the audio context and enables output to speakers
* @returns {Promise<true>}
*/
connect(): Promise<true>;
/**
* Gets the current frequency domain data from the playing track
* @param {"frequency"|"music"|"voice"} [analysisType]
* @param {number} [minDecibels] default -100
* @param {number} [maxDecibels] default -30
* @returns {import('./analysis/audio_analysis.js').AudioAnalysisOutputType}
*/
getFrequencies(analysisType?: "frequency" | "music" | "voice", minDecibels?: number, maxDecibels?: number): import("./analysis/audio_analysis.js").AudioAnalysisOutputType;
/**
* Starts audio streaming
* @private
* @returns {Promise<true>}
*/
private _start;
/**
* Adds 16BitPCM data to the currently playing audio stream
* You can add chunks beyond the current play point and they will be queued for play
* @param {ArrayBuffer|Int16Array} arrayBuffer
* @param {string} [trackId]
* @returns {Int16Array}
*/
add16BitPCM(arrayBuffer: ArrayBuffer | Int16Array, trackId?: string): Int16Array;
/**
* Gets the offset (sample count) of the currently playing stream
* @param {boolean} [interrupt]
* @returns {{trackId: string|null, offset: number, currentTime: number}}
*/
getTrackSampleOffset(interrupt?: boolean): {
trackId: string | null;
offset: number;
currentTime: number;
};
/**
* Strips the current stream and returns the sample offset of the audio
* @param {boolean} [interrupt]
* @returns {{trackId: string|null, offset: number, currentTime: number}}
*/
interrupt(): {
trackId: string | null;
offset: number;
currentTime: number;
};
}
//# sourceMappingURL=wav_stream_player.d.ts.map


@@ -0,0 +1 @@
{"version":3,"file":"wav_stream_player.d.ts","sourceRoot":"","sources":["../../lib/wav_stream_player.js"],"names":[],"mappings":"AAGA;;;GAGG;AACH;IACE;;;;OAIG;IACH,6BAHW;QAAC,UAAU,CAAC,EAAE,MAAM,CAAA;KAAC,EAW/B;IAPC,eAAmC;IACnC,mBAA4B;IAC5B,aAAmB;IACnB,YAAkB;IAClB,cAAoB;IACpB,uBAA4B;IAC5B,wBAA6B;IAG/B;;;OAGG;IACH,WAFa,OAAO,CAAC,IAAI,CAAC,CAkBzB;IAED;;;;;;OAMG;IACH,8BALW,WAAW,GAAC,OAAO,GAAC,OAAO,gBAC3B,MAAM,gBACN,MAAM,GACJ,OAAO,8BAA8B,EAAE,uBAAuB,CAkB1E;IAED;;;;OAIG;IACH,eAkBC;IAED;;;;;;OAMG;IACH,yBAJW,WAAW,GAAC,UAAU,YACtB,MAAM,GACJ,UAAU,CAqBtB;IAED;;;;OAIG;IACH,iCAHW,OAAO,GACL;QAAC,OAAO,EAAE,MAAM,GAAC,IAAI,CAAC;QAAC,MAAM,EAAE,MAAM,CAAC;QAAC,WAAW,EAAE,MAAM,CAAA;KAAC,CAqBvE;IAED;;;;OAIG;IACH,aAFa;QAAC,OAAO,EAAE,MAAM,GAAC,IAAI,CAAC;QAAC,MAAM,EAAE,MAAM,CAAC;QAAC,WAAW,EAAE,MAAM,CAAA;KAAC,CAIvE;CACF"}


@@ -0,0 +1,2 @@
export const AudioProcessorSrc: any;
//# sourceMappingURL=audio_processor.d.ts.map


@@ -0,0 +1 @@
{"version":3,"file":"audio_processor.d.ts","sourceRoot":"","sources":["../../../lib/worklets/audio_processor.js"],"names":[],"mappings":"AAqNA,oCAAqC"}


@@ -0,0 +1,3 @@
export const StreamProcessorWorklet: "\nclass StreamProcessor extends AudioWorkletProcessor {\n constructor() {\n super();\n this.hasStarted = false;\n this.hasInterrupted = false;\n this.outputBuffers = [];\n this.bufferLength = 128;\n this.write = { buffer: new Float32Array(this.bufferLength), trackId: null };\n this.writeOffset = 0;\n this.trackSampleOffsets = {};\n this.port.onmessage = (event) => {\n if (event.data) {\n const payload = event.data;\n if (payload.event === 'write') {\n const int16Array = payload.buffer;\n const float32Array = new Float32Array(int16Array.length);\n for (let i = 0; i < int16Array.length; i++) {\n float32Array[i] = int16Array[i] / 0x8000; // Convert Int16 to Float32\n }\n this.writeData(float32Array, payload.trackId);\n } else if (\n payload.event === 'offset' ||\n payload.event === 'interrupt'\n ) {\n const requestId = payload.requestId;\n const trackId = this.write.trackId;\n const offset = this.trackSampleOffsets[trackId] || 0;\n this.port.postMessage({\n event: 'offset',\n requestId,\n trackId,\n offset,\n });\n if (payload.event === 'interrupt') {\n this.hasInterrupted = true;\n }\n } else {\n throw new Error(`Unhandled event \"${payload.event}\"`);\n }\n }\n };\n }\n\n writeData(float32Array, trackId = null) {\n let { buffer } = this.write;\n let offset = this.writeOffset;\n for (let i = 0; i < float32Array.length; i++) {\n buffer[offset++] = float32Array[i];\n if (offset >= buffer.length) {\n this.outputBuffers.push(this.write);\n this.write = { buffer: new Float32Array(this.bufferLength), trackId };\n buffer = this.write.buffer;\n offset = 0;\n }\n }\n this.writeOffset = offset;\n return true;\n }\n\n process(inputs, outputs, parameters) {\n const output = outputs[0];\n const outputChannelData = output[0];\n const outputBuffers = this.outputBuffers;\n if (this.hasInterrupted) {\n this.port.postMessage({ event: 'stop' });\n return false;\n } else if (outputBuffers.length) {\n this.hasStarted = true;\n const { buffer, trackId } = outputBuffers.shift();\n for (let i = 0; i < outputChannelData.length; i++) {\n outputChannelData[i] = buffer[i] || 0;\n }\n if (trackId) {\n this.trackSampleOffsets[trackId] =\n this.trackSampleOffsets[trackId] || 0;\n this.trackSampleOffsets[trackId] += buffer.length;\n }\n return true;\n } else if (this.hasStarted) {\n this.port.postMessage({ event: 'stop' });\n return false;\n } else {\n return true;\n }\n }\n}\n\nregisterProcessor('stream_processor', StreamProcessor);\n";
export const StreamProcessorSrc: any;
//# sourceMappingURL=stream_processor.d.ts.map


@@ -0,0 +1 @@
{"version":3,"file":"stream_processor.d.ts","sourceRoot":"","sources":["../../../lib/worklets/stream_processor.js"],"names":[],"mappings":"AAAA,q4FAyFE;AAMF,qCAAsC"}


@@ -0,0 +1,6 @@
import { WavPacker } from './lib/wav_packer.js';
import { AudioAnalysis } from './lib/analysis/audio_analysis.js';
import { WavStreamPlayer } from './lib/wav_stream_player.js';
import { WavRecorder } from './lib/wav_recorder.js';
export { AudioAnalysis, WavPacker, WavStreamPlayer, WavRecorder };


@@ -0,0 +1,203 @@
import {
noteFrequencies,
noteFrequencyLabels,
voiceFrequencies,
voiceFrequencyLabels,
} from './constants.js';
/**
* Output of AudioAnalysis for the frequency domain of the audio
* @typedef {Object} AudioAnalysisOutputType
* @property {Float32Array} values Amplitude of this frequency between {0, 1} inclusive
* @property {number[]} frequencies Raw frequency bucket values
* @property {string[]} labels Labels for the frequency bucket values
*/
/**
* Analyzes audio for visual output
* @class
*/
export class AudioAnalysis {
/**
* Retrieves frequency domain data from an AnalyserNode adjusted to a decibel range
* returns human-readable formatting and labels
* @param {AnalyserNode} analyser
* @param {number} sampleRate
* @param {Float32Array} [fftResult]
* @param {"frequency"|"music"|"voice"} [analysisType]
* @param {number} [minDecibels] default -100
* @param {number} [maxDecibels] default -30
* @returns {AudioAnalysisOutputType}
*/
static getFrequencies(
analyser,
sampleRate,
fftResult,
analysisType = 'frequency',
minDecibels = -100,
maxDecibels = -30,
) {
if (!fftResult) {
fftResult = new Float32Array(analyser.frequencyBinCount);
analyser.getFloatFrequencyData(fftResult);
}
const nyquistFrequency = sampleRate / 2;
const frequencyStep = (1 / fftResult.length) * nyquistFrequency;
let outputValues;
let frequencies;
let labels;
if (analysisType === 'music' || analysisType === 'voice') {
const useFrequencies =
analysisType === 'voice' ? voiceFrequencies : noteFrequencies;
const aggregateOutput = Array(useFrequencies.length).fill(minDecibels);
for (let i = 0; i < fftResult.length; i++) {
const frequency = i * frequencyStep;
const amplitude = fftResult[i];
for (let n = useFrequencies.length - 1; n >= 0; n--) {
if (frequency > useFrequencies[n]) {
aggregateOutput[n] = Math.max(aggregateOutput[n], amplitude);
break;
}
}
}
outputValues = aggregateOutput;
frequencies =
analysisType === 'voice' ? voiceFrequencies : noteFrequencies;
labels =
analysisType === 'voice' ? voiceFrequencyLabels : noteFrequencyLabels;
} else {
outputValues = Array.from(fftResult);
frequencies = outputValues.map((_, i) => frequencyStep * i);
labels = frequencies.map((f) => `${f.toFixed(2)} Hz`);
}
// We normalize to {0, 1}
const normalizedOutput = outputValues.map((v) => {
return Math.max(
0,
Math.min((v - minDecibels) / (maxDecibels - minDecibels), 1),
);
});
const values = new Float32Array(normalizedOutput);
return {
values,
frequencies,
labels,
};
}
/**
* Creates a new AudioAnalysis instance for an HTMLAudioElement
* @param {HTMLAudioElement} audioElement
* @param {AudioBuffer|null} [audioBuffer] If provided, will cache all frequency domain data from the buffer
* @returns {AudioAnalysis}
*/
constructor(audioElement, audioBuffer = null) {
this.fftResults = [];
if (audioBuffer) {
/**
* Modified from
* https://stackoverflow.com/questions/75063715/using-the-web-audio-api-to-analyze-a-song-without-playing
*
* We do this to populate FFT values for the audio if provided an `audioBuffer`
* The reason to do this is that Safari fails when using `createMediaElementSource`
* This has a non-zero RAM cost so we only opt-in to run it on Safari, Chrome is better
*/
const { length, sampleRate } = audioBuffer;
const offlineAudioContext = new OfflineAudioContext({
length,
sampleRate,
});
const source = offlineAudioContext.createBufferSource();
source.buffer = audioBuffer;
const analyser = offlineAudioContext.createAnalyser();
analyser.fftSize = 8192;
analyser.smoothingTimeConstant = 0.1;
source.connect(analyser);
// limit is :: 128 / sampleRate;
// but we just want 60fps - cuts ~1s from 6MB to 1MB of RAM
const renderQuantumInSeconds = 1 / 60;
const durationInSeconds = length / sampleRate;
const analyze = (index) => {
const suspendTime = renderQuantumInSeconds * index;
if (suspendTime < durationInSeconds) {
offlineAudioContext.suspend(suspendTime).then(() => {
const fftResult = new Float32Array(analyser.frequencyBinCount);
analyser.getFloatFrequencyData(fftResult);
this.fftResults.push(fftResult);
analyze(index + 1);
});
}
if (index === 1) {
offlineAudioContext.startRendering();
} else {
offlineAudioContext.resume();
}
};
source.start(0);
analyze(1);
this.audio = audioElement;
this.context = offlineAudioContext;
this.analyser = analyser;
this.sampleRate = sampleRate;
this.audioBuffer = audioBuffer;
} else {
const audioContext = new AudioContext();
const track = audioContext.createMediaElementSource(audioElement);
const analyser = audioContext.createAnalyser();
analyser.fftSize = 8192;
analyser.smoothingTimeConstant = 0.1;
track.connect(analyser);
analyser.connect(audioContext.destination);
this.audio = audioElement;
this.context = audioContext;
this.analyser = analyser;
this.sampleRate = this.context.sampleRate;
this.audioBuffer = null;
}
}
/**
* Gets the current frequency domain data from the playing audio track
* @param {"frequency"|"music"|"voice"} [analysisType]
* @param {number} [minDecibels] default -100
* @param {number} [maxDecibels] default -30
* @returns {AudioAnalysisOutputType}
*/
getFrequencies(
analysisType = 'frequency',
minDecibels = -100,
maxDecibels = -30,
) {
let fftResult = null;
if (this.audioBuffer && this.fftResults.length) {
const pct = this.audio.currentTime / this.audio.duration;
const index = Math.min(
(pct * this.fftResults.length) | 0,
this.fftResults.length - 1,
);
fftResult = this.fftResults[index];
}
return AudioAnalysis.getFrequencies(
this.analyser,
this.sampleRate,
fftResult,
analysisType,
minDecibels,
maxDecibels,
);
}
/**
* Resume the internal AudioContext if it was suspended due to the lack of
* user interaction when the AudioAnalysis was instantiated.
* @returns {Promise<true>}
*/
async resumeIfSuspended() {
if (this.context.state === 'suspended') {
await this.context.resume();
}
return true;
}
}
globalThis.AudioAnalysis = AudioAnalysis;
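
A rough usage sketch for the class above — not part of the commit; the import path and the `<audio id="player">` element are assumptions:

```js
// Sketch: log the normalized "voice" band of a playing <audio> element.
import { AudioAnalysis } from './lib/analysis/audio_analysis.js'; // assumed path

const audioEl = document.getElementById('player'); // assumed element id
const analysis = new AudioAnalysis(audioEl);

audioEl.addEventListener('play', async () => {
  // The AudioContext may start suspended until a user gesture occurs.
  await analysis.resumeIfSuspended();
  const draw = () => {
    const { values, labels } = analysis.getFrequencies('voice');
    // values[i] is a 0..1 amplitude for the frequency band named in labels[i]
    console.log(labels[0], values[0].toFixed(3));
    if (!audioEl.paused) requestAnimationFrame(draw);
  };
  draw();
});
```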


@@ -0,0 +1,60 @@
/**
* Constants for help with visualization
* Helps map frequency ranges from Fast Fourier Transform
* to human-interpretable ranges, notably music ranges and
* human vocal ranges.
*/
// Eighth octave frequencies
const octave8Frequencies = [
4186.01, 4434.92, 4698.63, 4978.03, 5274.04, 5587.65, 5919.91, 6271.93,
6644.88, 7040.0, 7458.62, 7902.13,
];
// Labels for each of the above frequencies
const octave8FrequencyLabels = [
'C',
'C#',
'D',
'D#',
'E',
'F',
'F#',
'G',
'G#',
'A',
'A#',
'B',
];
/**
* All note frequencies from 1st to 8th octave
* in format "A#8" (A#, 8th octave)
*/
export const noteFrequencies = [];
export const noteFrequencyLabels = [];
for (let i = 1; i <= 8; i++) {
for (let f = 0; f < octave8Frequencies.length; f++) {
const freq = octave8Frequencies[f];
noteFrequencies.push(freq / Math.pow(2, 8 - i));
noteFrequencyLabels.push(octave8FrequencyLabels[f] + i);
}
}
/**
* Subset of the note frequencies between 32 and 2000 Hz
* 6 octave range: C1 to B6
*/
const voiceFrequencyRange = [32.0, 2000.0];
export const voiceFrequencies = noteFrequencies.filter((_, i) => {
return (
noteFrequencies[i] > voiceFrequencyRange[0] &&
noteFrequencies[i] < voiceFrequencyRange[1]
);
});
export const voiceFrequencyLabels = noteFrequencyLabels.filter((_, i) => {
return (
noteFrequencies[i] > voiceFrequencyRange[0] &&
noteFrequencies[i] < voiceFrequencyRange[1]
);
});
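
A small sanity check of how these arrays pair up index-for-index (sketch; the relative import path is an assumption):

```js
// Labels and frequencies share indices, so concert pitch A4 resolves to 440 Hz.
import { noteFrequencies, noteFrequencyLabels } from './constants.js'; // assumed path

const i = noteFrequencyLabels.indexOf('A4');
console.log(noteFrequencyLabels[i], noteFrequencies[i]); // "A4" 440
```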


@@ -0,0 +1,113 @@
/**
* Raw wav audio file contents
* @typedef {Object} WavPackerAudioType
* @property {Blob} blob
* @property {string} url
* @property {number} channelCount
* @property {number} sampleRate
* @property {number} duration
*/
/**
* Utility class for assembling PCM16 "audio/wav" data
* @class
*/
export class WavPacker {
/**
* Converts Float32Array of amplitude data to ArrayBuffer in Int16Array format
* @param {Float32Array} float32Array
* @returns {ArrayBuffer}
*/
static floatTo16BitPCM(float32Array) {
const buffer = new ArrayBuffer(float32Array.length * 2);
const view = new DataView(buffer);
let offset = 0;
for (let i = 0; i < float32Array.length; i++, offset += 2) {
let s = Math.max(-1, Math.min(1, float32Array[i]));
view.setInt16(offset, s < 0 ? s * 0x8000 : s * 0x7fff, true);
}
return buffer;
}
/**
* Concatenates two ArrayBuffers
* @param {ArrayBuffer} leftBuffer
* @param {ArrayBuffer} rightBuffer
* @returns {ArrayBuffer}
*/
static mergeBuffers(leftBuffer, rightBuffer) {
const tmpArray = new Uint8Array(
leftBuffer.byteLength + rightBuffer.byteLength
);
tmpArray.set(new Uint8Array(leftBuffer), 0);
tmpArray.set(new Uint8Array(rightBuffer), leftBuffer.byteLength);
return tmpArray.buffer;
}
/**
* Packs data into an Int16 format
* @private
* @param {number} size 0 = 1x Int16, 1 = 2x Int16
* @param {number} arg value to pack
* @returns
*/
_packData(size, arg) {
return [
new Uint8Array([arg, arg >> 8]),
new Uint8Array([arg, arg >> 8, arg >> 16, arg >> 24]),
][size];
}
/**
* Packs audio into "audio/wav" Blob
* @param {number} sampleRate
* @param {{bitsPerSample: number, channels: Array<Float32Array>, data: Int16Array}} audio
* @returns {WavPackerAudioType}
*/
pack(sampleRate, audio) {
if (!audio?.bitsPerSample) {
throw new Error(`Missing "bitsPerSample"`);
} else if (!audio?.channels) {
throw new Error(`Missing "channels"`);
} else if (!audio?.data) {
throw new Error(`Missing "data"`);
}
const { bitsPerSample, channels, data } = audio;
const output = [
// Header
'RIFF',
this._packData(
1,
4 + (8 + 24) /* chunk 1 length */ + (8 + 8) /* chunk 2 length */
), // Length
'WAVE',
// chunk 1
'fmt ', // Sub-chunk identifier
this._packData(1, 16), // Chunk length
this._packData(0, 1), // Audio format (1 is linear quantization)
this._packData(0, channels.length),
this._packData(1, sampleRate),
this._packData(1, (sampleRate * channels.length * bitsPerSample) / 8), // Byte rate
this._packData(0, (channels.length * bitsPerSample) / 8),
this._packData(0, bitsPerSample),
// chunk 2
'data', // Sub-chunk identifier
this._packData(
1,
(channels[0].length * channels.length * bitsPerSample) / 8
), // Chunk length
data,
];
const blob = new Blob(output, { type: 'audio/mpeg' });
const url = URL.createObjectURL(blob);
return {
blob,
url,
channelCount: channels.length,
sampleRate,
duration: data.byteLength / (channels.length * sampleRate * 2),
};
}
}
globalThis.WavPacker = WavPacker;
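
A short sketch of packing raw samples with the class above — not part of the commit; `WavPacker` is assumed to be in scope (e.g. via the `globalThis` assignment):

```js
// Sketch: pack one second of a 440 Hz mono sine tone into a WAV blob.
const sampleRate = 44100;
const samples = new Float32Array(sampleRate);
for (let i = 0; i < samples.length; i++) {
  samples[i] = Math.sin((2 * Math.PI * 440 * i) / sampleRate);
}

const packer = new WavPacker();
const wav = packer.pack(sampleRate, {
  bitsPerSample: 16,
  channels: [samples],                      // one Float32Array per channel (mono here)
  data: WavPacker.floatTo16BitPCM(samples), // Int16 PCM as an ArrayBuffer
});
console.log(wav.duration, wav.channelCount, wav.url); // 1 second, 1 channel, blob: URL
```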


@@ -0,0 +1,548 @@
import { AudioProcessorSrc } from './worklets/audio_processor.js';
import { AudioAnalysis } from './analysis/audio_analysis.js';
import { WavPacker } from './wav_packer.js';
/**
* Decodes audio into a wav file
* @typedef {Object} DecodedAudioType
* @property {Blob} blob
* @property {string} url
* @property {Float32Array} values
* @property {AudioBuffer} audioBuffer
*/
/**
* Records live stream of user audio as PCM16 "audio/wav" data
* @class
*/
export class WavRecorder {
/**
* Create a new WavRecorder instance
* @param {{sampleRate?: number, outputToSpeakers?: boolean, debug?: boolean}} [options]
* @returns {WavRecorder}
*/
constructor({
sampleRate = 44100,
outputToSpeakers = false,
debug = false,
} = {}) {
// Script source
this.scriptSrc = AudioProcessorSrc;
// Config
this.sampleRate = sampleRate;
this.outputToSpeakers = outputToSpeakers;
this.debug = !!debug;
this._deviceChangeCallback = null;
this._devices = [];
// State variables
this.stream = null;
this.processor = null;
this.source = null;
this.node = null;
this.recording = false;
// Event handling with AudioWorklet
this._lastEventId = 0;
this.eventReceipts = {};
this.eventTimeout = 5000;
// Process chunks of audio
this._chunkProcessor = () => {};
this._chunkProcessorSize = void 0;
this._chunkProcessorBuffer = {
raw: new ArrayBuffer(0),
mono: new ArrayBuffer(0),
};
}
/**
* Decodes audio data from multiple formats to a Blob, url, Float32Array and AudioBuffer
* @param {Blob|Float32Array|Int16Array|ArrayBuffer|number[]} audioData
* @param {number} sampleRate
* @param {number} fromSampleRate
* @returns {Promise<DecodedAudioType>}
*/
static async decode(audioData, sampleRate = 44100, fromSampleRate = -1) {
const context = new AudioContext({ sampleRate });
let arrayBuffer;
let blob;
if (audioData instanceof Blob) {
if (fromSampleRate !== -1) {
throw new Error(
`Can not specify "fromSampleRate" when reading from Blob`,
);
}
blob = audioData;
arrayBuffer = await blob.arrayBuffer();
} else if (audioData instanceof ArrayBuffer) {
if (fromSampleRate !== -1) {
throw new Error(
`Can not specify "fromSampleRate" when reading from ArrayBuffer`,
);
}
arrayBuffer = audioData;
blob = new Blob([arrayBuffer], { type: 'audio/wav' });
} else {
let float32Array;
let data;
if (audioData instanceof Int16Array) {
data = audioData;
float32Array = new Float32Array(audioData.length);
for (let i = 0; i < audioData.length; i++) {
float32Array[i] = audioData[i] / 0x8000;
}
} else if (audioData instanceof Float32Array) {
float32Array = audioData;
} else if (audioData instanceof Array) {
float32Array = new Float32Array(audioData);
} else {
throw new Error(
`"audioData" must be one of: Blob, Float32Arrray, Int16Array, ArrayBuffer, Array<number>`,
);
}
if (fromSampleRate === -1) {
throw new Error(
`Must specify "fromSampleRate" when reading from Float32Array, In16Array or Array`,
);
} else if (fromSampleRate < 3000) {
throw new Error(`Minimum "fromSampleRate" is 3000 (3kHz)`);
}
if (!data) {
data = WavPacker.floatTo16BitPCM(float32Array);
}
const audio = {
bitsPerSample: 16,
channels: [float32Array],
data,
};
const packer = new WavPacker();
const result = packer.pack(fromSampleRate, audio);
blob = result.blob;
arrayBuffer = await blob.arrayBuffer();
}
const audioBuffer = await context.decodeAudioData(arrayBuffer);
const values = audioBuffer.getChannelData(0);
const url = URL.createObjectURL(blob);
return {
blob,
url,
values,
audioBuffer,
};
}
/**
* Logs data in debug mode
* @param {...any} arguments
* @returns {true}
*/
log() {
if (this.debug) {
console.log(...arguments);
}
return true;
}
/**
* Retrieves the current sampleRate for the recorder
* @returns {number}
*/
getSampleRate() {
return this.sampleRate;
}
/**
* Retrieves the current status of the recording
* @returns {"ended"|"paused"|"recording"}
*/
getStatus() {
if (!this.processor) {
return 'ended';
} else if (!this.recording) {
return 'paused';
} else {
return 'recording';
}
}
/**
* Sends an event to the AudioWorklet
* @private
* @param {string} name
* @param {{[key: string]: any}} data
* @param {AudioWorkletNode} [_processor]
* @returns {Promise<{[key: string]: any}>}
*/
async _event(name, data = {}, _processor = null) {
_processor = _processor || this.processor;
if (!_processor) {
throw new Error('Can not send events without recording first');
}
const message = {
event: name,
id: this._lastEventId++,
data,
};
_processor.port.postMessage(message);
const t0 = new Date().valueOf();
while (!this.eventReceipts[message.id]) {
if (new Date().valueOf() - t0 > this.eventTimeout) {
throw new Error(`Timeout waiting for "${name}" event`);
}
await new Promise((res) => setTimeout(() => res(true), 1));
}
const payload = this.eventReceipts[message.id];
delete this.eventReceipts[message.id];
return payload;
}
/**
* Sets device change callback, remove if callback provided is `null`
* @param {(Array<MediaDeviceInfo & {default: boolean}>): void|null} callback
* @returns {true}
*/
listenForDeviceChange(callback) {
if (callback === null && this._deviceChangeCallback) {
navigator.mediaDevices.removeEventListener(
'devicechange',
this._deviceChangeCallback,
);
this._deviceChangeCallback = null;
} else if (callback !== null) {
// Basically a debounce; we only want this called once when devices change
// And we only want the most recent callback() to be executed
// if a few are operating at the same time
let lastId = 0;
let lastDevices = [];
const serializeDevices = (devices) =>
devices
.map((d) => d.deviceId)
.sort()
.join(',');
const cb = async () => {
let id = ++lastId;
const devices = await this.listDevices();
if (id === lastId) {
if (serializeDevices(lastDevices) !== serializeDevices(devices)) {
lastDevices = devices;
callback(devices.slice());
}
}
};
navigator.mediaDevices.addEventListener('devicechange', cb);
cb();
this._deviceChangeCallback = cb;
}
return true;
}
/**
* Manually request permission to use the microphone
* @returns {Promise<true>}
*/
async requestPermission() {
const permissionStatus = await navigator.permissions.query({
name: 'microphone',
});
if (permissionStatus.state === 'denied') {
window.alert('You must grant microphone access to use this feature.');
} else if (permissionStatus.state === 'prompt') {
try {
const stream = await navigator.mediaDevices.getUserMedia({
audio: true,
});
const tracks = stream.getTracks();
tracks.forEach((track) => track.stop());
} catch (e) {
window.alert('You must grant microphone access to use this feature.');
}
}
return true;
}
/**
* List all eligible devices for recording, will request permission to use microphone
* @returns {Promise<Array<MediaDeviceInfo & {default: boolean}>>}
*/
async listDevices() {
if (
!navigator.mediaDevices ||
!('enumerateDevices' in navigator.mediaDevices)
) {
throw new Error('Could not request user devices');
}
await this.requestPermission();
const devices = await navigator.mediaDevices.enumerateDevices();
const audioDevices = devices.filter(
(device) => device.kind === 'audioinput',
);
const defaultDeviceIndex = audioDevices.findIndex(
(device) => device.deviceId === 'default',
);
const deviceList = [];
if (defaultDeviceIndex !== -1) {
let defaultDevice = audioDevices.splice(defaultDeviceIndex, 1)[0];
let existingIndex = audioDevices.findIndex(
(device) => device.groupId === defaultDevice.groupId,
);
if (existingIndex !== -1) {
defaultDevice = audioDevices.splice(existingIndex, 1)[0];
}
defaultDevice.default = true;
deviceList.push(defaultDevice);
}
return deviceList.concat(audioDevices);
}
/**
* Begins a recording session and requests microphone permissions if not already granted
* Microphone recording indicator will appear on browser tab but status will be "paused"
* @param {string} [deviceId] if no device provided, default device will be used
* @returns {Promise<true>}
*/
async begin(deviceId) {
if (this.processor) {
throw new Error(
`Already connected: please call .end() to start a new session`,
);
}
if (
!navigator.mediaDevices ||
!('getUserMedia' in navigator.mediaDevices)
) {
throw new Error('Could not request user media');
}
try {
const config = { audio: true };
if (deviceId) {
config.audio = { deviceId: { exact: deviceId } };
}
this.stream = await navigator.mediaDevices.getUserMedia(config);
} catch (err) {
throw new Error('Could not start media stream');
}
const context = new AudioContext({ sampleRate: this.sampleRate });
const source = context.createMediaStreamSource(this.stream);
// Load and execute the module script.
try {
await context.audioWorklet.addModule(this.scriptSrc);
} catch (e) {
console.error(e);
throw new Error(`Could not add audioWorklet module: ${this.scriptSrc}`);
}
const processor = new AudioWorkletNode(context, 'audio_processor');
processor.port.onmessage = (e) => {
const { event, id, data } = e.data;
if (event === 'receipt') {
this.eventReceipts[id] = data;
} else if (event === 'chunk') {
if (this._chunkProcessorSize) {
const buffer = this._chunkProcessorBuffer;
this._chunkProcessorBuffer = {
raw: WavPacker.mergeBuffers(buffer.raw, data.raw),
mono: WavPacker.mergeBuffers(buffer.mono, data.mono),
};
if (
this._chunkProcessorBuffer.mono.byteLength >=
this._chunkProcessorSize
) {
this._chunkProcessor(this._chunkProcessorBuffer);
this._chunkProcessorBuffer = {
raw: new ArrayBuffer(0),
mono: new ArrayBuffer(0),
};
}
} else {
this._chunkProcessor(data);
}
}
};
const node = source.connect(processor);
const analyser = context.createAnalyser();
analyser.fftSize = 8192;
analyser.smoothingTimeConstant = 0.1;
node.connect(analyser);
if (this.outputToSpeakers) {
// eslint-disable-next-line no-console
console.warn(
'Warning: Output to speakers may affect sound quality,\n' +
'especially due to system audio feedback preventative measures.\n' +
'use only for debugging',
);
analyser.connect(context.destination);
}
this.source = source;
this.node = node;
this.analyser = analyser;
this.processor = processor;
return true;
}
/**
* Gets the current frequency domain data from the recording track
* @param {"frequency"|"music"|"voice"} [analysisType]
* @param {number} [minDecibels] default -100
* @param {number} [maxDecibels] default -30
* @returns {import('./analysis/audio_analysis.js').AudioAnalysisOutputType}
*/
getFrequencies(
analysisType = 'frequency',
minDecibels = -100,
maxDecibels = -30,
) {
if (!this.processor) {
throw new Error('Session ended: please call .begin() first');
}
return AudioAnalysis.getFrequencies(
this.analyser,
this.sampleRate,
null,
analysisType,
minDecibels,
maxDecibels,
);
}
/**
* Pauses the recording
* Keeps microphone stream open but halts storage of audio
* @returns {Promise<true>}
*/
async pause() {
if (!this.processor) {
throw new Error('Session ended: please call .begin() first');
} else if (!this.recording) {
throw new Error('Already paused: please call .record() first');
}
if (this._chunkProcessorBuffer.raw.byteLength) {
this._chunkProcessor(this._chunkProcessorBuffer);
}
this.log('Pausing ...');
await this._event('stop');
this.recording = false;
return true;
}
/**
* Start recording stream and storing to memory from the connected audio source
* @param {(data: { mono: Int16Array; raw: Int16Array }) => any} [chunkProcessor]
* @param {number} [chunkSize] chunkProcessor will not be triggered until this size threshold met in mono audio
* @returns {Promise<true>}
*/
async record(chunkProcessor = () => {}, chunkSize = 8192) {
if (!this.processor) {
throw new Error('Session ended: please call .begin() first');
} else if (this.recording) {
throw new Error('Already recording: please call .pause() first');
} else if (typeof chunkProcessor !== 'function') {
throw new Error(`chunkProcessor must be a function`);
}
this._chunkProcessor = chunkProcessor;
this._chunkProcessorSize = chunkSize;
this._chunkProcessorBuffer = {
raw: new ArrayBuffer(0),
mono: new ArrayBuffer(0),
};
this.log('Recording ...');
await this._event('start');
this.recording = true;
return true;
}
/**
* Clears the audio buffer, empties stored recording
* @returns {Promise<true>}
*/
async clear() {
if (!this.processor) {
throw new Error('Session ended: please call .begin() first');
}
await this._event('clear');
return true;
}
/**
* Reads the current audio stream data
* @returns {Promise<{meanValues: Float32Array, channels: Array<Float32Array>}>}
*/
async read() {
if (!this.processor) {
throw new Error('Session ended: please call .begin() first');
}
this.log('Reading ...');
const result = await this._event('read');
return result;
}
/**
* Saves the current audio stream to a file
* @param {boolean} [force] Force saving while still recording
* @returns {Promise<import('./wav_packer.js').WavPackerAudioType>}
*/
async save(force = false) {
if (!this.processor) {
throw new Error('Session ended: please call .begin() first');
}
if (!force && this.recording) {
throw new Error(
'Currently recording: please call .pause() first, or call .save(true) to force',
);
}
this.log('Exporting ...');
const exportData = await this._event('export');
const packer = new WavPacker();
const result = packer.pack(this.sampleRate, exportData.audio);
return result;
}
/**
* Ends the current recording session and saves the result
* @returns {Promise<import('./wav_packer.js').WavPackerAudioType>}
*/
async end() {
if (!this.processor) {
throw new Error('Session ended: please call .begin() first');
}
const _processor = this.processor;
this.log('Stopping ...');
await this._event('stop');
this.recording = false;
const tracks = this.stream.getTracks();
tracks.forEach((track) => track.stop());
this.log('Exporting ...');
const exportData = await this._event('export', {}, _processor);
this.processor.disconnect();
this.source.disconnect();
this.node.disconnect();
this.analyser.disconnect();
this.stream = null;
this.processor = null;
this.source = null;
this.node = null;
const packer = new WavPacker();
const result = packer.pack(this.sampleRate, exportData.audio);
return result;
}
/**
* Performs a full cleanup of WavRecorder instance
* Stops actively listening via microphone and removes existing listeners
* @returns {Promise<true>}
*/
async quit() {
this.listenForDeviceChange(null);
if (this.processor) {
await this.end();
}
return true;
}
}
globalThis.WavRecorder = WavRecorder;
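
A condensed usage sketch for the recorder above — not part of the commit; the 24 kHz sample rate and 8192-byte chunk threshold are illustrative choices:

```js
// Sketch: capture microphone audio and receive PCM16 chunks as they accumulate.
const recorder = new WavRecorder({ sampleRate: 24000 });

await recorder.begin();           // requests mic permission; status becomes "paused"
await recorder.record((chunk) => {
  // chunk.mono / chunk.raw hold Int16 PCM data, e.g. to forward to a server
  console.log('mono bytes:', chunk.mono.byteLength);
}, 8192);

// ...later
await recorder.pause();           // stop buffering but keep the mic stream open
const wav = await recorder.end(); // { blob, url, channelCount, sampleRate, duration }
console.log(`recorded ${wav.duration.toFixed(2)}s`);
```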


@@ -0,0 +1,160 @@
import { StreamProcessorSrc } from './worklets/stream_processor.js';
import { AudioAnalysis } from './analysis/audio_analysis.js';
/**
* Plays audio streams received in raw PCM16 chunks from the browser
* @class
*/
export class WavStreamPlayer {
/**
* Creates a new WavStreamPlayer instance
* @param {{sampleRate?: number}} options
* @returns {WavStreamPlayer}
*/
constructor({ sampleRate = 44100 } = {}) {
this.scriptSrc = StreamProcessorSrc;
this.sampleRate = sampleRate;
this.context = null;
this.stream = null;
this.analyser = null;
this.trackSampleOffsets = {};
this.interruptedTrackIds = {};
}
/**
* Connects the audio context and enables output to speakers
* @returns {Promise<true>}
*/
async connect() {
this.context = new AudioContext({ sampleRate: this.sampleRate });
if (this.context.state === 'suspended') {
await this.context.resume();
}
try {
await this.context.audioWorklet.addModule(this.scriptSrc);
} catch (e) {
console.error(e);
throw new Error(`Could not add audioWorklet module: ${this.scriptSrc}`);
}
const analyser = this.context.createAnalyser();
analyser.fftSize = 8192;
analyser.smoothingTimeConstant = 0.1;
this.analyser = analyser;
return true;
}
/**
* Gets the current frequency domain data from the playing track
* @param {"frequency"|"music"|"voice"} [analysisType]
* @param {number} [minDecibels] default -100
* @param {number} [maxDecibels] default -30
* @returns {import('./analysis/audio_analysis.js').AudioAnalysisOutputType}
*/
getFrequencies(
analysisType = 'frequency',
minDecibels = -100,
maxDecibels = -30
) {
if (!this.analyser) {
throw new Error('Not connected, please call .connect() first');
}
return AudioAnalysis.getFrequencies(
this.analyser,
this.sampleRate,
null,
analysisType,
minDecibels,
maxDecibels
);
}
/**
* Starts audio streaming
* @private
* @returns {Promise<true>}
*/
_start() {
const streamNode = new AudioWorkletNode(this.context, 'stream_processor');
streamNode.connect(this.context.destination);
streamNode.port.onmessage = (e) => {
const { event } = e.data;
if (event === 'stop') {
streamNode.disconnect();
this.stream = null;
} else if (event === 'offset') {
const { requestId, trackId, offset } = e.data;
const currentTime = offset / this.sampleRate;
this.trackSampleOffsets[requestId] = { trackId, offset, currentTime };
}
};
this.analyser.disconnect();
streamNode.connect(this.analyser);
this.stream = streamNode;
return true;
}
/**
* Adds 16BitPCM data to the currently playing audio stream
* You can add chunks beyond the current play point and they will be queued for play
* @param {ArrayBuffer|Int16Array} arrayBuffer
* @param {string} [trackId]
* @returns {Int16Array}
*/
add16BitPCM(arrayBuffer, trackId = 'default') {
if (typeof trackId !== 'string') {
throw new Error(`trackId must be a string`);
} else if (this.interruptedTrackIds[trackId]) {
return;
}
if (!this.stream) {
this._start();
}
let buffer;
if (arrayBuffer instanceof Int16Array) {
buffer = arrayBuffer;
} else if (arrayBuffer instanceof ArrayBuffer) {
buffer = new Int16Array(arrayBuffer);
} else {
throw new Error(`argument must be Int16Array or ArrayBuffer`);
}
this.stream.port.postMessage({ event: 'write', buffer, trackId });
return buffer;
}
/**
* Gets the offset (sample count) of the currently playing stream
* @param {boolean} [interrupt]
* @returns {{trackId: string|null, offset: number, currentTime: number}}
*/
async getTrackSampleOffset(interrupt = false) {
if (!this.stream) {
return null;
}
const requestId = crypto.randomUUID();
this.stream.port.postMessage({
event: interrupt ? 'interrupt' : 'offset',
requestId,
});
let trackSampleOffset;
while (!trackSampleOffset) {
trackSampleOffset = this.trackSampleOffsets[requestId];
await new Promise((r) => setTimeout(() => r(), 1));
}
const { trackId } = trackSampleOffset;
if (interrupt && trackId) {
this.interruptedTrackIds[trackId] = true;
}
return trackSampleOffset;
}
/**
* Strips the current stream and returns the sample offset of the audio
* @param {boolean} [interrupt]
* @returns {{trackId: string|null, offset: number, currentTime: number}}
*/
async interrupt() {
return this.getTrackSampleOffset(true);
}
}
globalThis.WavStreamPlayer = WavStreamPlayer;
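
A sketch of how the player above is typically driven by streamed audio deltas (e.g. Realtime API `response.audio.delta` events); the handler names and base64 decoding are illustrative assumptions, not part of the commit:

```js
// Sketch: stream PCM16 chunks into the player and interrupt on user barge-in.
const player = new WavStreamPlayer({ sampleRate: 24000 });
await player.connect(); // must be called before adding audio

// Called for each base64-encoded audio chunk received from the server (assumed shape).
function onAudioDelta(base64Audio, itemId) {
  const bytes = Uint8Array.from(atob(base64Audio), (c) => c.charCodeAt(0));
  player.add16BitPCM(bytes.buffer, itemId); // queued per trackId and played in order
}

// Called when the listener starts speaking (assumed).
async function onBargeIn() {
  const offsets = await player.interrupt(); // null if nothing is playing
  if (offsets) {
    console.log(`interrupted ${offsets.trackId} at sample ${offsets.offset}`);
  }
}
```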


@@ -0,0 +1,214 @@
const AudioProcessorWorklet = `
class AudioProcessor extends AudioWorkletProcessor {
constructor() {
super();
this.port.onmessage = this.receive.bind(this);
this.initialize();
}
initialize() {
this.foundAudio = false;
this.recording = false;
this.chunks = [];
}
/**
* Concatenates sampled chunks into channels
* Format is chunk[Left[], Right[]]
*/
readChannelData(chunks, channel = -1, maxChannels = 9) {
let channelLimit;
if (channel !== -1) {
if (chunks[0] && chunks[0].length - 1 < channel) {
throw new Error(
\`Channel \${channel} out of range: max \${chunks[0].length}\`
);
}
channelLimit = channel + 1;
} else {
channel = 0;
channelLimit = Math.min(chunks[0] ? chunks[0].length : 1, maxChannels);
}
const channels = [];
for (let n = channel; n < channelLimit; n++) {
const length = chunks.reduce((sum, chunk) => {
return sum + chunk[n].length;
}, 0);
const buffers = chunks.map((chunk) => chunk[n]);
const result = new Float32Array(length);
let offset = 0;
for (let i = 0; i < buffers.length; i++) {
result.set(buffers[i], offset);
offset += buffers[i].length;
}
channels[n] = result;
}
return channels;
}
/**
* Combines parallel audio data into correct format,
* channels[Left[], Right[]] to float32Array[LRLRLRLR...]
*/
formatAudioData(channels) {
if (channels.length === 1) {
// Simple case is only one channel
const float32Array = channels[0].slice();
const meanValues = channels[0].slice();
return { float32Array, meanValues };
} else {
const float32Array = new Float32Array(
channels[0].length * channels.length
);
const meanValues = new Float32Array(channels[0].length);
for (let i = 0; i < channels[0].length; i++) {
const offset = i * channels.length;
let meanValue = 0;
for (let n = 0; n < channels.length; n++) {
float32Array[offset + n] = channels[n][i];
meanValue += channels[n][i];
}
meanValues[i] = meanValue / channels.length;
}
return { float32Array, meanValues };
}
}
/**
* Converts 32-bit float data to 16-bit integers
*/
floatTo16BitPCM(float32Array) {
const buffer = new ArrayBuffer(float32Array.length * 2);
const view = new DataView(buffer);
let offset = 0;
for (let i = 0; i < float32Array.length; i++, offset += 2) {
let s = Math.max(-1, Math.min(1, float32Array[i]));
view.setInt16(offset, s < 0 ? s * 0x8000 : s * 0x7fff, true);
}
return buffer;
}
/**
* Retrieves the most recent amplitude values from the audio stream
* @param {number} channel
*/
getValues(channel = -1) {
const channels = this.readChannelData(this.chunks, channel);
const { meanValues } = this.formatAudioData(channels);
return { meanValues, channels };
}
/**
* Exports chunks as an audio/wav file
*/
export() {
const channels = this.readChannelData(this.chunks);
const { float32Array, meanValues } = this.formatAudioData(channels);
const audioData = this.floatTo16BitPCM(float32Array);
return {
meanValues: meanValues,
audio: {
bitsPerSample: 16,
channels: channels,
data: audioData,
},
};
}
receive(e) {
const { event, id } = e.data;
let receiptData = {};
switch (event) {
case 'start':
this.recording = true;
break;
case 'stop':
this.recording = false;
break;
case 'clear':
this.initialize();
break;
case 'export':
receiptData = this.export();
break;
case 'read':
receiptData = this.getValues();
break;
default:
break;
}
// Always send back receipt
this.port.postMessage({ event: 'receipt', id, data: receiptData });
}
sendChunk(chunk) {
const channels = this.readChannelData([chunk]);
const { float32Array, meanValues } = this.formatAudioData(channels);
const rawAudioData = this.floatTo16BitPCM(float32Array);
const monoAudioData = this.floatTo16BitPCM(meanValues);
this.port.postMessage({
event: 'chunk',
data: {
mono: monoAudioData,
raw: rawAudioData,
},
});
}
process(inputList, outputList, parameters) {
// Copy input to output (e.g. speakers)
// Note that this creates choppy sounds with Mac products
const sourceLimit = Math.min(inputList.length, outputList.length);
for (let inputNum = 0; inputNum < sourceLimit; inputNum++) {
const input = inputList[inputNum];
const output = outputList[inputNum];
const channelCount = Math.min(input.length, output.length);
for (let channelNum = 0; channelNum < channelCount; channelNum++) {
input[channelNum].forEach((sample, i) => {
output[channelNum][i] = sample;
});
}
}
const inputs = inputList[0];
// There's latency at the beginning of a stream before recording starts
// Make sure we actually receive audio data before we start storing chunks
let sliceIndex = 0;
if (!this.foundAudio) {
for (const channel of inputs) {
sliceIndex = 0; // reset for each channel
if (this.foundAudio) {
break;
}
if (channel) {
for (const value of channel) {
if (value !== 0) {
// find only one non-zero entry in any channel
this.foundAudio = true;
break;
} else {
sliceIndex++;
}
}
}
}
}
if (inputs && inputs[0] && this.foundAudio && this.recording) {
// We need to copy the TypedArray, because the \`process\`
// internals will reuse the same buffer to hold each input
const chunk = inputs.map((input) => input.slice(sliceIndex));
this.chunks.push(chunk);
this.sendChunk(chunk);
}
return true;
}
}
registerProcessor('audio_processor', AudioProcessor);
`;
const script = new Blob([AudioProcessorWorklet], {
type: 'application/javascript',
});
const src = URL.createObjectURL(script);
export const AudioProcessorSrc = src;


@@ -0,0 +1,96 @@
export const StreamProcessorWorklet = `
class StreamProcessor extends AudioWorkletProcessor {
constructor() {
super();
this.hasStarted = false;
this.hasInterrupted = false;
this.outputBuffers = [];
this.bufferLength = 128;
this.write = { buffer: new Float32Array(this.bufferLength), trackId: null };
this.writeOffset = 0;
this.trackSampleOffsets = {};
this.port.onmessage = (event) => {
if (event.data) {
const payload = event.data;
if (payload.event === 'write') {
const int16Array = payload.buffer;
const float32Array = new Float32Array(int16Array.length);
for (let i = 0; i < int16Array.length; i++) {
float32Array[i] = int16Array[i] / 0x8000; // Convert Int16 to Float32
}
this.writeData(float32Array, payload.trackId);
} else if (
payload.event === 'offset' ||
payload.event === 'interrupt'
) {
const requestId = payload.requestId;
const trackId = this.write.trackId;
const offset = this.trackSampleOffsets[trackId] || 0;
this.port.postMessage({
event: 'offset',
requestId,
trackId,
offset,
});
if (payload.event === 'interrupt') {
this.hasInterrupted = true;
}
} else {
throw new Error(\`Unhandled event "\${payload.event}"\`);
}
}
};
}
writeData(float32Array, trackId = null) {
let { buffer } = this.write;
let offset = this.writeOffset;
for (let i = 0; i < float32Array.length; i++) {
buffer[offset++] = float32Array[i];
if (offset >= buffer.length) {
this.outputBuffers.push(this.write);
this.write = { buffer: new Float32Array(this.bufferLength), trackId };
buffer = this.write.buffer;
offset = 0;
}
}
this.writeOffset = offset;
return true;
}
process(inputs, outputs, parameters) {
const output = outputs[0];
const outputChannelData = output[0];
const outputBuffers = this.outputBuffers;
if (this.hasInterrupted) {
this.port.postMessage({ event: 'stop' });
return false;
} else if (outputBuffers.length) {
this.hasStarted = true;
const { buffer, trackId } = outputBuffers.shift();
for (let i = 0; i < outputChannelData.length; i++) {
outputChannelData[i] = buffer[i] || 0;
}
if (trackId) {
this.trackSampleOffsets[trackId] =
this.trackSampleOffsets[trackId] || 0;
this.trackSampleOffsets[trackId] += buffer.length;
}
return true;
} else if (this.hasStarted) {
this.port.postMessage({ event: 'stop' });
return false;
} else {
return true;
}
}
}
registerProcessor('stream_processor', StreamProcessor);
`;
const script = new Blob([StreamProcessorWorklet], {
type: 'application/javascript',
});
const src = URL.createObjectURL(script);
export const StreamProcessorSrc = src;