From 60b0b6c913ca32e6e62c662b7551ba7c965ff3eb Mon Sep 17 00:00:00 2001 From: Ricky Samore Date: Sat, 2 Dec 2023 17:02:10 -0600 Subject: [PATCH] tweak + test destroy (#65) * tweak + test destroy * bump versions --- package-lock.json | 7 +- packages/_common/src/messages.ts | 2 +- packages/react/package.json | 2 +- packages/react/src/index.ts | 4 +- packages/web/package.json | 2 +- packages/web/src/asset-path.ts | 10 +- packages/web/src/default-model-fetcher.ts | 5 +- packages/web/src/index.ts | 20 ++- packages/web/src/real-time-vad.ts | 174 ++++++++++++---------- packages/web/src/worklet.ts | 6 +- test-site/src/index.html | 1 + test-site/src/react-destroy/index.html | 16 ++ test-site/src/react-destroy/index.jsx | 88 +++++++++++ 13 files changed, 230 insertions(+), 107 deletions(-) create mode 100644 test-site/src/react-destroy/index.html create mode 100644 test-site/src/react-destroy/index.jsx diff --git a/package-lock.json b/package-lock.json index 89c0bc3..33858cd 100644 --- a/package-lock.json +++ b/package-lock.json @@ -5,6 +5,7 @@ "requires": true, "packages": { "": { + "name": "vad", "version": "0.0.0", "license": "ISC", "workspaces": [ @@ -6172,7 +6173,7 @@ "version": "0.0.18", "license": "ISC", "dependencies": { - "@ricky0123/vad-web": "^0.0.13", + "@ricky0123/vad-web": "^0.0.14", "onnxruntime-web": "^1.14.0" }, "devDependencies": { @@ -6185,7 +6186,7 @@ }, "packages/web": { "name": "@ricky0123/vad-web", - "version": "0.0.13", + "version": "0.0.14", "license": "ISC", "dependencies": { "onnxruntime-web": "^1.14.0" @@ -6488,7 +6489,7 @@ "@ricky0123/vad-react": { "version": "file:packages/react", "requires": { - "@ricky0123/vad-web": "^0.0.13", + "@ricky0123/vad-web": "^0.0.14", "@types/react": "^18.0.28", "onnxruntime-web": "^1.14.0" } diff --git a/packages/_common/src/messages.ts b/packages/_common/src/messages.ts index 83a69ba..bf356bc 100644 --- a/packages/_common/src/messages.ts +++ b/packages/_common/src/messages.ts @@ -3,5 +3,5 @@ export enum Message { SpeechStart = "SPEECH_START", VADMisfire = "VAD_MISFIRE", SpeechEnd = "SPEECH_END", - SpeechStop = "SPEECH_STOP" + SpeechStop = "SPEECH_STOP", } diff --git a/packages/react/package.json b/packages/react/package.json index 62098d2..27ee421 100644 --- a/packages/react/package.json +++ b/packages/react/package.json @@ -21,7 +21,7 @@ }, "dependencies": { "onnxruntime-web": "^1.14.0", - "@ricky0123/vad-web": "^0.0.13" + "@ricky0123/vad-web": "^0.0.14" }, "peerDependencies": { "react": "^18", diff --git a/packages/react/src/index.ts b/packages/react/src/index.ts index 6eb7048..aa5dc7f 100644 --- a/packages/react/src/index.ts +++ b/packages/react/src/index.ts @@ -80,8 +80,8 @@ export function useMicVAD(options: Partial) { vadOptions.onVADMisfire = _onVADMisfire useEffect(() => { + let myvad: MicVAD | null const setup = async (): Promise => { - let myvad: MicVAD | null try { myvad = await MicVAD.new(vadOptions) } catch (e) { @@ -105,8 +105,8 @@ export function useMicVAD(options: Partial) { console.log("Well that didn't work") }) return function cleanUp() { + myvad?.destroy() if (!loading && !errored) { - vad?.destroy() setListening(false) } } diff --git a/packages/web/package.json b/packages/web/package.json index 3d2bdb9..28e41d1 100644 --- a/packages/web/package.json +++ b/packages/web/package.json @@ -12,7 +12,7 @@ "offline-speech-recognition" ], "homepage": "https://github.com/ricky0123/vad", - "version": "0.0.13", + "version": "0.0.14", "license": "ISC", "main": "dist/index.js", "unpkg": "dist/bundle.min.js", diff --git a/packages/web/src/asset-path.ts b/packages/web/src/asset-path.ts index 1ae0ae9..6f18be7 100644 --- a/packages/web/src/asset-path.ts +++ b/packages/web/src/asset-path.ts @@ -1,9 +1,9 @@ // nextjs@14 bundler may attempt to execute this during SSR and crash -const isWeb = typeof window !== 'undefined' && typeof window.document !== 'undefined'; -const currentScript = - isWeb - ? window.document.currentScript as HTMLScriptElement - : null +const isWeb = + typeof window !== "undefined" && typeof window.document !== "undefined" +const currentScript = isWeb + ? (window.document.currentScript as HTMLScriptElement) + : null let basePath = "" if (currentScript) { diff --git a/packages/web/src/default-model-fetcher.ts b/packages/web/src/default-model-fetcher.ts index 8a150c0..32e7f85 100644 --- a/packages/web/src/default-model-fetcher.ts +++ b/packages/web/src/default-model-fetcher.ts @@ -1,4 +1,3 @@ export const defaultModelFetcher = (path: string) => { - return fetch(path) - .then(model=>model.arrayBuffer()) -}; \ No newline at end of file + return fetch(path).then((model) => model.arrayBuffer()) +} diff --git a/packages/web/src/index.ts b/packages/web/src/index.ts index 21cde4a..1a36cd5 100644 --- a/packages/web/src/index.ts +++ b/packages/web/src/index.ts @@ -12,27 +12,25 @@ import { audioFileToArray } from "./utils" import { defaultModelFetcher } from "./default-model-fetcher" import { assetPath } from "./asset-path" - export interface NonRealTimeVADOptionsWeb extends NonRealTimeVADOptions { - modelURL: string, - modelFetcher: (path: string) => Promise, -} + modelURL: string + modelFetcher: (path: string) => Promise +} export const defaultNonRealTimeVADOptions = { modelURL: assetPath("silero_vad.onnx"), - modelFetcher: defaultModelFetcher + modelFetcher: defaultModelFetcher, } class NonRealTimeVAD extends PlatformAgnosticNonRealTimeVAD { static async new( options: Partial = {} ): Promise { - const {modelURL, modelFetcher} = {...defaultNonRealTimeVADOptions, ...options}; - return await this._new( - () => modelFetcher(modelURL), - ort, - options - ) + const { modelURL, modelFetcher } = { + ...defaultNonRealTimeVADOptions, + ...options, + } + return await this._new(() => modelFetcher(modelURL), ort, options) } } diff --git a/packages/web/src/real-time-vad.ts b/packages/web/src/real-time-vad.ts index 974a3a8..64f2703 100644 --- a/packages/web/src/real-time-vad.ts +++ b/packages/web/src/real-time-vad.ts @@ -43,7 +43,7 @@ type AudioConstraints = Omit< type AssetOptions = { workletURL: string - modelURL: string, + modelURL: string modelFetcher: (path: string) => Promise } @@ -66,7 +66,6 @@ export type RealTimeVADOptions = | RealTimeVADOptionsWithStream | RealTimeVADOptionsWithoutStream - export const defaultRealTimeVADOptions: RealTimeVADOptions = { ...defaultFrameProcessorOptions, onFrameProcessed: (probabilities) => {}, @@ -79,52 +78,59 @@ export const defaultRealTimeVADOptions: RealTimeVADOptions = { onSpeechEnd: () => { log.debug("Detected speech end") }, - workletURL: assetPath("vad.worklet.bundle.min.js"), + workletURL: assetPath("vad.worklet.bundle.min.js"), modelURL: assetPath("silero_vad.onnx"), modelFetcher: defaultModelFetcher, stream: undefined, } export class MicVAD { - audioContext: AudioContext | null = null - // @ts-ignore - stream: MediaStream - // @ts-ignore - audioNodeVAD: AudioNodeVAD - listening = false - static async new(options: Partial = {}) { - const vad = new MicVAD({ ...defaultRealTimeVADOptions, ...options }) - await vad.init() - return vad - } - - constructor(public options: RealTimeVADOptions) { - validateOptions(options) - } + const fullOptions: RealTimeVADOptions = { + ...defaultRealTimeVADOptions, + ...options, + } + validateOptions(fullOptions) - init = async () => { - if (this.options.stream === undefined) - this.stream = await navigator.mediaDevices.getUserMedia({ + let stream: MediaStream + if (fullOptions.stream === undefined) + stream = await navigator.mediaDevices.getUserMedia({ audio: { - ...this.options.additionalAudioConstraints, + ...fullOptions.additionalAudioConstraints, channelCount: 1, echoCancellation: true, autoGainControl: true, noiseSuppression: true, }, }) - else this.stream = this.options.stream + else stream = fullOptions.stream - this.audioContext = new AudioContext() - const source = new MediaStreamAudioSourceNode(this.audioContext, { - mediaStream: this.stream, + const audioContext = new AudioContext() + const sourceNode = new MediaStreamAudioSourceNode(audioContext, { + mediaStream: stream, }) - this.audioNodeVAD = await AudioNodeVAD.new(this.audioContext, this.options) - this.audioNodeVAD.receive(source) + const audioNodeVAD = await AudioNodeVAD.new(audioContext, fullOptions) + audioNodeVAD.receive(sourceNode) + + return new MicVAD( + fullOptions, + audioContext, + stream, + audioNodeVAD, + sourceNode + ) } + private constructor( + public options: RealTimeVADOptions, + private audioContext: AudioContext, + private stream: MediaStream, + private audioNodeVAD: AudioNodeVAD, + private sourceNode: MediaStreamAudioSourceNode, + private listening = false + ) {} + pause = () => { this.audioNodeVAD.pause() this.listening = false @@ -139,37 +145,77 @@ export class MicVAD { if (this.listening) { this.pause() } - this.stream.getTracks().forEach((t) => t.stop()) - this.audioContext?.close() - this.audioContext = null - this.audioNodeVAD.entryNode.port.postMessage({ - message: Message.SpeechStop, - }) + this.sourceNode.disconnect() + this.audioNodeVAD.destroy() + this.audioContext.close() } } export class AudioNodeVAD { - // @ts-ignore - frameProcessor: FrameProcessor - // @ts-ignore - entryNode: AudioWorkletNode - static async new( ctx: AudioContext, options: Partial = {} ) { - const vad = new AudioNodeVAD(ctx, { + const fullOptions: RealTimeVADOptions = { ...defaultRealTimeVADOptions, ...options, + } + validateOptions(fullOptions) + + await ctx.audioWorklet.addModule(fullOptions.workletURL) + const vadNode = new AudioWorkletNode(ctx, "vad-helper-worklet", { + processorOptions: { + frameSamples: fullOptions.frameSamples, + }, }) - await vad.init() - return vad - } - constructor(public ctx: AudioContext, public options: RealTimeVADOptions) { - validateOptions(options) + const model = await Silero.new(ort, () => + fullOptions.modelFetcher(fullOptions.modelURL) + ) + + const frameProcessor = new FrameProcessor( + model.process, + model.reset_state, + { + frameSamples: fullOptions.frameSamples, + positiveSpeechThreshold: fullOptions.positiveSpeechThreshold, + negativeSpeechThreshold: fullOptions.negativeSpeechThreshold, + redemptionFrames: fullOptions.redemptionFrames, + preSpeechPadFrames: fullOptions.preSpeechPadFrames, + minSpeechFrames: fullOptions.minSpeechFrames, + } + ) + + const audioNodeVAD = new AudioNodeVAD( + ctx, + fullOptions, + frameProcessor, + vadNode + ) + + vadNode.port.onmessage = async (ev: MessageEvent) => { + switch (ev.data?.message) { + case Message.AudioFrame: + const buffer: ArrayBuffer = ev.data.data + const frame = new Float32Array(buffer) + await audioNodeVAD.processFrame(frame) + break + + default: + break + } + } + + return audioNodeVAD } + constructor( + public ctx: AudioContext, + public options: RealTimeVADOptions, + private frameProcessor: FrameProcessor, + private entryNode: AudioWorkletNode + ) {} + pause = () => { this.frameProcessor.pause() } @@ -197,8 +243,7 @@ export class AudioNodeVAD { break case Message.SpeechEnd: - // @ts-ignore - this.options.onSpeechEnd(audio) + this.options.onSpeechEnd(audio as Float32Array) break default: @@ -206,37 +251,10 @@ export class AudioNodeVAD { } } - init = async () => { - await this.ctx.audioWorklet.addModule(this.options.workletURL) - const vadNode = new AudioWorkletNode(this.ctx, "vad-helper-worklet", { - processorOptions: { - frameSamples: this.options.frameSamples, - }, - }) - this.entryNode = vadNode - - const model = await Silero.new(ort, () => this.options.modelFetcher(this.options.modelURL)) - - this.frameProcessor = new FrameProcessor(model.process, model.reset_state, { - frameSamples: this.options.frameSamples, - positiveSpeechThreshold: this.options.positiveSpeechThreshold, - negativeSpeechThreshold: this.options.negativeSpeechThreshold, - redemptionFrames: this.options.redemptionFrames, - preSpeechPadFrames: this.options.preSpeechPadFrames, - minSpeechFrames: this.options.minSpeechFrames, + destroy = () => { + this.entryNode.port.postMessage({ + message: Message.SpeechStop, }) - - vadNode.port.onmessage = async (ev: MessageEvent) => { - switch (ev.data?.message) { - case Message.AudioFrame: - const buffer: ArrayBuffer = ev.data.data - const frame = new Float32Array(buffer) - await this.processFrame(frame) - break - - default: - break - } - } + this.entryNode.disconnect() } } diff --git a/packages/web/src/worklet.ts b/packages/web/src/worklet.ts index 3358187..408da5f 100644 --- a/packages/web/src/worklet.ts +++ b/packages/web/src/worklet.ts @@ -38,6 +38,10 @@ class Processor extends AudioWorkletProcessor { outputs: Float32Array[][], parameters: Record ): boolean { + if (this._stopProcessing) { + return false + } + // @ts-ignore const arr = inputs[0][0] @@ -51,8 +55,6 @@ class Processor extends AudioWorkletProcessor { } } - if (this._stopProcessing) return false - return true } } diff --git a/test-site/src/index.html b/test-site/src/index.html index cea1bc7..040a710 100644 --- a/test-site/src/index.html +++ b/test-site/src/index.html @@ -20,6 +20,7 @@

Welcome to the VAD test site

diff --git a/test-site/src/react-destroy/index.html b/test-site/src/react-destroy/index.html new file mode 100644 index 0000000..ffff7b9 --- /dev/null +++ b/test-site/src/react-destroy/index.html @@ -0,0 +1,16 @@ + + + + VAD test site + + + + + + +
+ + diff --git a/test-site/src/react-destroy/index.jsx b/test-site/src/react-destroy/index.jsx new file mode 100644 index 0000000..fd844c7 --- /dev/null +++ b/test-site/src/react-destroy/index.jsx @@ -0,0 +1,88 @@ +// @ts-nocheck + +import React, { useReducer, useState } from "react" +import * as ort from "onnxruntime-web" +import { createRoot } from "react-dom/client" +import { useMicVAD, utils } from "@ricky0123/vad-react" + +ort.env.wasm.wasmPaths = { + "ort-wasm-simd-threaded.wasm": "/ort-wasm-simd-threaded.wasm", + "ort-wasm-simd.wasm": "/ort-wasm-simd.wasm", + "ort-wasm.wasm": "/ort-wasm.wasm", + "ort-wasm-threaded.wasm": "/ort-wasm-threaded.wasm", +} + +const domContainer = document.querySelector("#root") +const root = createRoot(domContainer) +root.render() + +function App() { + const [demoActive, setDemoActive] = useState(false) + return ( +
+ + {demoActive && } +
+ ) +} + +function VADDemo() { + const [audioList, setAudioList] = useState([]) + const vad = useMicVAD({ + workletURL: "http://localhost:8080/vad.worklet.bundle.min.js", + modelURL: "http://localhost:8080/silero_vad.onnx", + onVADMisfire: () => { + console.log("Vad misfire") + }, + onSpeechStart: () => { + console.log("Speech start") + }, + onSpeechEnd: (audio) => { + console.log("Speech end") + const wavBuffer = utils.encodeWAV(audio) + const base64 = utils.arrayBufferToBase64(wavBuffer) + const url = `data:audio/wav;base64,${base64}` + setAudioList((old) => [url, ...old]) + }, + }) + return ( +
+

Basic vad-react functionality

+ +
+ +
+ +
+
    + {audioList.map((audioURL) => { + return ( +
  • +
  • + ) + })} +
+
+
+ ) +}