Skip to content

Commit

Permalink
Support Silero V5 (#154)
Browse files Browse the repository at this point in the history
  • Loading branch information
ricky0123 authored Dec 1, 2024
1 parent 80e7b34 commit 482e054
Show file tree
Hide file tree
Showing 33 changed files with 389 additions and 659 deletions.
24 changes: 24 additions & 0 deletions docs/developer-guide/hacking.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,27 @@ The automated tests are useful, but manual testing is even more important. There
## Project Management

I set up a [GitHub project for VAD](https://github.com/users/ricky0123/projects/1) to track work related to the project.

## Playing with VAD model in browser console

Go to [test.vad.ricky0123.com](https://test.vad.ricky0123.com) and open the browser console. Then run the following line by line:

```js linenums="1"
// Inject a <script> tag that loads onnxruntime-web from the jsDelivr CDN.
script = this.document.createElement("script")
script.src = "https://cdn.jsdelivr.net/npm/onnxruntime-web/dist/ort.js"
document.body.appendChild(script)
// wait a few seconds -- presumably so the injected script has time to load and define `ort`, which the lines below use
// Fetch the model file (URL is the current `location` with the file name appended) and read it as an ArrayBuffer.
modelarraybuffer = await fetch(`${location}silero_vad_v5.onnx`).then((model) => model.arrayBuffer())
// Create an inference session from the downloaded model bytes.
session = await ort.InferenceSession.create(modelarraybuffer)
// Zero-filled float32 state tensor: 2 * 128 values with dims [2, 1, 128].
state_zeroes = Array(2 * 128).fill(0)
state = new this.ort.Tensor("float32", state_zeroes, [2, 1, 128]) // https://github.com/snakers4/silero-vad/blob/fdbb0a3a81e0f9d95561d6b388d67dce5d9e3f1b/utils_vad.py#L58
// Zero-filled float32 audio tensor: 512 values with dims [1, 512].
audio_zeros = Array(512).fill(0)
audio = new this.ort.Tensor("float32", audio_zeros, [1, audio_zeros.length])
// Single-element int64 tensor holding 16000n -- presumably the sample rate in Hz; confirm against the model.
sr = new this.ort.Tensor("int64", [16000n])
// Feeds for session.run, keyed "sr", "state" and "input" -- presumably the model's input names; verify against the model.
inputs = {
sr,
state,
input: audio
}
// Run the session on the feeds above and keep the result in `out` for inspection.
out = await session.run(inputs)
```
80 changes: 4 additions & 76 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions packages/react/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -13,15 +13,15 @@
"react"
],
"homepage": "https://github.com/ricky0123/vad",
"version": "0.0.25",
"version": "0.0.26",
"license": "ISC",
"main": "dist/index.js",
"devDependencies": {
"@types/react": "^18.0.28"
},
"dependencies": {
"onnxruntime-web": "^1.14.0",
"@ricky0123/vad-web": "^0.0.19"
"@ricky0123/vad-web": "^0.0.20"
},
"peerDependencies": {
"react": "^18",
Expand Down
28 changes: 18 additions & 10 deletions packages/react/src/index.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
import type { RealTimeVADOptions } from "@ricky0123/vad-web"
import { MicVAD, defaultRealTimeVADOptions } from "@ricky0123/vad-web"
import {
DEFAULT_MODEL,
MicVAD,
getDefaultRealTimeVADOptions,
} from "@ricky0123/vad-web"
import React, { useEffect, useReducer, useState } from "react"

export { utils } from "@ricky0123/vad-web"
Expand All @@ -16,13 +20,17 @@ const defaultReactOptions: ReactOptions = {
userSpeakingThreshold: 0.6,
}

export const defaultReactRealTimeVADOptions = {
...defaultRealTimeVADOptions,
...defaultReactOptions,
export const getDefaultReactRealTimeVADOptions = (
model: "legacy" | "v5"
): ReactRealTimeVADOptions => {
return {
...getDefaultRealTimeVADOptions(model),
...defaultReactOptions,
}
}

const reactOptionKeys = Object.keys(defaultReactOptions)
const vadOptionKeys = Object.keys(defaultRealTimeVADOptions)
const vadOptionKeys = Object.keys(getDefaultRealTimeVADOptions("v5"))

const _filter = (keys: string[], obj: any) => {
return keys.reduce((acc, key) => {
Expand All @@ -34,7 +42,8 @@ const _filter = (keys: string[], obj: any) => {
function useOptions(
options: Partial<ReactRealTimeVADOptions>
): [ReactOptions, RealTimeVADOptions] {
options = { ...defaultReactRealTimeVADOptions, ...options }
const model = options.model ?? DEFAULT_MODEL
options = { ...getDefaultReactRealTimeVADOptions(model), ...options }
const reactOptions = _filter(reactOptionKeys, options) as ReactOptions
const vadOptions = _filter(vadOptionKeys, options) as RealTimeVADOptions
return [reactOptions, vadOptions]
Expand Down Expand Up @@ -62,7 +71,7 @@ export function useMicVAD(options: Partial<ReactRealTimeVADOptions>) {
false
)
const [loading, setLoading] = useState(true)
const [errored, setErrored] = useState<false | { message: string }>(false)
const [errored, setErrored] = useState<false | string>(false)
const [listening, setListening] = useState(false)
const [vad, setVAD] = useState<MicVAD | null>(null)

Expand Down Expand Up @@ -92,10 +101,9 @@ export function useMicVAD(options: Partial<ReactRealTimeVADOptions>) {
} catch (e) {
setLoading(false)
if (e instanceof Error) {
setErrored({ message: e.message })
setErrored(e.message)
} else {
// @ts-ignore
setErrored({ message: e })
setErrored(String(e))
}
return
}
Expand Down
5 changes: 1 addition & 4 deletions packages/web/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,12 @@
"offline-speech-recognition"
],
"homepage": "https://github.com/ricky0123/vad",
"version": "0.0.19",
"version": "0.0.20",
"license": "ISC",
"main": "dist/index.js",
"unpkg": "dist/bundle.min.js",
"jsdelivr": "dist/bundle.min.js",
"devDependencies": {
"@playwright/test": "^1.48.1",
"@types/audioworklet": "^0.0.36",
"@types/express": "^4.17.17",
"express": "^4.18.2",
Expand All @@ -30,8 +29,6 @@
},
"scripts": {
"build": "./scripts/build.sh",
"test-server": "tsc -p ./scripts/tsconfig.json && node ./scripts/test-server.js",
"test": "playwright test",
"clean": "rm -rf dist",
"publish": "npm publish --access public"
}
Expand Down
90 changes: 0 additions & 90 deletions packages/web/playwright.config.ts

This file was deleted.

5 changes: 4 additions & 1 deletion packages/web/scripts/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,9 @@
rm -rf dist
mkdir dist
npx tsc
cp ../../silero_vad.onnx dist
cp \
../../silero_vad_legacy.onnx \
../../silero_vad_v5.onnx \
dist
npx webpack -c webpack.config.worklet.js
npx webpack -c webpack.config.index.js
Loading

0 comments on commit 482e054

Please sign in to comment.