Compare commits
5 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 0360f2493d | |||
| 0172887877 | |||
| 8f89f5e3cf | |||
| 5bd41f8c6a | |||
| e4399e1b7b |
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "@ztimson/ai-utils",
|
"name": "@ztimson/ai-utils",
|
||||||
"version": "0.6.6",
|
"version": "0.6.10",
|
||||||
"description": "AI Utility library",
|
"description": "AI Utility library",
|
||||||
"author": "Zak Timson",
|
"author": "Zak Timson",
|
||||||
"license": "MIT",
|
"license": "MIT",
|
||||||
|
|||||||
@@ -8,6 +8,8 @@ export type AbortablePromise<T> = Promise<T> & {
|
|||||||
};
|
};
|
||||||
|
|
||||||
export type AiOptions = {
|
export type AiOptions = {
|
||||||
|
/** Token to pull models from hugging face */
|
||||||
|
hfToken?: string;
|
||||||
/** Path to models */
|
/** Path to models */
|
||||||
path?: string;
|
path?: string;
|
||||||
/** ASR model: whisper-tiny, whisper-base */
|
/** ASR model: whisper-tiny, whisper-base */
|
||||||
|
|||||||
18
src/asr.ts
18
src/asr.ts
@@ -14,14 +14,15 @@ export async function canDiarization(): Promise<boolean> {
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
async function runDiarization(audioPath: string, torchHome: string): Promise<any[]> {
|
async function runDiarization(audioPath: string, dir: string, token: string): Promise<any[]> {
|
||||||
const script = `
|
const script = `
|
||||||
import sys
|
import sys
|
||||||
import json
|
import json
|
||||||
import os
|
import os
|
||||||
from pyannote.audio import Pipeline
|
from pyannote.audio import Pipeline
|
||||||
|
|
||||||
os.environ['TORCH_HOME'] = "${torchHome}"
|
os.environ['TORCH_HOME'] = "${dir}"
|
||||||
|
os.environ['HF_TOKEN'] = "${token}"
|
||||||
pipeline = Pipeline.from_pretrained("pyannote/speaker-diarization-3.1")
|
pipeline = Pipeline.from_pretrained("pyannote/speaker-diarization-3.1")
|
||||||
diarization = pipeline(sys.argv[1])
|
diarization = pipeline(sys.argv[1])
|
||||||
|
|
||||||
@@ -82,12 +83,13 @@ function combineSpeakerTranscript(chunks: any[], speakers: any[]): string {
|
|||||||
return lines.join('\n');
|
return lines.join('\n');
|
||||||
}
|
}
|
||||||
|
|
||||||
parentPort?.on('message', async ({ path, model, speaker, torchHome }) => {
|
parentPort?.on('message', async ({ file, speaker, model, modelDir, token }) => {
|
||||||
try {
|
try {
|
||||||
if(!whisperPipeline) whisperPipeline = await pipeline('automatic-speech-recognition', `Xenova/${model}`, {cache_dir: torchHome, quantized: true});
|
console.log('worker', file);
|
||||||
|
if(!whisperPipeline) whisperPipeline = await pipeline('automatic-speech-recognition', `Xenova/${model}`, {cache_dir: modelDir, quantized: true});
|
||||||
|
|
||||||
// Prepare audio file (convert to mono channel wave)
|
// Prepare audio file (convert to mono channel wave)
|
||||||
const wav = new wavefile.WaveFile(fs.readFileSync(path));
|
const wav = new wavefile.WaveFile(fs.readFileSync(file));
|
||||||
wav.toBitDepth('32f');
|
wav.toBitDepth('32f');
|
||||||
wav.toSampleRate(16000);
|
wav.toSampleRate(16000);
|
||||||
const samples = wav.getSamples();
|
const samples = wav.getSamples();
|
||||||
@@ -110,12 +112,12 @@ parentPort?.on('message', async ({ path, model, speaker, torchHome }) => {
|
|||||||
|
|
||||||
// Speaker Diarization
|
// Speaker Diarization
|
||||||
const hasDiarization = await canDiarization();
|
const hasDiarization = await canDiarization();
|
||||||
if(!hasDiarization) {
|
if(!token || !hasDiarization) {
|
||||||
parentPort?.postMessage({ text: transcriptResult.text?.trim() || null, warning: 'Speaker diarization unavailable' });
|
parentPort?.postMessage({ text: transcriptResult.text?.trim() || null, error: 'Speaker diarization unavailable' });
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
const speakers = await runDiarization(path, torchHome);
|
const speakers = await runDiarization(file, modelDir, token);
|
||||||
const combined = combineSpeakerTranscript(transcriptResult.chunks || [], speakers);
|
const combined = combineSpeakerTranscript(transcriptResult.chunks || [], speakers);
|
||||||
parentPort?.postMessage({ text: combined });
|
parentPort?.postMessage({ text: combined });
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
|
|||||||
@@ -1,18 +1,19 @@
|
|||||||
|
import {fileURLToPath} from 'url';
|
||||||
import {Worker} from 'worker_threads';
|
import {Worker} from 'worker_threads';
|
||||||
import path from 'node:path';
|
|
||||||
import {AbortablePromise, Ai} from './ai.ts';
|
import {AbortablePromise, Ai} from './ai.ts';
|
||||||
import {canDiarization} from './asr.ts';
|
import {canDiarization} from './asr.ts';
|
||||||
|
import {dirname, join} from 'path';
|
||||||
|
|
||||||
export class Audio {
|
export class Audio {
|
||||||
constructor(private ai: Ai) {}
|
constructor(private ai: Ai) {}
|
||||||
|
|
||||||
asr(filepath: string, options: { model?: string; speaker?: boolean } = {}): AbortablePromise<string | null> {
|
asr(file: string, options: { model?: string; speaker?: boolean } = {}): AbortablePromise<string | null> {
|
||||||
const { model = this.ai.options.asr || 'whisper-base', speaker = false } = options;
|
const { model = this.ai.options.asr || 'whisper-base', speaker = false } = options;
|
||||||
let aborted = false;
|
let aborted = false;
|
||||||
const abort = () => { aborted = true; };
|
const abort = () => { aborted = true; };
|
||||||
|
|
||||||
const p = new Promise<string | null>((resolve, reject) => {
|
const p = new Promise<string | null>((resolve, reject) => {
|
||||||
const worker = new Worker(path.join(import.meta.dirname, 'asr.js'));
|
const worker = new Worker(join(dirname(fileURLToPath(import.meta.url)), 'asr.js'));
|
||||||
const handleMessage = ({ text, warning, error }: any) => {
|
const handleMessage = ({ text, warning, error }: any) => {
|
||||||
worker.terminate();
|
worker.terminate();
|
||||||
if(aborted) return;
|
if(aborted) return;
|
||||||
@@ -31,7 +32,7 @@ export class Audio {
|
|||||||
worker.on('exit', (code) => {
|
worker.on('exit', (code) => {
|
||||||
if(code !== 0 && !aborted) reject(new Error(`Worker exited with code ${code}`));
|
if(code !== 0 && !aborted) reject(new Error(`Worker exited with code ${code}`));
|
||||||
});
|
});
|
||||||
worker.postMessage({path: filepath, model, speaker, torchHome: this.ai.options.path,});
|
worker.postMessage({file, model, speaker, modelDir: this.ai.options.path, token: this.ai.options.hfToken});
|
||||||
});
|
});
|
||||||
return Object.assign(p, { abort });
|
return Object.assign(p, { abort });
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -3,12 +3,9 @@ import { parentPort } from 'worker_threads';
|
|||||||
|
|
||||||
let embedder: any;
|
let embedder: any;
|
||||||
|
|
||||||
parentPort?.on('message', async ({ id, text, model, path }) => {
|
parentPort?.on('message', async ({text, model, modelDir }) => {
|
||||||
if(!embedder) embedder = await pipeline('feature-extraction', 'Xenova/' + model, {
|
if(!embedder) embedder = await pipeline('feature-extraction', 'Xenova/' + model, {quantized: true, cache_dir: modelDir});
|
||||||
quantized: true,
|
|
||||||
cache_dir: path,
|
|
||||||
});
|
|
||||||
const output = await embedder(text, { pooling: 'mean', normalize: true });
|
const output = await embedder(text, { pooling: 'mean', normalize: true });
|
||||||
const embedding = Array.from(output.data);
|
const embedding = Array.from(output.data);
|
||||||
parentPort?.postMessage({ id, embedding });
|
parentPort?.postMessage({embedding});
|
||||||
});
|
});
|
||||||
|
|||||||
@@ -271,7 +271,7 @@ class LLM {
|
|||||||
worker.on('exit', (code) => {
|
worker.on('exit', (code) => {
|
||||||
if(code !== 0) reject(new Error(`Worker exited with code ${code}`));
|
if(code !== 0) reject(new Error(`Worker exited with code ${code}`));
|
||||||
});
|
});
|
||||||
worker.postMessage({text, model: this.ai.options?.embedder || 'bge-small-en-v1.5', path: this.ai.options.path});
|
worker.postMessage({text, model: this.ai.options?.embedder || 'bge-small-en-v1.5', modelDir: this.ai.options.path});
|
||||||
});
|
});
|
||||||
};
|
};
|
||||||
const chunks = this.chunk(target, maxTokens, overlapTokens);
|
const chunks = this.chunk(target, maxTokens, overlapTokens);
|
||||||
|
|||||||
Reference in New Issue
Block a user