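Fix ASR (automatic speech recognition): use the default `wavefile` import and downmix stereo audio to mono before Whisper transcription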
This commit is contained in:
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@ztimson/ai-utils",
|
||||
"version": "0.6.4",
|
||||
"version": "0.6.5",
|
||||
"description": "AI Utility library",
|
||||
"author": "Zak Timson",
|
||||
"license": "MIT",
|
||||
|
||||
23
src/audio.ts
23
src/audio.ts
@@ -2,7 +2,7 @@ import {spawn} from 'node:child_process';
|
||||
import {pipeline} from '@xenova/transformers';
|
||||
import * as fs from 'node:fs';
|
||||
import {AbortablePromise, Ai} from './ai.ts';
|
||||
import * as wavefile from 'wavefile';
|
||||
import wavefile from 'wavefile';
|
||||
|
||||
export class Audio {
|
||||
private whisperPipeline: any;
|
||||
@@ -96,14 +96,25 @@ print(json.dumps(segments))
|
||||
if(aborted) return resolve(null);
|
||||
if(!this.whisperPipeline) this.whisperPipeline = await pipeline('automatic-speech-recognition', `Xenova/${model}`, { cache_dir: this.ai.options.path, quantized: true });
|
||||
|
||||
// Transcript
|
||||
// Prepare audio file (convert to mono channel wave)
|
||||
if(aborted) return resolve(null);
|
||||
const audioData = fs.readFileSync(path);
|
||||
const wav = new wavefile.WaveFile(audioData);
|
||||
const wav = new wavefile.WaveFile(fs.readFileSync(path));
|
||||
wav.toBitDepth('32f');
|
||||
wav.toSampleRate(16000);
|
||||
const buffer = wav.getSamples();
|
||||
const transcriptResult = await this.whisperPipeline(buffer, {return_timestamps: speaker ? 'word' : false, chunk_length_s: 30,});
|
||||
const samples = wav.getSamples();
|
||||
let buffer;
|
||||
if(Array.isArray(samples)) { // stereo to mono - average the channels
|
||||
const left = samples[0];
|
||||
const right = samples[1];
|
||||
buffer = new Float32Array(left.length);
|
||||
for (let i = 0; i < left.length; i++) buffer[i] = (left[i] + right[i]) / 2;
|
||||
} else {
|
||||
buffer = samples;
|
||||
}
|
||||
|
||||
// Transcribe
|
||||
if(aborted) return resolve(null);
|
||||
const transcriptResult = await this.whisperPipeline(buffer, {return_timestamps: speaker ? 'word' : false});
|
||||
if(!speaker) return resolve(transcriptResult.text?.trim() || null);
|
||||
|
||||
// Speaker Diarization
|
||||
|
||||
Reference in New Issue
Block a user