diff --git a/package.json b/package.json
index 112aa78..a70f031 100644
--- a/package.json
+++ b/package.json
@@ -1,6 +1,6 @@
 {
 	"name": "@ztimson/utils",
-	"version": "0.28.8",
+	"version": "0.28.9",
 	"description": "Utility library",
 	"author": "Zak Timson",
 	"license": "MIT",
diff --git a/src/index.ts b/src/index.ts
index b1d9759..6456fe9 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -21,5 +21,6 @@ export * from './search';
 export * from './string';
 export * from './template';
 export * from './time';
+export * from './tts';
 export * from './types';
 export * from 'var-persist';
diff --git a/src/string.ts b/src/string.ts
index c9d136e..147054a 100644
--- a/src/string.ts
+++ b/src/string.ts
@@ -140,7 +140,7 @@ export function pascalCase(str?: string): string {
  * @param {string} str Input string with emojis
  * @returns {string} Sanitized string without emojis
  */
-function removeEmojis(str: string): string {
+export function removeEmojis(str: string): string {
 	const emojiRegex = /(?:[\u2700-\u27bf]|(?:\ud83c[\udde6-\uddff]){2}|[\ud83c[\udde6-\uddff]|[\ud83d[\ude00-\ude4f]|[\ud83d[\ude80-\udeff]|[\ud83c[\udd00-\uddff]|[\ud83d[\ude50-\ude7f]|[\u2600-\u26ff]|[\u2700-\u27bf]|[\ud83e[\udd00-\uddff]|[\ud83c[\udf00-\uffff]|[\ud83d[\ude00-\udeff]|[\ud83c[\udde6-\uddff])/g;
 	return str.replace(emojiRegex, '');
 }
diff --git a/src/tts.ts b/src/tts.ts
new file mode 100644
index 0000000..a99d4de
--- /dev/null
+++ b/src/tts.ts
@@ -0,0 +1,167 @@
+import {removeEmojis} from './string';
+
+/**
+ * Text-to-speech helper built on the browser Web Speech API (window.speechSynthesis).
+ *
+ * Rate, pitch, volume & voice can be changed at any time; new values apply to every utterance
+ * created afterwards and are also copied onto the utterance currently tracked by the instance.
+ */
+export class TTS {
+	/** Voice name fragments that indicate a higher quality voice, highest priority first */
+	private static readonly QUALITY_PATTERNS = ['Google', 'Microsoft', 'Samantha', 'Premium', 'Natural', 'Neural'];
+
+	/** Most recently started utterance that has not yet ended, failed or been stopped */
+	private _currentUtterance: SpeechSynthesisUtterance | null = null;
+
+	private _rate: number = 1.0;
+	/** Speaking rate used for utterances (defaults to 1.0) */
+	get rate(): number { return this._rate; }
+	set rate(value: number) {
+		this._rate = value;
+		if(this._currentUtterance) this._currentUtterance.rate = value;
+	}
+
+	private _pitch: number = 1.0;
+	/** Voice pitch used for utterances (defaults to 1.0) */
+	get pitch(): number { return this._pitch; }
+	set pitch(value: number) {
+		this._pitch = value;
+		if(this._currentUtterance) this._currentUtterance.pitch = value;
+	}
+
+	private _volume: number = 1.0;
+	/** Playback volume used for utterances (defaults to 1.0) */
+	get volume(): number { return this._volume; }
+	set volume(value: number) {
+		this._volume = value;
+		if(this._currentUtterance) this._currentUtterance.volume = value;
+	}
+
+	private _voice: SpeechSynthesisVoice | undefined;
+	/** Explicitly selected voice; when undefined the best available voice is picked per utterance */
+	get voice(): SpeechSynthesisVoice | undefined { return this._voice; }
+	set voice(value: SpeechSynthesisVoice | undefined) {
+		this._voice = value;
+		// Clearing the voice only affects future utterances, the current one keeps its voice
+		if(this._currentUtterance && value) this._currentUtterance.voice = value;
+	}
+
+	/**
+	 * Create a TTS instance with optional configuration
+	 * @param config Initial rate, pitch, volume & voice. Omitted values keep their defaults, a null voice means auto-select
+	 */
+	constructor(config?: {rate?: number; pitch?: number; volume?: number; voice?: SpeechSynthesisVoice | null}) {
+		if(!config) return;
+		if(config.rate !== undefined) this._rate = config.rate;
+		if(config.pitch !== undefined) this._pitch = config.pitch;
+		if(config.volume !== undefined) this._volume = config.volume;
+		this._voice = config.voice ?? undefined;
+	}
+
+	/**
+	 * Selects the best available TTS voice, prioritizing high-quality options
+	 * @param lang Language prefix the voice language must start with
+	 * @returns First voice matching a quality pattern, otherwise any voice for the language, otherwise undefined
+	 */
+	private static bestVoice(lang = 'en'): SpeechSynthesisVoice | undefined {
+		const voices = window.speechSynthesis.getVoices().filter(v => v.lang.startsWith(lang));
+		for(const pattern of TTS.QUALITY_PATTERNS) {
+			const voice = voices.find(v => v.name.includes(pattern));
+			if(voice) return voice;
+		}
+		return voices[0];
+	}
+
+	/** Cleans text for TTS by removing emojis, replacing fenced code blocks with a placeholder & stripping markdown characters */
+	private static cleanText(text: string): string {
+		return removeEmojis(text)
+			.replace(/```[\s\S]*?```/g, ' code block ')
+			.replace(/[#*_~`]/g, '');
+	}
+
+	/** Creates a speech utterance from cleaned text using the current voice, rate, pitch & volume */
+	private createUtterance(text: string): SpeechSynthesisUtterance {
+		const utterance = new SpeechSynthesisUtterance(TTS.cleanText(text));
+		const voice = this._voice || TTS.bestVoice();
+		if(voice) utterance.voice = voice;
+		utterance.rate = this._rate;
+		utterance.pitch = this._pitch;
+		utterance.volume = this._volume;
+		return utterance;
+	}
+
+	/**
+	 * Speaks text and returns a Promise which resolves once complete
+	 * @param text Text to speak, blank text resolves immediately without speaking
+	 * @returns Resolves when the utterance ends, rejects with the error event if it fails
+	 */
+	speak(text: string): Promise<void> {
+		if(!text.trim()) return Promise.resolve();
+
+		return new Promise<void>((resolve, reject) => {
+			const utterance = this.createUtterance(text);
+			// Several utterances can be queued at once, only release the tracked one if it is still ours
+			const release = () => {
+				if(this._currentUtterance === utterance) this._currentUtterance = null;
+			};
+			utterance.onend = () => {
+				release();
+				resolve();
+			};
+			utterance.onerror = (error) => {
+				release();
+				reject(error);
+			};
+			this._currentUtterance = utterance;
+			window.speechSynthesis.speak(utterance);
+		});
+	}
+
+	/** Stops all TTS by cancelling the speech synthesis queue & clearing the tracked utterance */
+	stop(): void {
+		window.speechSynthesis.cancel();
+		this._currentUtterance = null;
+	}
+
+	/**
+	 * Initialize a stream that chunks text into sentences and speak them.
+	 *
+	 * @example
+	 * const stream = tts.speakStream();
+	 * stream.next("Hello ");
+	 * stream.next("World. How");
+	 * stream.next(" are you?");
+	 * await stream.done();
+	 *
+	 * @returns Object with next function for passing chunk of streamed text and done for completing the stream.
+	 * done flushes any remaining text and resolves once everything queued has been spoken, or rejects if a sentence failed
+	 */
+	speakStream(): {next: (text: string) => void, done: () => Promise<void>} {
+		let buffer = '';
+		const pending: Promise<void>[] = [];
+		const sentenceRegex = /[^.!?\n]+[.!?\n]+/g;
+
+		const enqueue = (sentence: string): void => {
+			const spoken = this.speak(sentence);
+			// Failures are reported through done(), mark them handled so they never surface as unhandled rejections
+			spoken.catch(() => {});
+			pending.push(spoken);
+		};
+
+		return {
+			next: (text: string): void => {
+				buffer += text;
+				const sentences = buffer.match(sentenceRegex);
+				if(!sentences) return;
+				buffer = buffer.replace(sentenceRegex, '');
+				sentences.forEach(sentence => enqueue(sentence.trim()));
+			},
+			done: async (): Promise<void> => {
+				const remainder = buffer.trim();
+				buffer = '';
+				if(remainder) enqueue(remainder);
+				await Promise.all(pending.splice(0));
+			}
+		};
+	}
+}