From bb6933f0d5d27ed742246fa6bdc6d8660186f8fc Mon Sep 17 00:00:00 2001 From: ztimson Date: Fri, 19 Dec 2025 15:22:06 -0500 Subject: [PATCH] Optimized cosineSimilarity --- package.json | 2 +- src/llm.ts | 26 +++++++++++++------------- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/package.json b/package.json index 1e9f2d3..3e6f7aa 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "@ztimson/ai-utils", - "version": "0.2.0", + "version": "0.2.1", "description": "AI Utility library", "author": "Zak Timson", "license": "MIT", diff --git a/src/llm.ts b/src/llm.ts index e2ca45c..d154295 100644 --- a/src/llm.ts +++ b/src/llm.ts @@ -136,6 +136,18 @@ export class LLM { return [{role: 'assistant', content: `Conversation Summary: ${summary}`, timestamp: Date.now()}, ...recent]; } + cosineSimilarity(v1: number[], v2: number[]): number { + if (v1.length !== v2.length) throw new Error('Vectors must be same length'); + let dotProduct = 0, normA = 0, normB = 0; + for (let i = 0; i < v1.length; i++) { + dotProduct += v1[i] * v2[i]; + normA += v1[i] * v1[i]; + normB += v2[i] * v2[i]; + } + const denominator = Math.sqrt(normA) * Math.sqrt(normB); + return denominator === 0 ? 0 : dotProduct / denominator; + } + embedding(target: object | string, maxTokens = 500, overlapTokens = 50) { const objString = (obj: any, path = ''): string[] => { if(obj === null || obj === undefined) return []; @@ -205,24 +217,12 @@ export class LLM { */ fuzzyMatch(target: string, ...searchTerms: string[]) { if(searchTerms.length < 2) throw new Error('Requires at least 2 strings to compare'); - const vector = (text: string, dimensions: number = 10): number[] => { return text.toLowerCase().split('').map((char, index) => (char.charCodeAt(0) * (index + 1)) % dimensions / dimensions).slice(0, dimensions); } - - const cosineSimilarity = (v1: number[], v2: number[]): number => { - if (v1.length !== v2.length) throw new Error('Vectors must be same length'); - const tensor1 = tf.tensor1d(v1), tensor2 = tf.tensor1d(v2) - const dotProduct = tf.dot(tensor1, tensor2) - const magnitude1 = tf.norm(tensor1) - const magnitude2 = tf.norm(tensor2) - if(magnitude1.dataSync()[0] === 0 || magnitude2.dataSync()[0] === 0) return 0 - return dotProduct.dataSync()[0] / (magnitude1.dataSync()[0] * magnitude2.dataSync()[0]) - } - const v = vector(target); - const similarities = searchTerms.map(t => vector(t)).map(refVector => cosineSimilarity(v, refVector)) + const similarities = searchTerms.map(t => vector(t)).map(refVector => this.cosineSimilarity(v, refVector)) return {avg: similarities.reduce((acc, s) => acc + s, 0) / similarities.length, max: Math.max(...similarities), similarities} }