1
- import { similarity as ml_distance_similarity } from "ml-distance" ;
1
+ import {
2
+ similarity as ml_distance_similarity ,
3
+ distance as ml_distance ,
4
+ } from "ml-distance" ;
5
+
6
/**
 * Signature of a function that reduces a pair of numeric vectors to a single
 * number. In this file it is used for the per-row measure handed to
 * `matrixFunc`.
 */
type VectorFunction = (
  xVector: number[],
  yVector: number[]
) => number;
2
7
3
8
/**
4
- * This function calculates the row-wise cosine similarity between two matrices with the same number of columns.
9
+ * Apply a row-wise function between two matrices with the same number of columns.
5
10
*
6
11
* @param {number[][] } X - The first matrix.
7
12
* @param {number[][] } Y - The second matrix.
13
+ * @param {VectorFunction } func - The function to apply.
8
14
*
9
15
* @throws {Error } If the number of columns in X and Y are not the same.
10
16
*
11
- * @returns {number[][] | [[]] } A matrix where each row represents the cosine similarity values between the corresponding rows of X and Y.
17
+ * @returns {number[][] | [[]] } A matrix where each row represents the result of applying the function between the corresponding rows of X and Y.
12
18
*/
13
- export function cosineSimilarity ( X : number [ ] [ ] , Y : number [ ] [ ] ) : number [ ] [ ] {
19
+
20
+ export function matrixFunc (
21
+ X : number [ ] [ ] ,
22
+ Y : number [ ] [ ] ,
23
+ func : VectorFunction
24
+ ) : number [ ] [ ] {
14
25
if (
15
26
X . length === 0 ||
16
27
X [ 0 ] . length === 0 ||
@@ -30,12 +41,41 @@ export function cosineSimilarity(X: number[][], Y: number[][]): number[][] {
30
41
}
31
42
32
43
return X . map ( ( xVector ) =>
33
- Y . map ( ( yVector ) => ml_distance_similarity . cosine ( xVector , yVector ) ) . map (
34
- ( similarity ) => ( Number . isNaN ( similarity ) ? 0 : similarity )
44
+ Y . map ( ( yVector ) => func ( xVector , yVector ) ) . map ( ( similarity ) =>
45
+ Number . isNaN ( similarity ) ? 0 : similarity
35
46
)
36
47
) ;
37
48
}
38
49
50
/**
 * Scales every entry of a matrix by the matrix's largest value, as reported by
 * `matrixMaxVal`.
 *
 * @param {number[][]} M - The matrix to normalize. It is not modified.
 * @param {boolean} similarity - When `true`, each scaled value `v` is returned
 * as `1 - v` instead of `v`. Defaults to `false`.
 *
 * @returns {number[][]} A new matrix with the same shape as `M`.
 */
export function normalize(M: number[][], similarity = false): number[][] {
  const max = matrixMaxVal(M);
  // `matrixMaxVal` starts its search at 0, so it returns 0 when M holds no
  // positive entry (and may return NaN if a row is empty). Dividing by either
  // would fill the result with NaN / ±Infinity, so fall back to a scale of 1
  // and leave the values unscaled in that case.
  const scale = max > 0 ? max : 1;
  return M.map((row) =>
    row.map((val) => {
      const scaled = val / scale;
      return similarity ? 1 - scaled : scaled;
    })
  );
}
56
+
57
/**
 * Computes the cosine similarity between the rows of two matrices that have
 * the same number of columns.
 *
 * The work is delegated to `matrixFunc`, using the `cosine` measure from
 * ml-distance's `similarity` collection as the per-row function. Entry
 * `[i][j]` of the result is therefore the measure applied to row `i` of `X`
 * and row `j` of `Y`, with `NaN` results replaced by `0`.
 *
 * @param {number[][]} X - The first matrix.
 * @param {number[][]} Y - The second matrix.
 *
 * @throws {Error} If the number of columns in X and Y are not the same.
 *
 * @returns {number[][] | [[]]} The matrix of cosine similarity values.
 */
export function cosineSimilarity(X: number[][], Y: number[][]): number[][] {
  const { cosine } = ml_distance_similarity;
  return matrixFunc(X, Y, cosine);
}
70
+
71
/**
 * Applies ml-distance's `innerProduct` measure between the rows of two
 * matrices.
 *
 * The work is delegated to `matrixFunc`: entry `[i][j]` of the result is the
 * measure applied to row `i` of `X` and row `j` of `Y`, with `NaN` results
 * replaced by `0`. Input validation is whatever `matrixFunc` performs.
 *
 * @param {number[][]} X - The first matrix.
 * @param {number[][]} Y - The second matrix.
 *
 * @returns {number[][]} The matrix of inner-product values.
 */
export function innerProduct(X: number[][], Y: number[][]): number[][] {
  const { innerProduct: rowInnerProduct } = ml_distance;
  return matrixFunc(X, Y, rowInnerProduct);
}
74
+
75
/**
 * Applies ml-distance's `euclidean` measure between the rows of two matrices.
 *
 * The work is delegated to `matrixFunc`: entry `[i][j]` of the result is the
 * measure applied to row `i` of `X` and row `j` of `Y`, with `NaN` results
 * replaced by `0`. Input validation is whatever `matrixFunc` performs.
 *
 * @param {number[][]} X - The first matrix.
 * @param {number[][]} Y - The second matrix.
 *
 * @returns {number[][]} The matrix of Euclidean distance values.
 */
export function euclideanDistance(X: number[][], Y: number[][]): number[][] {
  const { euclidean } = ml_distance;
  return matrixFunc(X, Y, euclidean);
}
78
+
39
79
/**
40
80
* This function implements the Maximal Marginal Relevance algorithm
41
81
* to select a set of embeddings that maximizes the diversity and relevance to a query embedding.
@@ -65,7 +105,7 @@ export function maximalMarginalRelevance(
65
105
queryEmbeddingExpanded ,
66
106
embeddingList
67
107
) [ 0 ] ;
68
- const mostSimilarEmbeddingIndex = argMax ( similarityToQuery ) ;
108
+ const mostSimilarEmbeddingIndex = argMax ( similarityToQuery ) . maxIndex ;
69
109
70
110
const selectedEmbeddings = [ embeddingList [ mostSimilarEmbeddingIndex ] ] ;
71
111
const selectedEmbeddingsIndexes = [ mostSimilarEmbeddingIndex ] ;
@@ -101,15 +141,23 @@ export function maximalMarginalRelevance(
101
141
return selectedEmbeddingsIndexes ;
102
142
}
103
143
144
/**
 * Result of a maximum search over an array, as returned by `argMax`.
 */
type MaxInfo = {
  /** Index of the largest element; `argMax` reports `-1` for an empty array. */
  maxIndex: number;
  /** Value of the largest element; `argMax` reports `NaN` for an empty array. */
  maxValue: number;
};
148
+
104
149
/**
105
150
* Finds the index of the maximum value in the given array.
106
151
* @param {number[] } array - The input array.
107
152
*
108
153
* @returns {number } The index of the maximum value in the array. If the array is empty, returns -1.
109
154
*/
110
- function argMax ( array : number [ ] ) : number {
155
+ function argMax ( array : number [ ] ) : MaxInfo {
111
156
if ( array . length === 0 ) {
112
- return - 1 ;
157
+ return {
158
+ maxIndex : - 1 ,
159
+ maxValue : NaN ,
160
+ } ;
113
161
}
114
162
115
163
let maxValue = array [ 0 ] ;
@@ -121,5 +169,12 @@ function argMax(array: number[]): number {
121
169
maxValue = array [ i ] ;
122
170
}
123
171
}
124
- return maxIndex ;
172
+ return { maxIndex, maxValue } ;
173
+ }
174
+
175
/**
 * Finds the largest value in a matrix, with a floor of 0.
 *
 * The search starts at 0, so a matrix with no positive entry (including an
 * empty matrix) yields 0. Empty rows and `NaN` entries are ignored instead of
 * turning the whole result into `NaN`.
 *
 * @param {number[][]} arrays - The matrix to scan; rows may differ in length.
 *
 * @returns {number} The largest entry of the matrix, or 0 if none is positive.
 */
function matrixMaxVal(arrays: number[][]): number {
  let max = 0;
  for (const row of arrays) {
    for (const val of row) {
      // `NaN > max` is false, so NaN entries never replace the running maximum.
      if (val > max) {
        max = val;
      }
    }
  }
  return max;
}
0 commit comments