Skip to content

Commit 1c1274d

Browse files
More mathutils (langchain-ai#2150)
* Refactor existing functions for future use.
* Add normalize, euclideanDistance, and innerProduct functions.
* Add math_utils to entry points
* Rename math_utils for export
* Run build

---------

Co-authored-by: jacoblee93 <[email protected]>
1 parent ca277ea commit 1c1274d

File tree

13 files changed

+133
-11
lines changed

13 files changed

+133
-11
lines changed

langchain/.gitignore

+3
Original file line numberDiff line numberDiff line change
@@ -475,6 +475,9 @@ stores/message/upstash_redis.d.ts
475475
stores/message/planetscale.cjs
476476
stores/message/planetscale.js
477477
stores/message/planetscale.d.ts
478+
util/math.cjs
479+
util/math.js
480+
util/math.d.ts
478481
experimental/autogpt.cjs
479482
experimental/autogpt.js
480483
experimental/autogpt.d.ts

langchain/package.json

+8
Original file line numberDiff line numberDiff line change
@@ -487,6 +487,9 @@
487487
"stores/message/planetscale.cjs",
488488
"stores/message/planetscale.js",
489489
"stores/message/planetscale.d.ts",
490+
"util/math.cjs",
491+
"util/math.js",
492+
"util/math.d.ts",
490493
"experimental/autogpt.cjs",
491494
"experimental/autogpt.js",
492495
"experimental/autogpt.d.ts",
@@ -1736,6 +1739,11 @@
17361739
"import": "./stores/message/planetscale.js",
17371740
"require": "./stores/message/planetscale.cjs"
17381741
},
1742+
"./util/math": {
1743+
"types": "./util/math.d.ts",
1744+
"import": "./util/math.js",
1745+
"require": "./util/math.cjs"
1746+
},
17391747
"./experimental/autogpt": {
17401748
"types": "./experimental/autogpt.d.ts",
17411749
"import": "./experimental/autogpt.js",

langchain/scripts/create-entrypoints.js

+2
Original file line numberDiff line numberDiff line change
@@ -193,6 +193,8 @@ const entrypoints = {
193193
"stores/message/ioredis": "stores/message/ioredis",
194194
"stores/message/upstash_redis": "stores/message/upstash_redis",
195195
"stores/message/planetscale": "stores/message/planetscale",
196+
// utilities
197+
"util/math": "util/math",
196198
// experimental
197199
"experimental/autogpt": "experimental/autogpt/index",
198200
"experimental/babyagi": "experimental/babyagi/index",

langchain/src/load/import_map.ts

+1
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ export * as cache from "../cache/index.js";
5050
export * as stores__doc__in_memory from "../stores/doc/in_memory.js";
5151
export * as stores__file__in_memory from "../stores/file/in_memory.js";
5252
export * as stores__message__in_memory from "../stores/message/in_memory.js";
53+
export * as util__math from "../util/math.js";
5354
export * as experimental__autogpt from "../experimental/autogpt/index.js";
5455
export * as experimental__babyagi from "../experimental/babyagi/index.js";
5556
export * as experimental__generative_agents from "../experimental/generative_agents/index.js";

langchain/src/util/math_utils.ts langchain/src/util/math.ts

+65-10
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,27 @@
1-
import { similarity as ml_distance_similarity } from "ml-distance";
1+
import {
2+
similarity as ml_distance_similarity,
3+
distance as ml_distance,
4+
} from "ml-distance";
5+
6+
type VectorFunction = (xVector: number[], yVector: number[]) => number;
27

38
/**
4-
* This function calculates the row-wise cosine similarity between two matrices with the same number of columns.
9+
* Apply a row-wise function between two matrices with the same number of columns.
510
*
611
* @param {number[][]} X - The first matrix.
712
* @param {number[][]} Y - The second matrix.
13+
* @param {VectorFunction} func - The function to apply.
814
*
915
* @throws {Error} If the number of columns in X and Y are not the same.
1016
*
11-
* @returns {number[][] | [[]]} A matrix where each row represents the cosine similarity values between the corresponding rows of X and Y.
17+
* @returns {number[][] | [[]]} A matrix where each row represents the result of applying the function between the corresponding rows of X and Y.
1218
*/
13-
export function cosineSimilarity(X: number[][], Y: number[][]): number[][] {
19+
20+
export function matrixFunc(
21+
X: number[][],
22+
Y: number[][],
23+
func: VectorFunction
24+
): number[][] {
1425
if (
1526
X.length === 0 ||
1627
X[0].length === 0 ||
@@ -30,12 +41,41 @@ export function cosineSimilarity(X: number[][], Y: number[][]): number[][] {
3041
}
3142

3243
return X.map((xVector) =>
33-
Y.map((yVector) => ml_distance_similarity.cosine(xVector, yVector)).map(
34-
(similarity) => (Number.isNaN(similarity) ? 0 : similarity)
44+
Y.map((yVector) => func(xVector, yVector)).map((similarity) =>
45+
Number.isNaN(similarity) ? 0 : similarity
3546
)
3647
);
3748
}
3849

50+
export function normalize(M: number[][], similarity = false): number[][] {
51+
const max = matrixMaxVal(M);
52+
return M.map((row) =>
53+
row.map((val) => (similarity ? 1 - val / max : val / max))
54+
);
55+
}
56+
57+
/**
58+
* This function calculates the row-wise cosine similarity between two matrices with the same number of columns.
59+
*
60+
* @param {number[][]} X - The first matrix.
61+
* @param {number[][]} Y - The second matrix.
62+
*
63+
* @throws {Error} If the number of columns in X and Y are not the same.
64+
*
65+
* @returns {number[][] | [[]]} A matrix where each row represents the cosine similarity values between the corresponding rows of X and Y.
66+
*/
67+
export function cosineSimilarity(X: number[][], Y: number[][]): number[][] {
68+
return matrixFunc(X, Y, ml_distance_similarity.cosine);
69+
}
70+
71+
export function innerProduct(X: number[][], Y: number[][]): number[][] {
72+
return matrixFunc(X, Y, ml_distance.innerProduct);
73+
}
74+
75+
export function euclideanDistance(X: number[][], Y: number[][]): number[][] {
76+
return matrixFunc(X, Y, ml_distance.euclidean);
77+
}
78+
3979
/**
4080
* This function implements the Maximal Marginal Relevance algorithm
4181
* to select a set of embeddings that maximizes the diversity and relevance to a query embedding.
@@ -65,7 +105,7 @@ export function maximalMarginalRelevance(
65105
queryEmbeddingExpanded,
66106
embeddingList
67107
)[0];
68-
const mostSimilarEmbeddingIndex = argMax(similarityToQuery);
108+
const mostSimilarEmbeddingIndex = argMax(similarityToQuery).maxIndex;
69109

70110
const selectedEmbeddings = [embeddingList[mostSimilarEmbeddingIndex]];
71111
const selectedEmbeddingsIndexes = [mostSimilarEmbeddingIndex];
@@ -101,15 +141,23 @@ export function maximalMarginalRelevance(
101141
return selectedEmbeddingsIndexes;
102142
}
103143

144+
type MaxInfo = {
145+
maxIndex: number;
146+
maxValue: number;
147+
};
148+
104149
/**
105150
* Finds the maximum value in the given array together with its index.
106151
* @param {number[]} array - The input array.
107152
*
108153
* @returns {MaxInfo} The index (`maxIndex`) and value (`maxValue`) of the maximum element. If the array is empty, `maxIndex` is -1 and `maxValue` is NaN.
109154
*/
110-
function argMax(array: number[]): number {
155+
function argMax(array: number[]): MaxInfo {
111156
if (array.length === 0) {
112-
return -1;
157+
return {
158+
maxIndex: -1,
159+
maxValue: NaN,
160+
};
113161
}
114162

115163
let maxValue = array[0];
@@ -121,5 +169,12 @@ function argMax(array: number[]): number {
121169
maxValue = array[i];
122170
}
123171
}
124-
return maxIndex;
172+
return { maxIndex, maxValue };
173+
}
174+
175+
function matrixMaxVal(arrays: number[][]): number {
176+
return arrays.reduce(
177+
(acc, array) => Math.max(acc, argMax(array).maxValue),
178+
0
179+
);
125180
}

langchain/src/util/tests/math_utils.test.ts

+47-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,12 @@
11
import { test, expect } from "@jest/globals";
22
import { Matrix } from "ml-matrix";
3-
import { cosineSimilarity, maximalMarginalRelevance } from "../math_utils.js";
3+
import {
4+
cosineSimilarity,
5+
euclideanDistance,
6+
innerProduct,
7+
maximalMarginalRelevance,
8+
normalize,
9+
} from "../math.js";
410

511
test("Test cosine similarity zero", async () => {
612
const X = Matrix.rand(3, 3).to2DArray();
@@ -143,3 +149,43 @@ test("Test maximal marginal relevance has no duplicates", async () => {
143149
const expected = new Set(actual).size;
144150
expect(actual).toHaveLength(expected);
145151
});
152+
153+
test("Test normalize", async () => {
154+
const input = [
155+
[1, 2],
156+
[3, 4],
157+
];
158+
159+
const expected = [
160+
[0.25, 0.5],
161+
[0.75, 1],
162+
];
163+
164+
const actual = normalize(input);
165+
expect(actual).toEqual(expected);
166+
});
167+
168+
test("Test innerProduct", async () => {
169+
const x = [
170+
[1, 2],
171+
[5, 6],
172+
];
173+
const y = [
174+
[3, 4],
175+
[7, 8],
176+
];
177+
const expected = [
178+
[11, 23],
179+
[39, 83],
180+
];
181+
const actual = innerProduct(x, y);
182+
expect(actual).toEqual(expected);
183+
});
184+
185+
test("Test distance", async () => {
186+
const x = [[1, 2]];
187+
const y = [[2, 4]];
188+
const expected = [[2.23606797749979]];
189+
const actual = euclideanDistance(x, y);
190+
expect(actual[0][0]).toBeCloseTo(expected[0][0]);
191+
});

langchain/tsconfig.json

+1
Original file line numberDiff line numberDiff line change
@@ -185,6 +185,7 @@
185185
"src/stores/message/ioredis.ts",
186186
"src/stores/message/upstash_redis.ts",
187187
"src/stores/message/planetscale.ts",
188+
"src/util/math.ts",
188189
"src/experimental/autogpt/index.ts",
189190
"src/experimental/babyagi/index.ts",
190191
"src/experimental/generative_agents/index.ts",

test-exports-cf/src/entrypoints.js

+1
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@ export * from "langchain/cache";
4949
export * from "langchain/stores/doc/in_memory";
5050
export * from "langchain/stores/file/in_memory";
5151
export * from "langchain/stores/message/in_memory";
52+
export * from "langchain/util/math";
5253
export * from "langchain/experimental/autogpt";
5354
export * from "langchain/experimental/babyagi";
5455
export * from "langchain/experimental/generative_agents";

test-exports-cjs/src/entrypoints.js

+1
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@ const cache = require("langchain/cache");
4949
const stores_doc_in_memory = require("langchain/stores/doc/in_memory");
5050
const stores_file_in_memory = require("langchain/stores/file/in_memory");
5151
const stores_message_in_memory = require("langchain/stores/message/in_memory");
52+
const util_math = require("langchain/util/math");
5253
const experimental_autogpt = require("langchain/experimental/autogpt");
5354
const experimental_babyagi = require("langchain/experimental/babyagi");
5455
const experimental_generative_agents = require("langchain/experimental/generative_agents");

test-exports-esbuild/src/entrypoints.js

+1
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@ import * as cache from "langchain/cache";
4949
import * as stores_doc_in_memory from "langchain/stores/doc/in_memory";
5050
import * as stores_file_in_memory from "langchain/stores/file/in_memory";
5151
import * as stores_message_in_memory from "langchain/stores/message/in_memory";
52+
import * as util_math from "langchain/util/math";
5253
import * as experimental_autogpt from "langchain/experimental/autogpt";
5354
import * as experimental_babyagi from "langchain/experimental/babyagi";
5455
import * as experimental_generative_agents from "langchain/experimental/generative_agents";

test-exports-esm/src/entrypoints.js

+1
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@ import * as cache from "langchain/cache";
4949
import * as stores_doc_in_memory from "langchain/stores/doc/in_memory";
5050
import * as stores_file_in_memory from "langchain/stores/file/in_memory";
5151
import * as stores_message_in_memory from "langchain/stores/message/in_memory";
52+
import * as util_math from "langchain/util/math";
5253
import * as experimental_autogpt from "langchain/experimental/autogpt";
5354
import * as experimental_babyagi from "langchain/experimental/babyagi";
5455
import * as experimental_generative_agents from "langchain/experimental/generative_agents";

test-exports-vercel/src/entrypoints.js

+1
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@ export * from "langchain/cache";
4949
export * from "langchain/stores/doc/in_memory";
5050
export * from "langchain/stores/file/in_memory";
5151
export * from "langchain/stores/message/in_memory";
52+
export * from "langchain/util/math";
5253
export * from "langchain/experimental/autogpt";
5354
export * from "langchain/experimental/babyagi";
5455
export * from "langchain/experimental/generative_agents";

test-exports-vite/src/entrypoints.js

+1
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@ export * from "langchain/cache";
4949
export * from "langchain/stores/doc/in_memory";
5050
export * from "langchain/stores/file/in_memory";
5151
export * from "langchain/stores/message/in_memory";
52+
export * from "langchain/util/math";
5253
export * from "langchain/experimental/autogpt";
5354
export * from "langchain/experimental/babyagi";
5455
export * from "langchain/experimental/generative_agents";

0 commit comments

Comments
 (0)