Skip to content

Commit

Permalink
More mathutils (langchain-ai#2150)
Browse files Browse the repository at this point in the history
* Refactor existing functions for future use.

* Add normalize, euclideanDistance, and innerProduct functions.

* Add math_utils to entry points

* Rename math_utils for export

* Run build

---------

Co-authored-by: jacoblee93 <[email protected]>
  • Loading branch information
afirstenberg and jacoblee93 authored Aug 3, 2023
1 parent ca277ea commit 1c1274d
Show file tree
Hide file tree
Showing 13 changed files with 133 additions and 11 deletions.
3 changes: 3 additions & 0 deletions langchain/.gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -475,6 +475,9 @@ stores/message/upstash_redis.d.ts
stores/message/planetscale.cjs
stores/message/planetscale.js
stores/message/planetscale.d.ts
util/math.cjs
util/math.js
util/math.d.ts
experimental/autogpt.cjs
experimental/autogpt.js
experimental/autogpt.d.ts
Expand Down
8 changes: 8 additions & 0 deletions langchain/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -487,6 +487,9 @@
"stores/message/planetscale.cjs",
"stores/message/planetscale.js",
"stores/message/planetscale.d.ts",
"util/math.cjs",
"util/math.js",
"util/math.d.ts",
"experimental/autogpt.cjs",
"experimental/autogpt.js",
"experimental/autogpt.d.ts",
Expand Down Expand Up @@ -1736,6 +1739,11 @@
"import": "./stores/message/planetscale.js",
"require": "./stores/message/planetscale.cjs"
},
"./util/math": {
"types": "./util/math.d.ts",
"import": "./util/math.js",
"require": "./util/math.cjs"
},
"./experimental/autogpt": {
"types": "./experimental/autogpt.d.ts",
"import": "./experimental/autogpt.js",
Expand Down
2 changes: 2 additions & 0 deletions langchain/scripts/create-entrypoints.js
Original file line number Diff line number Diff line change
Expand Up @@ -193,6 +193,8 @@ const entrypoints = {
"stores/message/ioredis": "stores/message/ioredis",
"stores/message/upstash_redis": "stores/message/upstash_redis",
"stores/message/planetscale": "stores/message/planetscale",
// utilities
"util/math": "util/math",
// experimental
"experimental/autogpt": "experimental/autogpt/index",
"experimental/babyagi": "experimental/babyagi/index",
Expand Down
1 change: 1 addition & 0 deletions langchain/src/load/import_map.ts
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ export * as cache from "../cache/index.js";
export * as stores__doc__in_memory from "../stores/doc/in_memory.js";
export * as stores__file__in_memory from "../stores/file/in_memory.js";
export * as stores__message__in_memory from "../stores/message/in_memory.js";
export * as util__math from "../util/math.js";
export * as experimental__autogpt from "../experimental/autogpt/index.js";
export * as experimental__babyagi from "../experimental/babyagi/index.js";
export * as experimental__generative_agents from "../experimental/generative_agents/index.js";
Expand Down
75 changes: 65 additions & 10 deletions langchain/src/util/math_utils.ts → langchain/src/util/math.ts
Original file line number Diff line number Diff line change
@@ -1,16 +1,27 @@
import { similarity as ml_distance_similarity } from "ml-distance";
import {
similarity as ml_distance_similarity,
distance as ml_distance,
} from "ml-distance";

type VectorFunction = (xVector: number[], yVector: number[]) => number;

/**
* This function calculates the row-wise cosine similarity between two matrices with the same number of columns.
* Apply a row-wise function between two matrices with the same number of columns.
*
* @param {number[][]} X - The first matrix.
* @param {number[][]} Y - The second matrix.
* @param {VectorFunction} func - The function to apply.
*
* @throws {Error} If the number of columns in X and Y are not the same.
*
* @returns {number[][] | [[]]} A matrix where each row represents the cosine similarity values between the corresponding rows of X and Y.
* @returns {number[][] | [[]]} A matrix where each row represents the result of applying the function between the corresponding rows of X and Y.
*/
export function cosineSimilarity(X: number[][], Y: number[][]): number[][] {

export function matrixFunc(
X: number[][],
Y: number[][],
func: VectorFunction
): number[][] {
if (
X.length === 0 ||
X[0].length === 0 ||
Expand All @@ -30,12 +41,41 @@ export function cosineSimilarity(X: number[][], Y: number[][]): number[][] {
}

return X.map((xVector) =>
Y.map((yVector) => ml_distance_similarity.cosine(xVector, yVector)).map(
(similarity) => (Number.isNaN(similarity) ? 0 : similarity)
Y.map((yVector) => func(xVector, yVector)).map((similarity) =>
Number.isNaN(similarity) ? 0 : similarity
)
);
}

/**
 * Scales every entry of a matrix by the largest value found in the matrix.
 *
 * The divisor is obtained from `matrixMaxVal`. When `similarity` is set, each
 * scaled entry `r` is flipped to `1 - r`, so that the largest input maps to 0
 * and an input of 0 maps to 1.
 *
 * If the divisor is 0, the scaled entry is taken to be 0 rather than dividing
 * by zero (which would produce NaN or an infinity).
 *
 * @param {number[][]} M - The matrix to normalize. It is not modified.
 * @param {boolean} similarity - When true, return `1 - scaled` for each entry instead of the scaled entry itself.
 *
 * @returns {number[][]} A new matrix with the same shape as M holding the scaled values.
 */
export function normalize(M: number[][], similarity = false): number[][] {
  const max = matrixMaxVal(M);
  return M.map((row) =>
    row.map((val) => {
      // Guard the division: a zero maximum would otherwise yield NaN or ±Infinity.
      const ratio = max === 0 ? 0 : val / max;
      return similarity ? 1 - ratio : ratio;
    })
  );
}

/**
 * Computes the cosine similarity between every row of X and every row of Y.
 *
 * The work is delegated to `matrixFunc`, using the cosine measure from
 * `ml-distance` as the per-row-pair function.
 *
 * @param {number[][]} X - The first matrix.
 * @param {number[][]} Y - The second matrix. It must have the same number of columns as X.
 *
 * @throws {Error} If the number of columns in X and Y are not the same.
 *
 * @returns {number[][] | [[]]} A matrix whose entry [i][j] is the cosine similarity between row i of X and row j of Y.
 */
export function cosineSimilarity(X: number[][], Y: number[][]): number[][] {
  const { cosine } = ml_distance_similarity;
  return matrixFunc(X, Y, cosine);
}

/**
 * Computes the inner product between every row of X and every row of Y.
 *
 * The work is delegated to `matrixFunc`, using the inner product from
 * `ml-distance` as the per-row-pair function.
 *
 * @param {number[][]} X - The first matrix.
 * @param {number[][]} Y - The second matrix. It must have the same number of columns as X.
 *
 * @throws {Error} If the number of columns in X and Y are not the same.
 *
 * @returns {number[][]} A matrix whose entry [i][j] is the inner product of row i of X and row j of Y.
 */
export function innerProduct(X: number[][], Y: number[][]): number[][] {
  const { innerProduct: dotProduct } = ml_distance;
  return matrixFunc(X, Y, dotProduct);
}

/**
 * Computes the Euclidean distance between every row of X and every row of Y.
 *
 * The work is delegated to `matrixFunc`, using the Euclidean distance from
 * `ml-distance` as the per-row-pair function.
 *
 * @param {number[][]} X - The first matrix.
 * @param {number[][]} Y - The second matrix. It must have the same number of columns as X.
 *
 * @throws {Error} If the number of columns in X and Y are not the same.
 *
 * @returns {number[][]} A matrix whose entry [i][j] is the Euclidean distance between row i of X and row j of Y.
 */
export function euclideanDistance(X: number[][], Y: number[][]): number[][] {
  const { euclidean } = ml_distance;
  return matrixFunc(X, Y, euclidean);
}

/**
* This function implements the Maximal Marginal Relevance algorithm
* to select a set of embeddings that maximizes the diversity and relevance to a query embedding.
Expand Down Expand Up @@ -65,7 +105,7 @@ export function maximalMarginalRelevance(
queryEmbeddingExpanded,
embeddingList
)[0];
const mostSimilarEmbeddingIndex = argMax(similarityToQuery);
const mostSimilarEmbeddingIndex = argMax(similarityToQuery).maxIndex;

const selectedEmbeddings = [embeddingList[mostSimilarEmbeddingIndex]];
const selectedEmbeddingsIndexes = [mostSimilarEmbeddingIndex];
Expand Down Expand Up @@ -101,15 +141,23 @@ export function maximalMarginalRelevance(
return selectedEmbeddingsIndexes;
}

/**
 * Result of searching an array for its maximum element.
 */
interface MaxInfo {
  /** Position of the maximum element; -1 is used when the array is empty. */
  maxIndex: number;
  /** The maximum element itself; NaN is used when the array is empty. */
  maxValue: number;
}

/**
* Finds the maximum value in the given array together with its index.
* @param {number[]} array - The input array.
*
* @returns {MaxInfo} The index and the value of the maximum element. If the array is empty, maxIndex is -1 and maxValue is NaN.
*/
function argMax(array: number[]): number {
function argMax(array: number[]): MaxInfo {
if (array.length === 0) {
return -1;
return {
maxIndex: -1,
maxValue: NaN,
};
}

let maxValue = array[0];
Expand All @@ -121,5 +169,12 @@ function argMax(array: number[]): number {
maxValue = array[i];
}
}
return maxIndex;
return { maxIndex, maxValue };
}

/**
 * Finds the largest value anywhere in a matrix, with a floor of 0.
 *
 * The scan starts from 0, so a matrix with no rows, with only empty rows, or
 * with only negative entries yields 0. Empty rows contribute nothing to the
 * result instead of turning it into NaN, and NaN entries are ignored because
 * they never compare greater than the running maximum.
 *
 * @param {number[][]} arrays - The matrix to scan. Rows may have different lengths or be empty.
 *
 * @returns {number} The largest entry found, or 0 if no entry exceeds 0.
 */
function matrixMaxVal(arrays: number[][]): number {
  let max = 0;
  for (const row of arrays) {
    for (const value of row) {
      // A NaN value fails this comparison and is therefore skipped.
      if (value > max) {
        max = value;
      }
    }
  }
  return max;
}
48 changes: 47 additions & 1 deletion langchain/src/util/tests/math_utils.test.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,12 @@
import { test, expect } from "@jest/globals";
import { Matrix } from "ml-matrix";
import { cosineSimilarity, maximalMarginalRelevance } from "../math_utils.js";
import {
cosineSimilarity,
euclideanDistance,
innerProduct,
maximalMarginalRelevance,
normalize,
} from "../math.js";

test("Test cosine similarity zero", async () => {
const X = Matrix.rand(3, 3).to2DArray();
Expand Down Expand Up @@ -143,3 +149,43 @@ test("Test maximal marginal relevance has no duplicates", async () => {
const expected = new Set(actual).size;
expect(actual).toHaveLength(expected);
});

// Every entry is divided by the matrix-wide maximum, which is 4 for this input.
test("Test normalize", async () => {
  const matrix = [
    [1, 2],
    [3, 4],
  ];

  const normalized = normalize(matrix);

  expect(normalized).toEqual([
    [0.25, 0.5],
    [0.75, 1],
  ]);
});

// Entry [i][j] of the result is the inner product of row i of the first
// matrix with row j of the second, e.g. [1, 2] . [3, 4] = 11.
test("Test innerProduct", async () => {
  const left = [
    [1, 2],
    [5, 6],
  ];
  const right = [
    [3, 4],
    [7, 8],
  ];

  const products = innerProduct(left, right);

  expect(products).toEqual([
    [11, 23],
    [39, 83],
  ]);
});

// Distance between (1, 2) and (2, 4): sqrt(1^2 + 2^2) = sqrt(5).
test("Test distance", async () => {
  const from = [[1, 2]];
  const to = [[2, 4]];
  const sqrtOfFive = 2.23606797749979;

  const distances = euclideanDistance(from, to);

  expect(distances[0][0]).toBeCloseTo(sqrtOfFive);
});
1 change: 1 addition & 0 deletions langchain/tsconfig.json
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,7 @@
"src/stores/message/ioredis.ts",
"src/stores/message/upstash_redis.ts",
"src/stores/message/planetscale.ts",
"src/util/math.ts",
"src/experimental/autogpt/index.ts",
"src/experimental/babyagi/index.ts",
"src/experimental/generative_agents/index.ts",
Expand Down
1 change: 1 addition & 0 deletions test-exports-cf/src/entrypoints.js
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ export * from "langchain/cache";
export * from "langchain/stores/doc/in_memory";
export * from "langchain/stores/file/in_memory";
export * from "langchain/stores/message/in_memory";
export * from "langchain/util/math";
export * from "langchain/experimental/autogpt";
export * from "langchain/experimental/babyagi";
export * from "langchain/experimental/generative_agents";
Expand Down
1 change: 1 addition & 0 deletions test-exports-cjs/src/entrypoints.js
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ const cache = require("langchain/cache");
const stores_doc_in_memory = require("langchain/stores/doc/in_memory");
const stores_file_in_memory = require("langchain/stores/file/in_memory");
const stores_message_in_memory = require("langchain/stores/message/in_memory");
const util_math = require("langchain/util/math");
const experimental_autogpt = require("langchain/experimental/autogpt");
const experimental_babyagi = require("langchain/experimental/babyagi");
const experimental_generative_agents = require("langchain/experimental/generative_agents");
Expand Down
1 change: 1 addition & 0 deletions test-exports-esbuild/src/entrypoints.js
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ import * as cache from "langchain/cache";
import * as stores_doc_in_memory from "langchain/stores/doc/in_memory";
import * as stores_file_in_memory from "langchain/stores/file/in_memory";
import * as stores_message_in_memory from "langchain/stores/message/in_memory";
import * as util_math from "langchain/util/math";
import * as experimental_autogpt from "langchain/experimental/autogpt";
import * as experimental_babyagi from "langchain/experimental/babyagi";
import * as experimental_generative_agents from "langchain/experimental/generative_agents";
Expand Down
1 change: 1 addition & 0 deletions test-exports-esm/src/entrypoints.js
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ import * as cache from "langchain/cache";
import * as stores_doc_in_memory from "langchain/stores/doc/in_memory";
import * as stores_file_in_memory from "langchain/stores/file/in_memory";
import * as stores_message_in_memory from "langchain/stores/message/in_memory";
import * as util_math from "langchain/util/math";
import * as experimental_autogpt from "langchain/experimental/autogpt";
import * as experimental_babyagi from "langchain/experimental/babyagi";
import * as experimental_generative_agents from "langchain/experimental/generative_agents";
Expand Down
1 change: 1 addition & 0 deletions test-exports-vercel/src/entrypoints.js
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ export * from "langchain/cache";
export * from "langchain/stores/doc/in_memory";
export * from "langchain/stores/file/in_memory";
export * from "langchain/stores/message/in_memory";
export * from "langchain/util/math";
export * from "langchain/experimental/autogpt";
export * from "langchain/experimental/babyagi";
export * from "langchain/experimental/generative_agents";
Expand Down
1 change: 1 addition & 0 deletions test-exports-vite/src/entrypoints.js
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ export * from "langchain/cache";
export * from "langchain/stores/doc/in_memory";
export * from "langchain/stores/file/in_memory";
export * from "langchain/stores/message/in_memory";
export * from "langchain/util/math";
export * from "langchain/experimental/autogpt";
export * from "langchain/experimental/babyagi";
export * from "langchain/experimental/generative_agents";
Expand Down

0 comments on commit 1c1274d

Please sign in to comment.