Skip to content

Commit

Permalink
Move docstore to store (breaking change, but this isn't an api we adv…
Browse files Browse the repository at this point in the history
…ertise for using directly)
  • Loading branch information
nfcampos committed Jun 9, 2023
1 parent 02bb9fc commit f6023ec
Show file tree
Hide file tree
Showing 10 changed files with 96 additions and 96 deletions.
4 changes: 2 additions & 2 deletions langchain/scripts/create-entrypoints.js
Original file line number Diff line number Diff line change
Expand Up @@ -73,8 +73,6 @@ const entrypoints = {
"memory/zep": "memory/zep",
// document
document: "document",
// docstore
docstore: "docstore/index",
// document_loaders
document_loaders: "document_loaders/index",
"document_loaders/base": "document_loaders/base",
Expand Down Expand Up @@ -144,6 +142,8 @@ const entrypoints = {
"cache/momento": "cache/momento",
"cache/redis": "cache/redis",
// stores
"stores/doc/in_memory": "stores/doc/in_memory",
"stores/doc/gcs": "stores/doc/gcs",
"stores/file/in_memory": "stores/file/in_memory",
"stores/file/node": "stores/file/node",
"stores/message/dynamodb": "stores/message/dynamodb",
Expand Down
13 changes: 0 additions & 13 deletions langchain/src/docstore/base.ts

This file was deleted.

55 changes: 0 additions & 55 deletions langchain/src/docstore/in_memory.ts

This file was deleted.

3 changes: 0 additions & 3 deletions langchain/src/docstore/index.ts

This file was deleted.

6 changes: 6 additions & 0 deletions langchain/src/schema/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -215,3 +215,9 @@ export abstract class BaseEntityStore {

abstract clear(): Promise<void>;
}

/**
 * Abstract contract for an asynchronous document store: subclasses provide
 * a way to look up a single `Document` and a way to insert a batch of them.
 */
export abstract class Docstore {
/**
 * Looks up one document.
 *
 * @param search String identifying the document. How it is interpreted is
 * left to the implementation (presumably a document ID — confirm against
 * the concrete store being used).
 * @returns A promise resolving to the matching `Document`.
 */
abstract search(search: string): Promise<Document>;

/**
 * Inserts a batch of documents.
 *
 * @param texts Mapping from string key to the `Document` stored under it.
 * @returns A promise that resolves once the documents have been added.
 */
abstract add(texts: Record<string, Document>): Promise<void>;
}
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import { Storage, File } from "@google-cloud/storage";

import { Document } from "../document.js";
import { Docstore } from "./base.js";
import { Document } from "../../document.js";
import { Docstore } from "../../schema/index.js";

export interface GoogleCloudStorageDocstoreConfiguration {
/** The identifier for the GCS bucket */
Expand Down
63 changes: 63 additions & 0 deletions langchain/src/stores/doc/in_memory.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
import { Document } from "../../document.js";
import { Docstore } from "../../schema/index.js";

/**
 * `Docstore` implementation that keeps every document in an in-process
 * `Map`, keyed by document ID. Both operations are async only to satisfy the
 * `Docstore` contract; no I/O is performed.
 */
export class InMemoryDocstore extends Docstore {
  _docs: Map<string, Document>;

  /**
   * @param docs Optional pre-populated map of ID to document. The map is
   * used as-is (not copied); when omitted a new empty map is created.
   */
  constructor(docs?: Map<string, Document>) {
    super();
    this._docs = docs ?? new Map();
  }

  /**
   * Fetches the document stored under the given ID.
   *
   * @param search ID of the document to fetch.
   * @returns The stored document.
   * @throws Error if no document is stored under `search`.
   */
  async search(search: string): Promise<Document> {
    const result = this._docs.get(search);
    if (!result) {
      throw new Error(`ID ${search} not found.`);
    }
    return result;
  }

  /**
   * Adds every entry of `texts` to the store. The call is all-or-nothing:
   * if any ID is already present, nothing is inserted.
   *
   * @param texts Mapping of ID to document to insert.
   * @throws Error listing the colliding IDs if any already exist.
   */
  async add(texts: Record<string, Document>): Promise<void> {
    // Map.has is a constant-time lookup; avoids copying all existing keys
    // into an array and scanning it once per incoming ID.
    const overlapping = Object.keys(texts).filter((id) => this._docs.has(id));

    if (overlapping.length > 0) {
      // join(",") yields the same text as implicit array stringification.
      throw new Error(
        `Tried to add ids that already exist: ${overlapping.join(",")}`
      );
    }

    for (const [key, value] of Object.entries(texts)) {
      this._docs.set(key, value);
    }
  }
}

/**
 * Synchronous in-memory document store backed by a `Map`, keyed by document
 * ID. Offers the same `search`/`add` operations as `InMemoryDocstore` but
 * returns values directly instead of promises.
 */
export class SynchronousInMemoryDocstore {
  _docs: Map<string, Document>;

  /**
   * @param docs Optional pre-populated map of ID to document. The map is
   * used as-is (not copied); when omitted a new empty map is created.
   */
  constructor(docs?: Map<string, Document>) {
    this._docs = docs ?? new Map();
  }

  /**
   * Fetches the document stored under the given ID.
   *
   * @param search ID of the document to fetch.
   * @returns The stored document.
   * @throws Error if no document is stored under `search`.
   */
  search(search: string): Document {
    const result = this._docs.get(search);
    if (!result) {
      throw new Error(`ID ${search} not found.`);
    }
    return result;
  }

  /**
   * Adds every entry of `texts` to the store. The call is all-or-nothing:
   * if any ID is already present, nothing is inserted.
   *
   * @param texts Mapping of ID to document to insert.
   * @throws Error listing the colliding IDs if any already exist.
   */
  add(texts: Record<string, Document>): void {
    // Map.has is a constant-time lookup; avoids copying all existing keys
    // into an array and scanning it once per incoming ID.
    const overlapping = Object.keys(texts).filter((id) => this._docs.has(id));

    if (overlapping.length > 0) {
      // join(",") yields the same text as implicit array stringification.
      throw new Error(
        `Tried to add ids that already exist: ${overlapping.join(",")}`
      );
    }

    for (const [key, value] of Object.entries(texts)) {
      this._docs.set(key, value);
    }
  }
}
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import { describe, test, expect } from "@jest/globals";
import { GoogleCloudStorageDocstore } from "../googlecloudstorage.js";
import { Document } from "../../document.js";
import { GoogleCloudStorageDocstore } from "../gcs.js";
import { Document } from "../../../document.js";

describe("GoogleCloudStorageDocstore", () => {
const bucket = "INSERT_BUCKET_HERE";
Expand Down
20 changes: 10 additions & 10 deletions langchain/src/vectorstores/faiss.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,10 @@ import * as uuid from "uuid";
import { Embeddings } from "../embeddings/base.js";
import { SaveableVectorStore } from "./base.js";
import { Document } from "../document.js";
import { InMemoryDocstore } from "../docstore/index.js";
import { SynchronousInMemoryDocstore } from "../stores/doc/in_memory.js";

export interface FaissLibArgs {
docstore?: InMemoryDocstore;
docstore?: SynchronousInMemoryDocstore;
index?: IndexFlatL2;
mapping?: Record<number, string>;
}
Expand All @@ -17,7 +17,7 @@ export class FaissStore extends SaveableVectorStore {

_mapping: Record<number, string>;

docstore: InMemoryDocstore;
docstore: SynchronousInMemoryDocstore;

args: FaissLibArgs;

Expand All @@ -27,7 +27,7 @@ export class FaissStore extends SaveableVectorStore {
this._index = args.index;
this._mapping = args.mapping ?? {};
this.embeddings = embeddings;
this.docstore = args?.docstore ?? new InMemoryDocstore();
this.docstore = args?.docstore ?? new SynchronousInMemoryDocstore();
}

async addDocuments(documents: Document[]): Promise<void> {
Expand Down Expand Up @@ -70,7 +70,7 @@ export class FaissStore extends SaveableVectorStore {
);
}

const docstoreSize = this.docstore.count;
const docstoreSize = this.index.ntotal();
for (let i = 0; i < vectors.length; i += 1) {
const documentId = uuid.v4();
const id = docstoreSize + i;
Expand Down Expand Up @@ -138,7 +138,7 @@ export class FaissStore extends SaveableVectorStore {
readStore(directory),
readIndex(directory),
]);
const docstore = new InMemoryDocstore(new Map(docstoreFiles));
const docstore = new SynchronousInMemoryDocstore(new Map(docstoreFiles));
return new this(embeddings, { docstore, index, mapping });
}

Expand All @@ -159,8 +159,8 @@ export class FaissStore extends SaveableVectorStore {
class PyInMemoryDocstore {
_dict: Map<string, PyDocument>;

toInMemoryDocstore(): InMemoryDocstore {
const s = new InMemoryDocstore();
toInMemoryDocstore(): SynchronousInMemoryDocstore {
const s = new SynchronousInMemoryDocstore();
for (const [key, value] of Object.entries(this._dict)) {
s._docs.set(key, value.toDocument());
}
Expand Down Expand Up @@ -215,7 +215,7 @@ export class FaissStore extends SaveableVectorStore {
metadatas: object[] | object,
embeddings: Embeddings,
dbConfig?: {
docstore?: InMemoryDocstore;
docstore?: SynchronousInMemoryDocstore;
}
): Promise<FaissStore> {
const docs: Document[] = [];
Expand All @@ -234,7 +234,7 @@ export class FaissStore extends SaveableVectorStore {
docs: Document[],
embeddings: Embeddings,
dbConfig?: {
docstore?: InMemoryDocstore;
docstore?: SynchronousInMemoryDocstore;
}
): Promise<FaissStore> {
const args: FaissLibArgs = {
Expand Down
20 changes: 11 additions & 9 deletions langchain/src/vectorstores/hnswlib.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,15 @@ import type {
import { Embeddings } from "../embeddings/base.js";
import { SaveableVectorStore } from "./base.js";
import { Document } from "../document.js";
import { InMemoryDocstore } from "../docstore/index.js";
import { SynchronousInMemoryDocstore } from "../stores/doc/in_memory.js";

export interface HNSWLibBase {
space: SpaceName;
numDimensions?: number;
}

export interface HNSWLibArgs extends HNSWLibBase {
docstore?: InMemoryDocstore;
docstore?: SynchronousInMemoryDocstore;
index?: HierarchicalNSWT;
}

Expand All @@ -22,7 +22,7 @@ export class HNSWLib extends SaveableVectorStore {

_index?: HierarchicalNSWT;

docstore: InMemoryDocstore;
docstore: SynchronousInMemoryDocstore;

args: HNSWLibBase;

Expand All @@ -31,7 +31,7 @@ export class HNSWLib extends SaveableVectorStore {
this._index = args.index;
this.args = args;
this.embeddings = embeddings;
this.docstore = args?.docstore ?? new InMemoryDocstore();
this.docstore = args?.docstore ?? new SynchronousInMemoryDocstore();
}

async addDocuments(documents: Document[]): Promise<void> {
Expand Down Expand Up @@ -101,11 +101,13 @@ export class HNSWLib extends SaveableVectorStore {
if (needed > capacity) {
this.index.resizeIndex(needed);
}
const docstoreSize = this.docstore.count;
const docstoreSize = this.index.getCurrentCount();
const toSave: Record<string, Document> = {};
for (let i = 0; i < vectors.length; i += 1) {
this.index.addPoint(vectors[i], docstoreSize + i);
this.docstore.add({ [docstoreSize + i]: documents[i] });
toSave[docstoreSize + i] = documents[i];
}
this.docstore.add(toSave);
}

async similaritySearchVectorWithScore(
Expand Down Expand Up @@ -184,7 +186,7 @@ export class HNSWLib extends SaveableVectorStore {
.then(JSON.parse),
index.readIndex(path.join(directory, "hnswlib.index")),
]);
args.docstore = new InMemoryDocstore(new Map(docstoreFiles));
args.docstore = new SynchronousInMemoryDocstore(new Map(docstoreFiles));

args.index = index;

Expand All @@ -196,7 +198,7 @@ export class HNSWLib extends SaveableVectorStore {
metadatas: object[] | object,
embeddings: Embeddings,
dbConfig?: {
docstore?: InMemoryDocstore;
docstore?: SynchronousInMemoryDocstore;
}
): Promise<HNSWLib> {
const docs: Document[] = [];
Expand All @@ -215,7 +217,7 @@ export class HNSWLib extends SaveableVectorStore {
docs: Document[],
embeddings: Embeddings,
dbConfig?: {
docstore?: InMemoryDocstore;
docstore?: SynchronousInMemoryDocstore;
}
): Promise<HNSWLib> {
const args: HNSWLibArgs = {
Expand Down

0 comments on commit f6023ec

Please sign in to comment.