Skip to content

Commit

Permalink
Add file name property to addFiles function in Vectara (langchain-ai#…
Browse files Browse the repository at this point in the history
…2604)

* Add file names to addFiles function

* Fix error handling
  • Loading branch information
3eif authored Sep 12, 2023
1 parent abb9491 commit 7543ad3
Show file tree
Hide file tree
Showing 2 changed files with 43 additions and 33 deletions.
16 changes: 11 additions & 5 deletions langchain/src/vectorstores/tests/vectara.int.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ import fs from "fs";
import { expect, beforeAll } from "@jest/globals";
import { FakeEmbeddings } from "../../embeddings/fake.js";
import { Document } from "../../document.js";
import { VectaraLibArgs, VectaraStore } from "../vectara.js";
import { VectaraFile, VectaraLibArgs, VectaraStore } from "../vectara.js";

const getDocs = (): Document[] => {
const hashCode = (s: string) =>
Expand Down Expand Up @@ -191,20 +191,26 @@ describe("VectaraStore", () => {
{ filename: "frenchOne.txt", content: frenchOneContent },
];

const blobs = [];
const vectaraFiles: VectaraFile[] = [];
for (const file of files) {
fs.writeFileSync(file.filename, file.content);

const buffer = fs.readFileSync(file.filename);
blobs.push(new Blob([buffer], { type: "text/plain" }));
vectaraFiles.push({
blob: new Blob([buffer], { type: "text/plain" }),
fileName: file.filename,
});
}

const bitcoinBuffer = fs.readFileSync(
"../examples/src/document_loaders/example_data/bitcoin.pdf"
);
blobs.push(new Blob([bitcoinBuffer], { type: "application/pdf" }));
vectaraFiles.push({
blob: new Blob([bitcoinBuffer], { type: "application/pdf" }),
fileName: "bitcoin.pdf",
});

const results = await store.addFiles(blobs);
const results = await store.addFiles(vectaraFiles);

for (const file of files) {
fs.unlinkSync(file.filename);
Expand Down
60 changes: 32 additions & 28 deletions langchain/src/vectorstores/vectara.ts
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,16 @@ interface VectaraCallHeader {
};
}

/**
 * Describes a single file to be uploaded to Vectara: its contents together
 * with the name it is uploaded under. `VectaraStore.addFiles` accepts an
 * array of these objects.
 */
export interface VectaraFile {
  /** The contents of the file to be uploaded. */
  blob: Blob;
  /**
   * The name of the file to be uploaded. `addFiles` passes it as the filename
   * of the multipart `file` form field.
   */
  fileName: string;
}

/**
* Interface for the filter options used in Vectara API calls.
*/
Expand Down Expand Up @@ -239,55 +249,49 @@ export class VectaraStore extends VectorStore {
* pre-processing and chunking internally in an optimal manner. This method is a wrapper
* to utilize that API within LangChain.
*
* @param filePaths An array of Blob objects representing the files to be uploaded to Vectara.
* @param files An array of VectaraFile objects representing the files and their respective file names to be uploaded to Vectara.
* @param metadatas Optional. An array of metadata objects corresponding to each file in the `files` array.
* @returns A Promise that resolves to the number of successfully uploaded files.
*/
async addFiles(
filePaths: Blob[],
files: VectaraFile[],
metadatas: Record<string, unknown> | undefined = undefined
) {
if (this.corpusId.length > 1)
throw new Error("addFiles does not support multiple corpus ids");

let numDocs = 0;

for (const [index, fileBlob] of filePaths.entries()) {
for (const [index, file] of files.entries()) {
const md = metadatas ? metadatas[index] : {};

const data = new FormData();
data.append("file", fileBlob, `file_${index}`);
data.append("file", file.blob, file.fileName);
data.append("doc-metadata", JSON.stringify(md));

try {
const response = await fetch(
`https://api.vectara.io/v1/upload?c=${this.customerId}&o=${this.corpusId[0]}`,
{
method: "POST",
headers: {
"x-api-key": this.apiKey,
},
body: data,
}
);

const result = await response.json();
const { status } = response;

if (status !== 200 && status !== 409) {
throw new Error(
`Vectara API returned status code ${status}: ${result}`
);
} else {
numDocs += 1;
const response = await fetch(
`https://api.vectara.io/v1/upload?c=${this.customerId}&o=${this.corpusId[0]}`,
{
method: "POST",
headers: {
"x-api-key": this.apiKey,
},
body: data,
}
} catch (err) {
console.error(`Failed to upload file at index ${index}:`, err);
);

const { status } = response;
if (status === 409) {
throw new Error(`File at index ${index} already exists in Vectara`);
} else if (status !== 200) {
throw new Error(`Vectara API returned status code ${status}`);
} else {
numDocs += 1;
}
}

if (this.verbose) {
console.log(`Uploaded ${filePaths.length} files to Vectara`);
console.log(`Uploaded ${files.length} files to Vectara`);
}

return numDocs;
Expand Down

0 comments on commit 7543ad3

Please sign in to comment.