forked from gkamradt/langchain-tutorials
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Gregory Kamradt
committed
Feb 16, 2023
1 parent
a619518
commit d383c41
Showing
238 changed files
with
44,978 additions
and
0 deletions.
There are no files selected for viewing
225 changes: 225 additions & 0 deletions
225
Untitled Folder/.ipynb_checkpoints/Custom Files Question & Answer-checkpoint.ipynb
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,225 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 4, | ||
"id": "ad66c9aa", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"from langchain.embeddings.openai import OpenAIEmbeddings\n", | ||
"from langchain.vectorstores import Chroma\n", | ||
"from langchain.text_splitter import CharacterTextSplitter\n", | ||
"from langchain import OpenAI, VectorDBQA\n", | ||
"from langchain.document_loaders import DirectoryLoader\n", | ||
"import magic\n", | ||
"import os\n", | ||
"import nltk\n", | ||
"\n", | ||
"# os.environ['OPENAI_API_KEY'] = '...'\n", | ||
"\n", | ||
"# nltk.download('averaged_perceptron_tagger')\n", | ||
"\n", | ||
"# pip install unstructured\n", | ||
"# Other dependencies to install https://langchain.readthedocs.io/en/latest/modules/document_loaders/examples/unstructured_file.html\n", | ||
"# pip install python-magic-bin\n", | ||
"# pip install chromadb" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 5, | ||
"id": "e8a28a08", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"loader = DirectoryLoader('../data/PaulGrahamEssaySmall/', glob='**/*.txt')" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 6, | ||
"id": "6a9740d9", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"documents = loader.load()" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 8, | ||
"id": "3153f864", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 9, | ||
"id": "a792c6fb", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"texts = text_splitter.split_documents(documents)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 11, | ||
"id": "d2cad0de", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"embeddings = OpenAIEmbeddings(openai_api_key=os.environ['OPENAI_API_KEY'])" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 12, | ||
"id": "734ed265", | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"Running Chroma using direct local API.\n", | ||
"Using DuckDB in-memory for database. Data will be transient.\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"docsearch = Chroma.from_documents(texts, embeddings)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 13, | ||
"id": "817a0ece", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"qa = VectorDBQA.from_chain_type(llm=OpenAI(), chain_type=\"stuff\", vectorstore=docsearch)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 14, | ||
"id": "5533a9e1", | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"data": { | ||
"text/plain": [ | ||
"' McCarthy discovered how to use a handful of simple operators and a notation for functions to build a whole programming language, which he called Lisp.'" | ||
] | ||
}, | ||
"execution_count": 14, | ||
"metadata": {}, | ||
"output_type": "execute_result" | ||
} | ||
], | ||
"source": [ | ||
"query = \"What did McCarthy discover?\"\n", | ||
"qa.run(query)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"id": "f22231c5", | ||
"metadata": {}, | ||
"source": [ | ||
"### Sources" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 15, | ||
"id": "694343cb", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"qa = VectorDBQA.from_chain_type(llm=OpenAI(), chain_type=\"stuff\", vectorstore=docsearch, return_source_documents=True)\n", | ||
"query = \"What did McCarthy discover?\"\n", | ||
"result = qa({\"query\": query})" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 16, | ||
"id": "bec53323", | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"data": { | ||
"text/plain": [ | ||
"' McCarthy discovered that a programming language could be built from a handful of simple operators and a notation for functions. He called this language Lisp, for \"List Processing,\" because one of his key ideas was to use a simple data structure called a list for both code and data.'" | ||
] | ||
}, | ||
"execution_count": 16, | ||
"metadata": {}, | ||
"output_type": "execute_result" | ||
} | ||
], | ||
"source": [ | ||
"result['result']" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 17, | ||
"id": "32246ae3", | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"data": { | ||
"text/plain": [ | ||
"[Document(page_content=\"our own time. It seems to me that there have been\\n\\ntwo really clean, consistent models of programming so\\n\\nfar: the C model and the Lisp model.\\n\\nThese two seem points of high ground, with swampy lowlands\\n\\nbetween them. As computers have grown more powerful,\\n\\nthe new languages being developed have been moving\\n\\nsteadily toward the Lisp model. A popular recipe\\n\\nfor new programming languages in the past 20 years\\n\\nhas been to take the C model of computing and add to\\n\\nit, piecemeal, parts taken from the Lisp model,\\n\\nlike runtime typing and garbage collection.In this article I'm going to try to explain in the\\n\\nsimplest possible terms what McCarthy discovered.\\n\\nThe point is not just to learn about an interesting\\n\\ntheoretical result someone figured out forty years ago,\\n\\nbut to show where languages are heading.\\n\\nThe unusual thing about Lisp \\x97 in fact, the defining\\n\\nquality of Lisp \\x97 is that it can be written in\\n\\nitself. To understand what McCarthy meant by this,\\n\\nwe're going to retrace his steps, with his mathematical\", lookup_str='', metadata={'source': '../data/PaulGrahamEssaySmall/rootsoflisp.txt'}, lookup_index=0),\n", | ||
" Document(page_content='May 2001\\n\\n(I wrote this article to help myself understand exactly\\n\\nwhat McCarthy discovered. You don\\'t need to know this stuff\\n\\nto program in Lisp, but it should be helpful to\\n\\nanyone who wants to\\n\\nunderstand the essence of Lisp \\x97 both in the sense of its\\n\\norigins and its semantic core. The fact that it has such a core\\n\\nis one of Lisp\\'s distinguishing features, and the reason why,\\n\\nunlike other languages, Lisp has dialects.)In 1960, John\\n\\nMcCarthy published a remarkable paper in\\n\\nwhich he did for programming something like what Euclid did for\\n\\ngeometry. He showed how, given a handful of simple\\n\\noperators and a notation for functions, you can\\n\\nbuild a whole programming language.\\n\\nHe called this language Lisp, for \"List Processing,\"\\n\\nbecause one of his key ideas was to use a simple\\n\\ndata structure called a list for both\\n\\ncode and data.It\\'s worth understanding what McCarthy discovered, not\\n\\njust as a landmark in the history of computers, but as\\n\\na model for what programming is tending to become in', lookup_str='', metadata={'source': '../data/PaulGrahamEssaySmall/rootsoflisp.txt'}, lookup_index=0),\n", | ||
" Document(page_content=\"January 2023(Someone fed my essays into GPT to make something that could answer\\n\\nquestions based on them, then asked it where good ideas come from. The\\n\\nanswer was ok, but not what I would have said. This is what I would have said.)The way to get new ideas is to notice anomalies: what seems strange,\\n\\nor missing, or broken? You can see anomalies in everyday life (much\\n\\nof standup comedy is based on this), but the best place to look for\\n\\nthem is at the frontiers of knowledge.Knowledge grows fractally.\\n\\nFrom a distance its edges look smooth, but when you learn enough\\n\\nto get close to one, you'll notice it's full of gaps. These gaps\\n\\nwill seem obvious; it will seem inexplicable that no one has tried\\n\\nx or wondered about y. In the best case, exploring such gaps yields\\n\\nwhole new fractal buds.\", lookup_str='', metadata={'source': '../data/PaulGrahamEssaySmall/getideas.txt'}, lookup_index=0),\n", | ||
" Document(page_content=\"November 2019If you discover something new, there's a significant chance you'll be\\n\\naccused of some form of heresy.To discover new things, you have\\n\\nto work on ideas that are good but non-obvious; if an idea is\\n\\nobviously good, other people are probably already working on it.\\n\\nOne common way for a good idea to be non-obvious is for it to be hidden in the\\n\\nshadow of some mistaken assumption that people are very attached to.\\n\\nBut anything you discover from working on such an idea will tend to\\n\\ncontradict the mistaken assumption that was concealing it. And you\\n\\nwill thus get a lot of heat from people attached to the mistaken\\n\\nassumption. Galileo and Darwin are famous examples of this phenomenon,\\n\\nbut it's probably always an ingredient in the resistance to new\\n\\nideas.So it's particularly dangerous for an organization or society to\\n\\nhave a culture of pouncing on heresy. When you suppress heresies,\\n\\nyou don't just prevent people from contradicting the mistaken\", lookup_str='', metadata={'source': '../data/PaulGrahamEssaySmall/nov.txt'}, lookup_index=0)]" | ||
] | ||
}, | ||
"execution_count": 17, | ||
"metadata": {}, | ||
"output_type": "execute_result" | ||
} | ||
], | ||
"source": [ | ||
"result['source_documents']" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "d1415339", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "Python 3 (ipykernel)", | ||
"language": "python", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.9.13" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 5 | ||
} |
Oops, something went wrong.