Pass `documents` instead of `docs` to FAISS.from_documents

weitsung50110 · Nov 21, 2024 · 501daf2 · 501daf2
1 parent e366725
commit 501daf2
Show file tree

Hide file tree

Showing 4 changed files with 31 additions and 14 deletions.
diff --git a/langchain_rag_doc.py b/langchain_rag_doc.py
@@ -12,14 +12,14 @@
 from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
 
 # 初始化Ollama模型
-llm = Ollama(model='llama3', callback_manager=CallbackManager([StreamingStdOutCallbackHandler()]))
+llm = Ollama(model='kenneth85/llama-3-taiwan:8b-instruct', callback_manager=CallbackManager([StreamingStdOutCallbackHandler()]))
 
 # 建立文件列表，每個文件包含一段文字內容
 docs = [
     Document(page_content='崴寶Weibert Weiberson的網站：了解更多關於崴寶的資訊，請訪問 https://weitsung50110.github.io'),
-    Document(page_content='崴寶Weibert Weiberson的YouTube：觀看崴寶的最新影片，請訪問 https://youtube.com/@weibert'),
+    Document(page_content='崴寶Weibert Weiberson的YouTube(YT)：觀看崴寶的最新影片，請訪問 https://youtube.com/@weibert'),
     Document(page_content='崴寶Weibert Weiberson的Instagram(IG)：跟隨崴寶的編程和創作，請訪問 https://instagram.com/weibert_coding'),
-    Document(page_content='崴寶Weibert Weiberson的Threads：探索崴寶的更多動態，請訪問 https://threads.net/@weibert_coding')
+    Document(page_content='崴寶Weibert Weiberson的Threads(脆)：探索崴寶的更多動態，請訪問 https://threads.net/@weibert_coding')
 ]
 
 
@@ -31,7 +31,7 @@
 embeddings = OllamaEmbeddings(model="llama3")
 
 # 使用FAISS建立向量資料庫
-vectordb = FAISS.from_documents(docs, embeddings)
+vectordb = FAISS.from_documents(documents, embeddings)
 # 將向量資料庫設為檢索器
 retriever = vectordb.as_retriever()
 
@@ -56,9 +56,13 @@
     })
     # print(response['answer'])
     # context = response['context']
+    # print("-------------------")
+    # print(response)
     print("-------------------")
-    print(response)
-    print("-------------------")
-    print(response['context'])
+    # print(response['context'])
     input_text = input('>>> ')
 
+# https://myapollo.com.tw/blog/langchain-tutorial-retrieval/
+# https://huggingface.co/learn/cookbook/zh-CN/advanced_rag
+# https://chatgpt.com/share/e0f169d7-8620-4468-ba0a-581e7d9f5676
+# https://medium.com/@jackcheang5/%E5%BB%BA%E6%A7%8B%E7%B0%A1%E6%98%93rag%E7%B3%BB%E7%B5%B1-ca4e593f3fed
diff --git a/langchain_rag_doc_chroma.py b/langchain_rag_doc_chroma.py
@@ -20,7 +20,7 @@
     Document(page_content='崴寶Weibert Weiberson的網站：了解更多關於崴寶的資訊，請訪問 https://weitsung50110.github.io'),
     Document(page_content='崴寶Weibert Weiberson的YouTube：觀看崴寶的最新影片，請訪問 https://youtube.com/@weibert'),
     Document(page_content='崴寶Weibert Weiberson的Instagram(IG)：跟隨崴寶的編程和創作，請訪問 https://instagram.com/weibert_coding'),
-    Document(page_content='崴寶Weibert Weiberson的Threads：探索崴寶的更多動態，請訪問 https://threads.net/@weibert_coding')
+    Document(page_content='崴寶Weibert Weiberson的Threads(脆)：探索崴寶的更多動態，請訪問 https://threads.net/@weibert_coding')
 ]
 
 # 設定文本分割器，chunk_size是分割的大小，chunk_overlap是重疊的部分
@@ -60,11 +60,15 @@
         'input': input_text,
         'context': context
     })
-    # print(response['answer'])
+    print(response['answer'])
     # context = response['context']
     print("-------------------")
     print(response)
     print("-------------------")
     print(response['context'])
     input_text = input('>>> ')
 
+# https://myapollo.com.tw/blog/langchain-tutorial-retrieval/
+# https://huggingface.co/learn/cookbook/zh-CN/advanced_rag
+# https://chatgpt.com/share/e0f169d7-8620-4468-ba0a-581e7d9f5676
+# https://medium.com/@jackcheang5/%E5%BB%BA%E6%A7%8B%E7%B0%A1%E6%98%93rag%E7%B3%BB%E7%B5%B1-ca4e593f3fed
diff --git a/langchain_rag_doc_restrict.py b/langchain_rag_doc_restrict.py
@@ -30,7 +30,7 @@
 embeddings = OllamaEmbeddings(model="llama3")
 
 # 使用FAISS建立向量資料庫
-vectordb = FAISS.from_documents(docs, embeddings)
+vectordb = FAISS.from_documents(documents, embeddings)
 # 將向量資料庫設為檢索器
 retriever = vectordb.as_retriever()
 retriever.search_kwargs = {'distance_threshold': 0.8}  # 根據需求調整閾值
@@ -54,11 +54,15 @@
         'input': input_text,
         'context': context
     })
-    # print(response['answer'])
+    print(response['answer'])
     # context = response['context']
     print("-------------------")
     print(response)
     print("-------------------")
     print(response['context'])
     input_text = input('>>> ')
 
+# https://myapollo.com.tw/blog/langchain-tutorial-retrieval/
+# https://huggingface.co/learn/cookbook/zh-CN/advanced_rag
+# https://chatgpt.com/share/e0f169d7-8620-4468-ba0a-581e7d9f5676
+# https://medium.com/@jackcheang5/%E5%BB%BA%E6%A7%8B%E7%B0%A1%E6%98%93rag%E7%B3%BB%E7%B5%B1-ca4e593f3fed
diff --git a/langchain_rag_pdf.py b/langchain_rag_pdf.py
@@ -13,7 +13,7 @@
 from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
 
 # 初始化Ollama模型
-llm = Ollama(model='llama3', callback_manager=CallbackManager([StreamingStdOutCallbackHandler()]))
+llm = Ollama(model='wangshenzhi/llama3-8b-chinese-chat-ollama-q8', callback_manager=CallbackManager([StreamingStdOutCallbackHandler()]))
 
 # 載入並分割PDF文件
 loader = PyPDFLoader("pdf_test.pdf")
@@ -24,10 +24,10 @@
 documents = text_splitter.split_documents(docs)
 
 # 初始化嵌入模型
-embeddings = OllamaEmbeddings(model="llama3")
+embeddings = OllamaEmbeddings(model="wangshenzhi/llama3-8b-chinese-chat-ollama-q8")
 
 # 使用FAISS建立向量資料庫
-vectordb = FAISS.from_documents(docs, embeddings)
+vectordb = FAISS.from_documents(documents, embeddings)
 # 將向量資料庫設為檢索器
 retriever = vectordb.as_retriever()
 
@@ -52,5 +52,10 @@
     })
     # print(response['answer'])
     # context = response['context']
+
     input_text = input('>>> ')
 
+# https://myapollo.com.tw/blog/langchain-tutorial-retrieval/
+# https://huggingface.co/learn/cookbook/zh-CN/advanced_rag
+# https://chatgpt.com/share/e0f169d7-8620-4468-ba0a-581e7d9f5676
+# https://medium.com/@jackcheang5/%E5%BB%BA%E6%A7%8B%E7%B0%A1%E6%98%93rag%E7%B3%BB%E7%B5%B1-ca4e593f3fed