Skip to content

Commit

Permalink
docs change to documents
Browse files Browse the repository at this point in the history
  • Loading branch information
weitsung50110 committed Nov 21, 2024
1 parent e366725 commit 501daf2
Show file tree
Hide file tree
Showing 4 changed files with 31 additions and 14 deletions.
18 changes: 11 additions & 7 deletions langchain_rag_doc.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,14 +12,14 @@
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler

# Initialize the Ollama model, attaching a callback manager with StreamingStdOutCallbackHandler
# NOTE(review): `llm` is assigned twice in a row below; this looks like the removed and
# added lines of a rendered diff shown together - confirm which assignment is current.
llm = Ollama(model='llama3', callback_manager=CallbackManager([StreamingStdOutCallbackHandler()]))
llm = Ollama(model='kenneth85/llama-3-taiwan:8b-instruct', callback_manager=CallbackManager([StreamingStdOutCallbackHandler()]))

# Build the document list; each Document holds one passage of text
# NOTE(review): the YouTube and Threads entries each appear in two variants (with and
# without an alias in parentheses), and the two Threads entries are not separated by a
# comma - presumably only one variant of each pair is meant to remain; confirm.
docs = [
Document(page_content='崴寶Weibert Weiberson的網站:了解更多關於崴寶的資訊,請訪問 https://weitsung50110.github.io'),
Document(page_content='崴寶Weibert Weiberson的YouTube:觀看崴寶的最新影片,請訪問 https://youtube.com/@weibert'),
Document(page_content='崴寶Weibert Weiberson的YouTube(YT):觀看崴寶的最新影片,請訪問 https://youtube.com/@weibert'),
Document(page_content='崴寶Weibert Weiberson的Instagram(IG):跟隨崴寶的編程和創作,請訪問 https://instagram.com/weibert_coding'),
Document(page_content='崴寶Weibert Weiberson的Threads:探索崴寶的更多動態,請訪問 https://threads.net/@weibert_coding')
Document(page_content='崴寶Weibert Weiberson的Threads(脆):探索崴寶的更多動態,請訪問 https://threads.net/@weibert_coding')
]


Expand All @@ -31,7 +31,7 @@
# Embedding model passed to FAISS when the vector database is built
embeddings = OllamaEmbeddings(model="llama3")

# Build the vector database with FAISS
# NOTE(review): two consecutive assignments, one from `docs` and one from `documents`;
# this looks like a removed/added diff pair. `documents` is not defined in the visible
# lines - presumably it is the text-splitter output; confirm.
vectordb = FAISS.from_documents(docs, embeddings)
vectordb = FAISS.from_documents(documents, embeddings)
# Use the vector database as the retriever
retriever = vectordb.as_retriever()

Expand All @@ -56,9 +56,13 @@
})
# print(response['answer'])
# context = response['context']
# print("-------------------")
# print(response)
# Print the full response, then its 'context' entry, each preceded by a separator line
print("-------------------")
print(response)
print("-------------------")
print(response['context'])
# print(response['context'])
# Prompt for the next input
input_text = input('>>> ')

# Reference links
# https://myapollo.com.tw/blog/langchain-tutorial-retrieval/
# https://huggingface.co/learn/cookbook/zh-CN/advanced_rag
# https://chatgpt.com/share/e0f169d7-8620-4468-ba0a-581e7d9f5676
# https://medium.com/@jackcheang5/%E5%BB%BA%E6%A7%8B%E7%B0%A1%E6%98%93rag%E7%B3%BB%E7%B5%B1-ca4e593f3fed
8 changes: 6 additions & 2 deletions langchain_rag_doc_chroma.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
# NOTE(review): the two Threads entries below are not separated by a comma; this looks
# like a removed/added diff pair - confirm which one is current.
Document(page_content='崴寶Weibert Weiberson的網站:了解更多關於崴寶的資訊,請訪問 https://weitsung50110.github.io'),
Document(page_content='崴寶Weibert Weiberson的YouTube:觀看崴寶的最新影片,請訪問 https://youtube.com/@weibert'),
Document(page_content='崴寶Weibert Weiberson的Instagram(IG):跟隨崴寶的編程和創作,請訪問 https://instagram.com/weibert_coding'),
Document(page_content='崴寶Weibert Weiberson的Threads:探索崴寶的更多動態,請訪問 https://threads.net/@weibert_coding')
Document(page_content='崴寶Weibert Weiberson的Threads(脆):探索崴寶的更多動態,請訪問 https://threads.net/@weibert_coding')
]

# Configure the text splitter: chunk_size is the size of each chunk, chunk_overlap is the overlapping portion
Expand Down Expand Up @@ -60,11 +60,15 @@
'input': input_text,
'context': context
})
# NOTE(review): the commented-out and live print of response['answer'] appear together;
# this looks like a removed/added diff pair - confirm.
# print(response['answer'])
print(response['answer'])
# context = response['context']
# Print the full response, then its 'context' entry, each preceded by a separator line
print("-------------------")
print(response)
print("-------------------")
print(response['context'])
# Prompt for the next input
input_text = input('>>> ')

# Reference links
# https://myapollo.com.tw/blog/langchain-tutorial-retrieval/
# https://huggingface.co/learn/cookbook/zh-CN/advanced_rag
# https://chatgpt.com/share/e0f169d7-8620-4468-ba0a-581e7d9f5676
# https://medium.com/@jackcheang5/%E5%BB%BA%E6%A7%8B%E7%B0%A1%E6%98%93rag%E7%B3%BB%E7%B5%B1-ca4e593f3fed
8 changes: 6 additions & 2 deletions langchain_rag_doc_restrict.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
# Embedding model passed to FAISS when the vector database is built
embeddings = OllamaEmbeddings(model="llama3")

# Build the vector database with FAISS
# NOTE(review): two consecutive assignments, one from `docs` and one from `documents`;
# this looks like a removed/added diff pair - confirm which one is current.
vectordb = FAISS.from_documents(docs, embeddings)
vectordb = FAISS.from_documents(documents, embeddings)
# Use the vector database as the retriever
retriever = vectordb.as_retriever()
# NOTE(review): confirm that the FAISS retriever actually honors a 'distance_threshold'
# search kwarg; the LangChain docs describe search_type="similarity_score_threshold"
# with a 'score_threshold' kwarg for threshold filtering - TODO verify.
retriever.search_kwargs = {'distance_threshold': 0.8} # adjust the threshold as needed
Expand All @@ -54,11 +54,15 @@
'input': input_text,
'context': context
})
# NOTE(review): the commented-out and live print of response['answer'] appear together;
# this looks like a removed/added diff pair - confirm.
# print(response['answer'])
print(response['answer'])
# context = response['context']
# Print the full response, then its 'context' entry, each preceded by a separator line
print("-------------------")
print(response)
print("-------------------")
print(response['context'])
# Prompt for the next input
input_text = input('>>> ')

# Reference links
# https://myapollo.com.tw/blog/langchain-tutorial-retrieval/
# https://huggingface.co/learn/cookbook/zh-CN/advanced_rag
# https://chatgpt.com/share/e0f169d7-8620-4468-ba0a-581e7d9f5676
# https://medium.com/@jackcheang5/%E5%BB%BA%E6%A7%8B%E7%B0%A1%E6%98%93rag%E7%B3%BB%E7%B5%B1-ca4e593f3fed
11 changes: 8 additions & 3 deletions langchain_rag_pdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler

# Initialize the Ollama model, attaching a callback manager with StreamingStdOutCallbackHandler
# NOTE(review): `llm` is assigned twice in a row below; this looks like the removed and
# added lines of a rendered diff shown together - confirm which assignment is current.
llm = Ollama(model='llama3', callback_manager=CallbackManager([StreamingStdOutCallbackHandler()]))
llm = Ollama(model='wangshenzhi/llama3-8b-chinese-chat-ollama-q8', callback_manager=CallbackManager([StreamingStdOutCallbackHandler()]))

# Load and split the PDF file
loader = PyPDFLoader("pdf_test.pdf")
Expand All @@ -24,10 +24,10 @@
# Split `docs` into `documents` with the text splitter
documents = text_splitter.split_documents(docs)

# Initialize the embedding model
# NOTE(review): `embeddings` is assigned twice in a row; this looks like a removed/added
# diff pair - confirm which model name is current.
embeddings = OllamaEmbeddings(model="llama3")
embeddings = OllamaEmbeddings(model="wangshenzhi/llama3-8b-chinese-chat-ollama-q8")

# Build the vector database with FAISS
# NOTE(review): two consecutive assignments, one from the unsplit `docs` and one from the
# split `documents`; presumably the `documents` variant is the current one - confirm.
vectordb = FAISS.from_documents(docs, embeddings)
vectordb = FAISS.from_documents(documents, embeddings)
# Use the vector database as the retriever
retriever = vectordb.as_retriever()

Expand All @@ -52,5 +52,10 @@
})
# print(response['answer'])
# context = response['context']

# Prompt for the next input
input_text = input('>>> ')

# Reference links
# https://myapollo.com.tw/blog/langchain-tutorial-retrieval/
# https://huggingface.co/learn/cookbook/zh-CN/advanced_rag
# https://chatgpt.com/share/e0f169d7-8620-4468-ba0a-581e7d9f5676
# https://medium.com/@jackcheang5/%E5%BB%BA%E6%A7%8B%E7%B0%A1%E6%98%93rag%E7%B3%BB%E7%B5%B1-ca4e593f3fed

0 comments on commit 501daf2

Please sign in to comment.