mirror of
https://github.com/james-m-jordan/openai-cookbook.git
synced 2025-05-09 19:32:38 +00:00
commit
f6af261085
@ -135,7 +135,7 @@ def crawl(url):
|
|||||||
print(url) # for debugging and to see the progress
|
print(url) # for debugging and to see the progress
|
||||||
|
|
||||||
# Save text from the url to a <url>.txt file
|
# Save text from the url to a <url>.txt file
|
||||||
with open('text/'+local_domain+'/'+url[8:].replace("/", "_") + ".txt", "w") as f:
|
with open('text/'+local_domain+'/'+url[8:].replace("/", "_") + ".txt", "w", encoding="UTF-8") as f:
|
||||||
|
|
||||||
# Get the text from the URL using BeautifulSoup
|
# Get the text from the URL using BeautifulSoup
|
||||||
soup = BeautifulSoup(requests.get(url).text, "html.parser")
|
soup = BeautifulSoup(requests.get(url).text, "html.parser")
|
||||||
@ -181,7 +181,7 @@ texts=[]
|
|||||||
for file in os.listdir("text/" + domain + "/"):
|
for file in os.listdir("text/" + domain + "/"):
|
||||||
|
|
||||||
# Open the file and read the text
|
# Open the file and read the text
|
||||||
with open("text/" + domain + "/" + file, "r") as f:
|
with open("text/" + domain + "/" + file, "r", encoding="UTF-8") as f:
|
||||||
text = f.read()
|
text = f.read()
|
||||||
|
|
||||||
# Omit the first 11 lines and the last 4 lines, then replace -, _, and #update with spaces.
|
# Omit the first 11 lines and the last 4 lines, then replace -, _, and #update with spaces.
|
||||||
@ -379,4 +379,4 @@ def answer_question(
|
|||||||
|
|
||||||
print(answer_question(df, question="What day is it?", debug=False))
|
print(answer_question(df, question="What day is it?", debug=False))
|
||||||
|
|
||||||
print(answer_question(df, question="What is our newest embeddings model?"))
|
print(answer_question(df, question="What is our newest embeddings model?"))
|
||||||
|
Loading…
x
Reference in New Issue
Block a user