Revert "File name sanitization (#630)" (#668)

This reverts commit 169f5e02c8ab13372bb066263424f9ddb31f7f9f.
This commit is contained in:
Simón Fishman 2023-08-29 17:45:47 -07:00 committed by GitHub
parent fae14ddb89
commit b2ca4d395c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@@ -627,12 +627,9 @@
" # Get the next URL from the queue\n",
" url = queue.pop()\n",
" print(url) # for debugging and to see the progress\n",
" \n",
" sanitized_url = re.sub(r'[^\w\s.-]', '_', url)\n",
" file_path = 'text/' + local_domain + '/' + sanitized_url[8:] + \".txt\"\n",
"\n",
" # Save text from the url to a <url>.txt file\n",
" with open(file_path, \"w\") as f:\n",
" with open('text/'+local_domain+'/'+url[8:].replace(\"/\", \"_\") + \".txt\", \"w\") as f:\n",
"\n",
" # Get the text from the URL using BeautifulSoup\n",
" soup = BeautifulSoup(requests.get(url).text, \"html.parser\")\n",