Source code for qrmine.utils
import re
import requests
import os
[docs]
class QRUtils(object):
def __init__(self):
pass
[docs]
@staticmethod
def read_covid_narratives(output_folder):
os.makedirs(output_folder, exist_ok=True)
for doc_count in range(1, 115):
url = f"https://root-url-here/items/show/{doc_count}"
html = requests.get(url).text
# Extract <a class="download-file" href
pattern = r'<a class="download-file" href="(.*?)">'
# find first match
match = re.search(pattern, html)
if match:
# Extract the URL
file_url = match.group(1)
# sanitize the URL
file_url = file_url.replace("&", "&")
print(f"Downloading file from {file_url}")
# Download the file
response = requests.get(file_url)
# Save the file to the output folder
with open(
os.path.join(output_folder, f"doc_{doc_count}.pdf"), "wb"
) as f:
f.write(response.content)
else:
print(f"No match found for document {doc_count}")
if __name__ == "__main__":
# Example usage
qr_utils = QRUtils()
qr_utils.read_covid_narratives("/tmp/covid_narratives")