Fix writing to HTML: copy the template to index.html once, then append each source's section

This commit is contained in:
Chay 2024-04-20 18:07:39 +03:00
parent e767426409
commit 9c4ed1a296
3 changed files with 8 additions and 8 deletions

View file

@ -29,10 +29,12 @@ aljazeera_scraper = NewsScraper(
enable_js=True
)
# Start every run from a fresh copy of the template in index.html.
# Mode 'w' creates the file or truncates an existing one, so no separate
# exists()/remove() step is needed (that check-then-delete was also racy).
# The encoding is explicit so the copy does not depend on the platform's
# locale default.
with open('./template.html', 'r', encoding='utf-8') as template_file:
    template_content = template_file.read()
with open('./index.html', 'w', encoding='utf-8') as output_file:
    output_file.write(template_content)

# Scrape Al Mayadeen and hand the items to write_to_html, which adds them
# to the output page under the given heading.
news_items = almayadeen_scraper.scrape()
almayadeen_scraper.write_to_html('./template.html', './index.html', news_items, 'Al Mayadeen')

View file

@ -85,12 +85,8 @@ class NewsScraper:
def write_to_html(self, template_path, output_path, news_items, heading):
try:
with open(template_path, 'r') as f:
html_template = f.read()
news_html = '\n'.join(news_items)
html_output = html_template.replace('</body>', f'<h1>{heading}</h1>\n{news_html}\n</body>')
html_output = f'<h2>{heading}</h2>\n{news_html}\n'
with open(output_path, 'a') as f:
f.write(html_output)
except IOError as e:

View file

@ -40,5 +40,7 @@ body {
</style>
</head>
<body>
<h1>altesq scrp</h1>
<p>Scraping a bunch of live pages from news outlets. Refreshes every two minutes.</p>
</body>
</html>