scrp/main.py
2024-04-20 18:07:39 +03:00

47 lines
1.7 KiB
Python

import os
from news_scraper import NewsScraper
# News sources: one NewsScraper instance per outlet.
# The page URLs are kept as named constants so each can be updated in one
# place without touching the selector settings below.
# NOTE(review): the *_selector values look like CSS selectors interpreted by
# NewsScraper -- confirm their exact meaning in news_scraper.py.
ALMAYADEEN_URL = "https://english.almayadeen.net/shortnews"
MIDDLEEASTEYE_URL = "https://www.middleeasteye.net/live/israels-war-gaza-live-israel-pounds-rafah-overnight-strikes"
ALJAZEERA_URL = "https://www.aljazeera.com/news/liveblog/2024/4/20/israels-war-on-gaza-live-israelis-continue-intense-raid-on-nur-shams-camp"

# Al Mayadeen: content display and JavaScript are both switched off.
almayadeen_scraper = NewsScraper(
    url=ALMAYADEEN_URL,
    item_selector="div.item",
    title_selector="h4",
    time_selector="div.post-tag.day-time",
    show_content=False,
    enable_js=False,
)

# Middle East Eye: content display on, JavaScript off.
middleeasteye_scraper = NewsScraper(
    url=MIDDLEEASTEYE_URL,
    item_selector=".views-row",
    title_selector=".views-field.views-field-title-1 span.field-content a",
    time_selector=".views-field.views-field-changed span.field-content",
    show_content=True,
    enable_js=False,
)

# Al Jazeera: the only source configured with an explicit content_selector
# and with enable_js=True.
aljazeera_scraper = NewsScraper(
    url=ALJAZEERA_URL,
    item_selector=".card-live",
    title_selector="h2",
    time_selector=".date-relative__time",
    content_selector=".wysiwyg-content",
    show_content=True,
    enable_js=True,
)
# Build the output page: reset ./index.html to a fresh copy of the template,
# then hand each scraper's results to its write_to_html() together with the
# label of the source they came from.
TEMPLATE_PATH = './template.html'
OUTPUT_PATH = './index.html'

# Copy the template byte-for-byte. Binary mode keeps the copy independent of
# the platform's default text encoding and newline translation, so a template
# containing non-ASCII characters can neither fail to decode nor be altered
# on its way into the output file.
with open(TEMPLATE_PATH, 'rb') as template_file:
    template_content = template_file.read()
with open(OUTPUT_PATH, 'wb') as output_file:
    output_file.write(template_content)

# Sources are processed in this fixed order. Each pair is
# (scraper, source label passed to write_to_html).
for scraper, source_name in (
    (almayadeen_scraper, 'Al Mayadeen'),
    (middleeasteye_scraper, 'Middle East Eye'),
    (aljazeera_scraper, 'Al Jazeera'),
):
    news_items = scraper.scrape()
    scraper.write_to_html(TEMPLATE_PATH, OUTPUT_PATH, news_items, source_name)