47 lines
1.7 KiB
Python
47 lines
1.7 KiB
Python
import os
|
|
from news_scraper import NewsScraper
|
|
|
|
# Define the scrapers for each news source

# Al Mayadeen short-news listing.
# NOTE(review): selector semantics live in NewsScraper (not visible here);
# presumably item_selector matches one news entry and the title/time
# selectors are resolved inside it -- confirm against news_scraper.
almayadeen_scraper = NewsScraper(
    url="https://english.almayadeen.net/shortnews",
    item_selector="div.item",
    title_selector="h4",
    time_selector="div.post-tag.day-time",
    show_content=False,
    enable_js=False,
)
|
|
|
|
# Middle East Eye live blog.
# NOTE(review): the URL is pinned to one specific live-blog page, so it will
# need updating when that page stops receiving entries.
middleeasteye_scraper = NewsScraper(
    url="https://www.middleeasteye.net/live/israels-war-gaza-live-israel-pounds-rafah-overnight-strikes",
    item_selector=".views-row",
    title_selector=".views-field.views-field-title-1 span.field-content a",
    time_selector=".views-field.views-field-changed span.field-content",
    show_content=True,
    enable_js=False,
)
|
|
|
|
# Al Jazeera live blog. This is the only scraper here that passes a
# content_selector and sets enable_js=True.
# NOTE(review): the URL embeds a specific date (2024/4/20) and headline slug,
# so it targets a single day's live blog -- confirm it is still the intended
# page.
aljazeera_scraper = NewsScraper(
    url="https://www.aljazeera.com/news/liveblog/2024/4/20/israels-war-on-gaza-live-israelis-continue-intense-raid-on-nur-shams-camp",
    item_selector=".card-live",
    title_selector="h2",
    time_selector=".date-relative__time",
    content_selector=".wysiwyg-content",
    show_content=True,
    enable_js=True,
)
|
|
|
|
# Write the HTML template to the output file
# Both files are opened with an explicit encoding so the copy does not depend
# on the platform's locale default (which is not UTF-8 on every system and
# can fail or garble non-ASCII characters in the HTML).
# NOTE(review): assumes template.html is stored as UTF-8 -- confirm.
with open('./template.html', 'r', encoding='utf-8') as template_file:
    template_content = template_file.read()

with open('./index.html', 'w', encoding='utf-8') as output_file:
    output_file.write(template_content)
|
|
|
|
# Run every scraper in a fixed order (Al Mayadeen, Middle East Eye,
# Al Jazeera): scrape its items, then pass them to write_to_html along with
# the template path, the output path and the source label.
for scraper, source_label in (
    (almayadeen_scraper, 'Al Mayadeen'),
    (middleeasteye_scraper, 'Middle East Eye'),
    (aljazeera_scraper, 'Al Jazeera'),
):
    news_items = scraper.scrape()
    scraper.write_to_html('./template.html', './index.html', news_items, source_label)
|