diff --git a/main.py b/main.py index fb403d7..bf95f5c 100644 --- a/main.py +++ b/main.py @@ -15,42 +15,50 @@ class NewsScraper: self.content_selector = content_selector def scrape(self): - if self.enable_js: - session = HTMLSession() - response = session.get(self.url) - response.html.render(timeout=20) # Render the JavaScript content (adjust the timeout as needed) - html = response.html.html - else: - response = requests.get(self.url) - html = response.content + try: + if self.enable_js: + session = HTMLSession() + response = session.get(self.url) + response.html.render(timeout=20) # Render the JavaScript content (adjust the timeout as needed) + html = response.html.html + else: + response = requests.get(self.url) + html = response.content - soup = BeautifulSoup(html, 'html.parser') + soup = BeautifulSoup(html, 'html.parser') - news_items = [] + news_items = [] - for item in soup.select(self.item_selector): - title_element = item.select_one(self.title_selector) - time_element = item.select_one(self.time_selector) + for item in soup.select(self.item_selector): + title_element = item.select_one(self.title_selector) + time_element = item.select_one(self.time_selector) - if title_element and time_element: - title = title_element.text.strip() - time = time_element.text.strip() + if title_element and time_element: + title = title_element.text.strip() + time = time_element.text.strip() - if self.show_content: - if self.content_selector: - content_element = item.select_one(self.content_selector) - else: - content_element = item.find('div', class_='field-content') + if self.show_content: + if self.content_selector: + content_element = item.select_one(self.content_selector) + else: + content_element = item.find('div', class_='field-content') - if content_element: - content = content_element.decode_contents(formatter="html") - news_item_html = """ -
{time}
-{time}
+{time}
+{time}
{time}
-