From e42f68947e9ab859a33401e31a8036fb15cab4df Mon Sep 17 00:00:00 2001
From: lelkins <lelkins@noreply.altesq.net>
Date: Sat, 20 Apr 2024 14:07:34 +0300
Subject: [PATCH] Add error handling to scrape() and write_to_html() per internal request

---
 main.py | 105 ++++++++++++++++++++++++++++++++------------------------
 1 file changed, 60 insertions(+), 45 deletions(-)
diff --git a/main.py b/main.py
index fb403d7..bf95f5c 100644
--- a/main.py
+++ b/main.py
@@ -15,42 +15,50 @@ class NewsScraper:
         self.content_selector = content_selector
 
     def scrape(self):
-        if self.enable_js:
-            session = HTMLSession()
-            response = session.get(self.url)
-            response.html.render(timeout=20)  # Render the JavaScript content (adjust the timeout as needed)
-            html = response.html.html
-        else:
-            response = requests.get(self.url)
-            html = response.content
+        try:
+            if self.enable_js:
+                session = HTMLSession()
+                response = session.get(self.url)
+                response.html.render(timeout=20)  # Render the JavaScript content (adjust the timeout as needed)
+                html = response.html.html
+            else:
+                response = requests.get(self.url)
+                html = response.content
 
-        soup = BeautifulSoup(html, 'html.parser')
+            soup = BeautifulSoup(html, 'html.parser')
 
-        news_items = []
+            news_items = []
 
-        for item in soup.select(self.item_selector):
-            title_element = item.select_one(self.title_selector)
-            time_element = item.select_one(self.time_selector)
+            for item in soup.select(self.item_selector):
+                title_element = item.select_one(self.title_selector)
+                time_element = item.select_one(self.time_selector)
 
-            if title_element and time_element:
-                title = title_element.text.strip()
-                time = time_element.text.strip()
+                if title_element and time_element:
+                    title = title_element.text.strip()
+                    time = time_element.text.strip()
 
-                if self.show_content:
-                    if self.content_selector:
-                        content_element = item.select_one(self.content_selector)
-                    else:
-                        content_element = item.find('div', class_='field-content')
+                    if self.show_content:
+                        if self.content_selector:
+                            content_element = item.select_one(self.content_selector)
+                        else:
+                            content_element = item.find('div', class_='field-content')
 
-                    if content_element:
-                        content = content_element.decode_contents(formatter="html")
-                        news_item_html = """
-                            <div class="news-item">
-                                <h2 class="news-title">{title}</h2>
-                                <p class="news-time">{time}</p>
-                                <div class="news-content">{content}</div>
-                            </div>
-                        """.format(title=title, time=time, content=content)
+                        if content_element:
+                            content = content_element.decode_contents(formatter="html")
+                            news_item_html = """
+                                <div class="news-item">
+                                    <h2 class="news-title">{title}</h2>
+                                    <p class="news-time">{time}</p>
+                                    <div class="news-content">{content}</div>
+                                </div>
+                            """.format(title=title, time=time, content=content)
+                        else:
+                            news_item_html = """
+                                <div class="news-item">
+                                    <h2 class="news-title">{title}</h2>
+                                    <p class="news-time">{time}</p>
+                                </div>
+                            """.format(title=title, time=time)
                     else:
                         news_item_html = """
                             <div class="news-item">
@@ -58,27 +66,34 @@ class NewsScraper:
                                 <p class="news-time">{time}</p>
                             </div>
                         """.format(title=title, time=time)
-                else:
-                    news_item_html = """
-                        <div class="news-item">
-                            <h2 class="news-title">{title}</h2>
-                            <p class="news-time">{time}</p>
-                        </div>
-                    """.format(title=title, time=time)
 
-                news_items.append(news_item_html)
+                    news_items.append(news_item_html)
 
-        return news_items
+            return news_items
+        except requests.exceptions.RequestException as e:
+            print(f"Error fetching URL: {e}")
+            return []
+        except AttributeError as e:
+            print(f"Error parsing HTML: {e}")
+            return []
+        except Exception as e:
+            print(f"Unexpected error: {e}")
+            return []
 
     def write_to_html(self, template_path, output_path, news_items, heading):
-        with open(template_path, 'r') as f:
-            html_template = f.read()
+        try:
+            with open(template_path, 'r') as f:
+                html_template = f.read()
 
-        news_html = '\n'.join(news_items)
-        html_output = html_template.replace('</body>', f'<h1>{heading}</h1>\n{news_html}\n</body>')
+            news_html = '\n'.join(news_items)
+            html_output = html_template.replace('</body>', f'<h1>{heading}</h1>\n{news_html}\n</body>')
 
-        with open(output_path, 'a') as f:
-            f.write(html_output)
+            with open(output_path, 'a') as f:
+                f.write(html_output)
+        except IOError as e:
+            print(f"Error writing to file: {e}")
+        except Exception as e:
+            print(f"Unexpected error: {e}")
 
 # Define the scrapers for each news source