denrad · March 11, 2023 07:12
diff --git a/scrape2.py b/scrape2.py
 import requests
 from bs4 import BeautifulSoup
 import datetime
 import pytz
 import feedgen.feed
 from feedgen.entry import FeedEntry

 # Scrape the news listing page, fetch every linked article and write the
 # result to 'news.rss' as an RSS feed.

 # Listing page that is scraped for news teasers
 url = 'https://gcheb.cap.ru/news/?type=news'

 # Seconds to wait for the server before giving up, so that a stalled
 # connection cannot hang the script indefinitely
 request_timeout = 30

 # Timezone attached to the parsed dates; looked up once because it does not
 # change between news items
 tz = pytz.timezone('Europe/Moscow')

 # Fetch the listing page and fail loudly on an HTTP error instead of
 # silently parsing an error page
 response = requests.get(url, timeout=request_timeout)
 response.raise_for_status()
 html_content = response.text

 # Parse the HTML content using BeautifulSoup
 soup = BeautifulSoup(html_content, 'html.parser')

 # Create a new RSS feed using feedgen
 feed = feedgen.feed.FeedGenerator()
 feed.title('News from https://gcheb.cap.ru/news/?type=news')
 feed.link(href=url)
 feed.description('Latest news from the Cheboksary City Government website')

 # Find all news items on the page
 news_items = soup.find_all('div', {'class': 'item_news'})

 # Loop through each news item and create an RSS feed entry
 for item in news_items:
    # Title text comes from the first anchor of the item; the target comes
    # from the anchor carrying the 'news-list_title' class
    title = item.find('a').get_text()
    link = item.find('a', {'class': 'news-list_title'})['href']
    # The href is joined onto the site origin to form the article URL
    link = 'https://gcheb.cap.ru' + link
    date = item.find('div', {'class': 'news-list_date'}).text.strip()

    # Parse the 'HH:MM | DD.MM.YYYY' string and attach the timezone.
    # pytz zones must be attached with localize(): replace(tzinfo=...)
    # would use the zone's historical local-mean-time offset instead of
    # the offset that is actually valid for the parsed date.
    dt = tz.localize(datetime.datetime.strptime(date, '%H:%M | %d.%m.%Y'))

    # Make a request to the news article URL and get the HTML content
    response = requests.get(link, timeout=request_timeout)
    html_content = response.text

    # Parse the HTML content using BeautifulSoup
    article_soup = BeautifulSoup(html_content, 'html.parser')

    # Get the content of the news article. find() returns None when the
    # page has no 'news_text' container (e.g. an error page); fall back to
    # an empty description rather than crashing the whole run.
    content_div = article_soup.find('div', {'class': 'news_text'})
    content = content_div.text.strip() if content_div is not None else ''

    # Create a new RSS feed entry
    entry = FeedEntry()
    entry.title(title)
    entry.link(href=link)
    entry.pubDate(dt)
    entry.description(content)

    # Add the entry to the feed
    feed.add_entry(entry)

 # Generate the RSS feed and save it to a file
 feed.rss_file('news.rss')
	import requests
	from bs4 import BeautifulSoup
	import datetime
	import pytz
	import feedgen.feed
	from feedgen.entry import FeedEntry

	# Scrape the news listing page, fetch every linked article and write the
	# result to 'news.rss' as an RSS feed.

	# Listing page that is scraped for news teasers
	url = 'https://gcheb.cap.ru/news/?type=news'

	# Seconds to wait for the server before giving up, so that a stalled
	# connection cannot hang the script indefinitely
	request_timeout = 30

	# Timezone attached to the parsed dates; looked up once because it does not
	# change between news items
	tz = pytz.timezone('Europe/Moscow')

	# Fetch the listing page and fail loudly on an HTTP error instead of
	# silently parsing an error page
	response = requests.get(url, timeout=request_timeout)
	response.raise_for_status()
	html_content = response.text

	# Parse the HTML content using BeautifulSoup
	soup = BeautifulSoup(html_content, 'html.parser')

	# Create a new RSS feed using feedgen
	feed = feedgen.feed.FeedGenerator()
	feed.title('News from https://gcheb.cap.ru/news/?type=news')
	feed.link(href=url)
	feed.description('Latest news from the Cheboksary City Government website')

	# Find all news items on the page
	news_items = soup.find_all('div', {'class': 'item_news'})

	# Loop through each news item and create an RSS feed entry.
	# The loop body is indented below the 'for' so that it actually belongs
	# to the loop.
	for item in news_items:
	    # Title text comes from the first anchor of the item; the target
	    # comes from the anchor carrying the 'news-list_title' class
	    title = item.find('a').get_text()
	    link = item.find('a', {'class': 'news-list_title'})['href']
	    # The href is joined onto the site origin to form the article URL
	    link = 'https://gcheb.cap.ru' + link
	    date = item.find('div', {'class': 'news-list_date'}).text.strip()

	    # Parse the 'HH:MM | DD.MM.YYYY' string and attach the timezone.
	    # The separator in the format is a plain '|': a backslash before it
	    # would be taken literally by strptime and never match the date text.
	    # pytz zones must be attached with localize(): replace(tzinfo=...)
	    # would use the zone's historical local-mean-time offset instead of
	    # the offset that is actually valid for the parsed date.
	    dt = tz.localize(datetime.datetime.strptime(date, '%H:%M | %d.%m.%Y'))

	    # Make a request to the news article URL and get the HTML content
	    response = requests.get(link, timeout=request_timeout)
	    html_content = response.text

	    # Parse the HTML content using BeautifulSoup
	    article_soup = BeautifulSoup(html_content, 'html.parser')

	    # Get the content of the news article. find() returns None when the
	    # page has no 'news_text' container (e.g. an error page); fall back
	    # to an empty description rather than crashing the whole run.
	    content_div = article_soup.find('div', {'class': 'news_text'})
	    content = content_div.text.strip() if content_div is not None else ''

	    # Create a new RSS feed entry
	    entry = FeedEntry()
	    entry.title(title)
	    entry.link(href=link)
	    entry.pubDate(dt)
	    entry.description(content)

	    # Add the entry to the feed
	    feed.add_entry(entry)

	# Generate the RSS feed and save it to a file
	feed.rss_file('news.rss')
No results found