-
-
Save pohzipohzi/ad7942fc5545675022c1f31123e64c0c to your computer and use it in GitHub Desktop.
| from bs4 import BeautifulSoup | |
| import requests | |
| import datetime | |
| import logging | |
| import csv | |
| def setLogger(): | |
| logging.basicConfig(level=logging.INFO, | |
| format='%(asctime)s - %(levelname)s - %(message)s', | |
| filename='logs_file', | |
| filemode='w') | |
| console = logging.StreamHandler() | |
| formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s') | |
| console.setFormatter(formatter) | |
| logging.getLogger('').addHandler(console) | |
def getEconomicCalendar(startlink, endlink):
    """Scrape ForexFactory weekly calendar pages from *startlink* up to and
    including *endlink*, printing one CSV row per event to stdout.

    Parameters
    ----------
    startlink : str
        Relative link of the first week, e.g. ``"calendar.php?week=jan7.2007"``.
    endlink : str
        Relative link of the last week to scrape (inclusive).

    Rows that fail to parse are appended to ``errors.csv`` instead of
    aborting the whole scrape.
    """
    baseURL = "https://www.forexfactory.com/"
    fields = ["date", "time", "currency", "impact", "event",
              "actual", "forecast", "previous"]
    # Iterate instead of recursing: the original tail-recursion exceeds
    # Python's default recursion limit over long ranges (~570 weeks).
    link = startlink
    while True:
        # write to console current status
        logging.info("Scraping data for link: {}".format(link))
        # get the page and make the soup
        r = requests.get(baseURL + link)
        soup = BeautifulSoup(r.text, "lxml")
        # get and parse table data, ignoring details and graph
        table = soup.find("table", class_="calendar__table")
        if table is None:
            # page layout changed or request was blocked — bail out cleanly
            # instead of raising AttributeError on None
            logging.warning("Table not found on the page. Exiting...")
            return
        # do not use the ".calendar__row--grey" css selector
        # (reserved for historical data)
        trs = table.select("tr.calendar__row.calendar_row")
        # assumes the link always ends in a 4-digit year, e.g. "...jan7.2007"
        curr_year = link[-4:]
        # some rows do not have a date/time (cells merged), so the last
        # seen values are carried forward
        curr_date = ""
        curr_time = ""
        for tr in trs:
            # fields may mess up sometimes, see Tue Sep 25 2:45AM French
            # Consumer Spending; in that case append the row to errors.csv
            try:
                # reset per-row fields so a broken row cannot silently
                # reuse the previous row's values
                currency = impact = event = actual = forecast = previous = ""
                for field in fields:
                    cell = tr.select(
                        "td.calendar__cell.calendar__{}.{}".format(field, field))[0]
                    text = cell.text.strip()
                    if field == "date" and text != "":
                        curr_date = text
                    elif field == "time" and text != "":
                        # time is sometimes "All Day" or "Day X"
                        # (eg. WEF Annual Meetings)
                        curr_time = "12:00am" if "Day" in text else text
                    elif field == "currency":
                        currency = text
                    elif field == "impact":
                        # when impact says "Non-Economic" on mouseover, the
                        # relevant class name is "Holiday", thus we use the
                        # span's title attribute instead of the classname
                        impact = cell.find("span")["title"]
                    elif field == "event":
                        event = text
                    elif field == "actual":
                        actual = text
                    elif field == "forecast":
                        forecast = text
                    elif field == "previous":
                        previous = text
                dt = datetime.datetime.strptime(
                    ",".join([curr_year, curr_date, curr_time]),
                    "%Y,%a%b %d,%I:%M%p")
                print(",".join([str(dt), currency, impact, event,
                                actual, forecast, previous]))
            except Exception:
                # record the offending row, but keep scraping; a bare
                # except would also swallow KeyboardInterrupt
                with open("errors.csv", "a") as f:
                    csv.writer(f).writerow([curr_year, curr_date, curr_time])
        # stop when the last available link has been reached
        if link == endlink:
            logging.info("Successfully retrieved data")
            return
        # get the link for the next week and follow
        follow = soup.select(
            "a.calendar__pagination.calendar__pagination--next.next")
        link = follow[0]["href"]
if __name__ == "__main__":
    # Run this using the command "python `script_name`.py >> `output_name`.csv"
    setLogger()
    getEconomicCalendar("calendar.php?week=jan7.2007",
                        "calendar.php?week=dec24.2017")
from bs4 import BeautifulSoup
from datetime import date, datetime
from typing import List
import urllib.request
import urllib.parse
import ssl
import json
from pytz import timezone


class PyEcoElement(object):
    """A single economic-calendar event scraped from ForexFactory."""

    def __init__(self, currency: str, event: str, impact: str, time_utc: str,
                 actual: str, forecast: str, previous: str):
        self.currency = currency
        self.event = event
        self.impact = impact
        self.time_utc = time_utc
        self.actual = actual
        self.forecast = forecast
        self.previous = previous


class PyEcoRoot(object):
    """Container for one day's worth of calendar events."""

    def __init__(self, eco_elements: List[PyEcoElement]):
        self.eco_elements = eco_elements


class PyEcoCal:
    def GetEconomicCalendar(self, query_date: datetime):
        """Scrape the ForexFactory calendar for *query_date* and return the
        day's events as a pretty-printed JSON string.

        Parameters
        ----------
        query_date : datetime
            The day to fetch, e.g. ``datetime.today()``.

        Returns
        -------
        str
            JSON with one ``eco_elements`` array of event objects.
        """
        base_url = "https://www.forexfactory.com/"
        # NOTE(review): disabling certificate verification process-wide is
        # unsafe; kept from the original, but a verified context is preferable.
        ssl._create_default_https_context = ssl._create_unverified_context
        # e.g. ".../calendar.php?day=jan7.2007"
        urleco = (f"{base_url}calendar.php?day="
                  f"{query_date.strftime('%b').lower()}"
                  f"{query_date.day}.{query_date.year}")
        date_string = query_date.strftime('%Y-%m-%d')
        # a browser-like User-Agent avoids the site's bot blocking
        opener = urllib.request.build_opener()
        opener.addheaders = [('User-agent', 'Mozilla/5.0')]
        response = opener.open(urleco)
        result = response.read().decode('utf-8', errors='replace')
        soup = BeautifulSoup(result, "html.parser")
        table = soup.find_all("tr", class_="calendar_row")
        eco_day = []
        for item in table:
            # renamed from `dict` to avoid shadowing the builtin
            row = {}
            row["Currency"] = item.find_all(
                "td", {"class": "calendar__cell calendar__currency currency"}
            )[0].text.strip()  # Currency
            row["Event"] = item.find_all(
                "span", {"class": "calendar__event-title"}
            )[0].text.strip()  # Event Name
            try:
                time_eastern = item.find_all(
                    "td", {"class": "calendar__cell calendar__time time"}
                )[0].div.text.strip()  # Time Eastern
                datetime_eastern = datetime.strptime(
                    f"{date_string} {time_eastern}", '%Y-%m-%d %I:%M%p')
            except (IndexError, AttributeError, ValueError):
                # untimed / "All Day" events default to midnight Eastern
                datetime_eastern = datetime.strptime(
                    f"{date_string} 12:00am", '%Y-%m-%d %I:%M%p')
            eastern_tz = timezone('US/Eastern')
            # localize in US/Eastern, convert to UTC for the output string
            row["Time_UTC"] = eastern_tz.localize(
                datetime(datetime_eastern.year, datetime_eastern.month,
                         datetime_eastern.day, datetime_eastern.hour,
                         datetime_eastern.minute, 0)
            ).astimezone(timezone('utc')).strftime("%Y%m%dT%H:%M:%S %z")
            # default first so rows without an impact icon cannot raise
            # KeyError when the element is built below
            row["Impact"] = ""
            impact = item.find_all("td", {"class": "impact"})
            for icon in range(len(impact)):
                row["Impact"] = impact[icon].find_all(
                    "span")[0]['title'].split(' ', 1)[0]
            try:
                actual_value = item.find_all(
                    "td", {"class": "calendar__cell calendar__actual actual"}
                )[0].text
                if actual_value is not None:
                    row["Actual"] = actual_value.strip()  # Actual Value
                else:
                    row["Actual"] = item.find_all(
                        "td",
                        {"class": "calendar__cell calendar__actual actual"}
                    )[0].span.text.strip()
            except (IndexError, AttributeError):
                row["Actual"] = ""
            try:
                row["Forecast"] = item.find_all(
                    "span", {"class": "calendar-forecast"}
                )[0].text.strip()  # forecasted Value
            except IndexError:
                row["Forecast"] = ""
            try:
                row["Previous"] = item.find_all(
                    "span", {"class": "calendar-previous"}
                )[0].text.strip()  # Previous
            except IndexError:
                row["Previous"] = ""
            eco_day.append(row)
        events_array = [
            PyEcoElement(
                r["Currency"], r["Event"], r["Impact"], r["Time_UTC"],
                r["Actual"], r["Forecast"], r["Previous"]
            )
            for r in eco_day
        ]
        eco_cal = PyEcoRoot(events_array)
        json_object = json.dumps(
            eco_cal.__dict__, default=lambda o: o.__dict__, indent=3)
        return json_object


if __name__ == "__main__":
    eco = PyEcoCal()
    # do not name the result `json` — that shadowed the json module
    calendar_json = eco.GetEconomicCalendar(datetime.today())
    print(calendar_json)
This works, thanks buddy
PS C:\Users\Jasper> python -u "c:\Users\Jasper\Downloads\Lot Size Calculator\main.py"
2023-07-24 21:32:36,080 - INFO - Scraping data for link: calendar.php?week=jan7.2007
2023-07-24 21:32:36,219 - WARNING - Table not found on the page. Exiting...
Hey everyone — check out this API, which provides access to all ForexFactory data. It can help you unlock valuable insights and enhance your trading strategies efficiently.
Link to api: https://rapidapi.com/ousema.frikha/api/forex-factory-scraper1
And how can I fetch the events for the upcoming week?