Created
September 4, 2025 08:35
-
-
Save lawrence910426/7a130eedc2b573bcf426f2bfb3d99921 to your computer and use it in GitHub Desktop.
Crawler for lending information
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import requests, json | |
def get_otc_intraday_lending_info(date: str) -> pd.DataFrame:
    """Fetch intraday lending records from the TPEx (OTC) fee endpoint.

    Sends a form-encoded POST to the TPEx ``intraday/fee`` URL and turns the
    rows of the first table in the JSON reply into a DataFrame.

    Args:
        date: Query date written as ``YYYY-MM-DD``. The dashes are replaced
            with slashes before the value is sent.

    Returns:
        A DataFrame with the columns ``date`` (datetime), ``symbol``,
        ``stock_name``, ``lending_quantity`` (integer, thousands separators
        removed) and ``lending_fee`` (float, source value divided by 100 --
        presumably percent to fraction; confirm against the data source).
        When the reply contains no rows, an empty frame with the same
        columns is returned instead of raising.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        KeyError, IndexError: If the reply lacks the expected
            ``tables[0]['data']`` structure.
    """
    url = "https://www.tpex.org.tw/www/zh-tw/intraday/fee"
    data = {"date": date.replace("-", "/"), "id": "", "response": "json"}
    headers = {"User-Agent": "Mozilla/5.0", "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8"}
    r = requests.post(url, data=data, headers=headers, timeout=15)
    r.raise_for_status()
    # Fall back to parsing the raw text when the server does not label the
    # body as JSON.
    resp = r.json() if r.headers.get("content-type", "").startswith("application/json") else json.loads(r.text)

    columns = ['date', 'symbol', 'stock_name', 'lending_quantity', 'lending_fee']
    df = pd.DataFrame(resp['tables'][0]['data'])
    if df.empty:
        # A frame built from zero rows has zero columns, so assigning five
        # names would raise; start from an explicitly labelled empty frame.
        df = pd.DataFrame(columns=columns)
    else:
        df.columns = columns

    # Quantities arrive as strings with thousands separators, e.g. "1,000".
    df['lending_quantity'] = (
        df['lending_quantity'].str.replace(',', '', regex=False).astype('int64')
    )
    df['lending_fee'] = df['lending_fee'].astype(float) / 100
    # roc_to_ad is defined outside this block; given the format string below
    # it presumably returns a Gregorian "YYYY/MM/DD" string -- confirm.
    df["date"] = pd.to_datetime(
        df["date"].apply(roc_to_ad),
        format="%Y/%m/%d"
    )
    return df
# Note: lending information is always returned one month at a time. For
# example, requesting 2025-08-03 returns the records from 2025-08-01 through
# 2025-08-31.

# One request therefore covers all of August 2025.
df = get_otc_intraday_lending_info("2025-08-01")

# Build a date x symbol matrix of lending quantities in a FinLab-like layout,
# aligned to close_price's index and columns; cells without a record become 0.
df_lending_quantity = (
    df.pivot_table(
        values='lending_quantity',
        index='date',
        columns='symbol',
        aggfunc='sum',
    )
    .reindex(index=close_price.index, columns=close_price.columns)
    .fillna(0)
)

print(df_lending_quantity.loc['2025-08-29', '6840'])
def get_twse_intraday_lending_info(date: str) -> pd.DataFrame:
    """Fetch intraday lending records from the TWSE ``BFIF8U`` endpoint.

    Sends a GET request to the TWSE day-trading URL and turns the rows under
    the ``data`` key of the JSON reply into a DataFrame.

    Args:
        date: Query date written as ``YYYY-MM-DD``. The dashes are removed
            before the value is sent (``YYYYMMDD``).

    Returns:
        A DataFrame with the columns ``date`` (datetime), ``symbol``
        (surrounding whitespace stripped), ``stock_name``,
        ``lending_quantity`` (integer, thousands separators removed) and
        ``lending_fee`` (float; the ``%`` sign is removed and the value is
        divided by 100). When the reply contains no rows, an empty frame
        with the same columns is returned instead of raising.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        KeyError: If the reply has no ``data`` key.
    """
    url = "https://www.twse.com.tw/rwd/zh/dayTrading/BFIF8U"
    # Let requests assemble the query string rather than formatting it by hand.
    params = {"date": date.replace('-', ''), "response": "json"}
    r = requests.get(url, params=params, timeout=15)
    r.raise_for_status()
    # Fall back to parsing the raw text when the server does not label the
    # body as JSON.
    resp = r.json() if r.headers.get("content-type", "").startswith("application/json") else json.loads(r.text)

    columns = ['date', 'symbol', 'stock_name', 'lending_quantity', 'lending_fee']
    df = pd.DataFrame(resp['data'])
    if df.empty:
        # A frame built from zero rows has zero columns, so assigning five
        # names would raise; start from an explicitly labelled empty frame.
        df = pd.DataFrame(columns=columns)
    else:
        df.columns = columns

    df['symbol'] = df['symbol'].str.strip()
    # Quantities arrive as strings with thousands separators, e.g. "1,000".
    df['lending_quantity'] = (
        df['lending_quantity'].str.replace(',', '', regex=False).astype('int64')
    )
    # Fees arrive as strings carrying a percent sign.
    df['lending_fee'] = (
        df['lending_fee'].str.replace('%', '', regex=False).astype(float) / 100
    )
    # roc_to_ad is defined outside this block; given the format string below
    # it presumably returns a Gregorian "YYYY/MM/DD" string -- confirm.
    df["date"] = pd.to_datetime(
        df["date"].apply(roc_to_ad),
        format="%Y/%m/%d"
    )
    return df
# Retrieve the whole month's TWSE lending records with a single request.
df = get_twse_intraday_lending_info("2025-08-01")

# Build a date x symbol matrix of lending quantities in a FinLab-like layout,
# aligned to close_price's index and columns; cells without a record become 0.
df_lending_quantity = (
    df.pivot_table(
        values='lending_quantity',
        index='date',
        columns='symbol',
        aggfunc='sum',
    )
    .reindex(index=close_price.index, columns=close_price.columns)
    .fillna(0)
)

print(df_lending_quantity.loc['2025-08-01', '1301'])
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment