Last active
May 10, 2023 18:22
-
-
Save lennier1/a7aadaaddd02d6c31df8760f44b3fbb6 to your computer and use it in GitHub Desktop.
Identify Twitter users that haven't tweeted since a cutoff date
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import ast
import json
import subprocess
from datetime import datetime, timezone
# Identify Twitter users that haven't tweeted since a cutoff date.
#
# Reads (user_id, username) pairs from USERS_PATH, scrapes up to two tweets
# per user with the external `snscrape` command, appends the raw output to
# SCRAPED_PATH, and appends every user whose newest scraped tweet predates
# CUTOFF_DATE to ABANDONED_PATH.

# Naive datetime, compared against tweet dates normalised to naive UTC.
CUTOFF_DATE = datetime(2022, 6, 1)

USERS_PATH = "legacy_verified_usernames.txt"
ABANDONED_PATH = "abandoned_users.txt"
SCRAPED_PATH = "scraped_data.txt"


def load_user_tuples(path):
    """Return the user tuples stored in *path* as a Python literal.

    The file is parsed with ``ast.literal_eval``, so it must contain a
    literal such as ``[(123, "name"), ...]``.
    """
    with open(path, "r") as file:
        return ast.literal_eval(file.read())


def scrape_profile(user_id):
    """Run snscrape for *user_id* and return its JSONL standard output.

    Raises:
        subprocess.CalledProcessError: snscrape exited with a non-zero status.
        OSError: the snscrape executable could not be started.
    """
    # An argument list (no shell) keeps the id from ever being interpreted
    # as shell syntax.
    command = [
        "snscrape",
        "--jsonl",
        "--max-results",
        "2",
        "twitter-profile",
        "--user-id",
        str(user_id),
    ]
    result = subprocess.run(command, check=True, capture_output=True, text=True)
    return result.stdout


def parse_tweet_date(raw):
    """Parse an ISO-8601 timestamp string into a naive UTC ``datetime``.

    Timestamps carrying a UTC offset are converted to UTC before the offset
    is dropped; timestamps without an offset are returned unchanged.

    Raises:
        ValueError: *raw* is not a valid ISO-8601 timestamp.
    """
    # datetime.fromisoformat only accepts a trailing "Z" from Python 3.11 on.
    if raw.endswith("Z"):
        raw = raw[:-1] + "+00:00"
    parsed = datetime.fromisoformat(raw)
    if parsed.tzinfo is not None:
        parsed = parsed.astimezone(timezone.utc).replace(tzinfo=None)
    return parsed


def summarize_scrape(scraped_output, username):
    """Extract the newest tweet date and current username from JSONL output.

    Args:
        scraped_output: Text with one JSON object per line.
        username: Name to report when no line carries a "username" key.

    Returns:
        A ``(latest_date, current_username)`` tuple. ``latest_date`` is a
        naive UTC ``datetime``, or ``None`` when no line had a usable date.
    """
    latest_date = None
    current_username = username
    for line in scraped_output.splitlines():
        if not line.strip():
            continue
        try:
            tweet = json.loads(line)
        except json.JSONDecodeError as e:
            # One bad line must not abort the whole run.
            print(f"Skipping unparseable line for {username}: {e}")
            continue
        if not isinstance(tweet, dict):
            continue

        raw_date = tweet.get("date")
        if isinstance(raw_date, str):
            try:
                tweet_date = parse_tweet_date(raw_date)
            except ValueError as e:
                print(f"Skipping invalid date for {username}: {e}")
            else:
                # Two tweets are scraped because one may be a pinned (older)
                # tweet, so keep the maximum rather than the first date seen.
                if latest_date is None or tweet_date > latest_date:
                    latest_date = tweet_date

        if "username" in tweet:
            current_username = tweet["username"]
    return latest_date, current_username


def main():
    """Scrape every listed user and record those inactive since CUTOFF_DATE."""
    user_tuples = load_user_tuples(USERS_PATH)

    with open(ABANDONED_PATH, "a") as abandoned_users_file, \
            open(SCRAPED_PATH, "a") as scraped_data_file:
        for user_id, username in user_tuples:
            try:
                scraped_output = scrape_profile(user_id)
            except (subprocess.SubprocessError, OSError) as e:
                print(f"Error occurred while processing {username}: {e}")
                continue

            # Keep the raw output so results can be re-analysed without
            # scraping again.
            scraped_data_file.write(f"Username: {username}\n{scraped_output}\n")

            latest_date, current_username = summarize_scrape(scraped_output, username)

            if current_username != username:
                print(f"{username} changed to {current_username}")

            if latest_date is not None:
                print(f"{current_username}: {latest_date}")
            else:
                print(f"{current_username}: No dates available")

            if latest_date is not None and latest_date < CUTOFF_DATE:
                abandoned_users_file.write(f"{current_username}, {username}, {user_id}\n")
                # Flush immediately so results survive an interrupted run.
                abandoned_users_file.flush()


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment