Skip to content

Instantly share code, notes, and snippets.

@lennier1
Last active May 10, 2023 18:22
Show Gist options
  • Select an option

  • Save lennier1/a7aadaaddd02d6c31df8760f44b3fbb6 to your computer and use it in GitHub Desktop.

Select an option

Save lennier1/a7aadaaddd02d6c31df8760f44b3fbb6 to your computer and use it in GitHub Desktop.
Identify Twitter users that haven't tweeted since a cutoff date
import ast
import json
from datetime import datetime
import subprocess
# Accounts whose newest scraped tweet is older than this date are recorded
# as abandoned. Naive datetime; tweet dates are normalised to naive UTC
# before being compared against it.
cutoff_date = datetime(2022, 6, 1)


def load_user_tuples(path='legacy_verified_usernames.txt'):
    """Return the (user_id, username) pairs stored in *path*.

    The file is expected to hold a single Python literal (e.g. a list of
    2-tuples). ``ast.literal_eval`` only evaluates literals, so the file
    content is never executed as code.
    """
    with open(path, 'r') as file:
        return ast.literal_eval(file.read())


def scrape_profile(user_id):
    """Run snscrape for *user_id* and return its raw JSONL output.

    Two results are requested because one of them may be a pinned tweet.

    Raises:
        subprocess.CalledProcessError: snscrape exited with a non-zero status.
        OSError: the snscrape executable could not be started.
    """
    # Pass an argument list instead of a shell string: no shell is spawned,
    # so a malformed user_id cannot be interpreted as shell syntax.
    command = [
        "snscrape", "--jsonl", "--max-results", "2",
        "twitter-profile", "--user-id", str(user_id),
    ]
    result = subprocess.run(command, check=True, capture_output=True, text=True)
    return result.stdout


def find_latest_tweet(scraped_output, username):
    """Extract the newest tweet date and the current username from JSONL text.

    Args:
        scraped_output: Text with one JSON object per line.
        username: Name to report when no line carries a "username" key.

    Returns:
        ``(latest_date, current_username)``. ``latest_date`` is a naive
        datetime (UTC offset removed) or ``None`` when no usable "date"
        value was found. ``current_username`` is the "username" value of
        the last line that has one, otherwise *username*.
    """
    latest_date = None
    current_username = username
    for line in scraped_output.splitlines():
        if not line.strip():
            continue  # blank lines carry no record
        try:
            tweet = json.loads(line)
        except json.JSONDecodeError as e:
            # One corrupt line must not abort the whole run.
            print(f"Skipping unparsable output line for {username}: {e}")
            continue
        if not isinstance(tweet, dict):
            continue
        if "date" in tweet:
            try:
                tweet_date = datetime.fromisoformat(tweet["date"])
            except (TypeError, ValueError):
                tweet_date = None  # null or malformed date: ignore it
            if tweet_date is not None:
                if tweet_date.tzinfo is not None:
                    # Shift to UTC and drop the offset so every date is
                    # naive and comparable with cutoff_date.
                    tweet_date = (tweet_date - tweet_date.utcoffset()).replace(tzinfo=None)
                if latest_date is None or tweet_date > latest_date:
                    latest_date = tweet_date
        if "username" in tweet:
            current_username = tweet["username"]
    return latest_date, current_username


def main():
    """Scrape every listed user and record those inactive since cutoff_date.

    Appends the raw scrape output to ``scraped_data.txt`` and one
    ``current_username, username, user_id`` line per abandoned account to
    ``abandoned_users.txt``. Progress is printed to stdout.
    """
    user_tuples = load_user_tuples()
    with open("abandoned_users.txt", "a") as abandoned_users_file, \
            open("scraped_data.txt", "a") as scraped_data_file:
        for user_id, username in user_tuples:
            try:
                scraped_output = scrape_profile(user_id)
            except Exception as e:
                # Deliberately broad: this is a best-effort batch job, so a
                # failure for one user is reported and the run continues.
                print(f"Error occurred while processing {username}: {e}")
                continue

            # Keep the raw output so results can be re-analysed without re-scraping.
            scraped_data_file.write(f"Username: {username}\n{scraped_output}\n")

            latest_date, current_username = find_latest_tweet(scraped_output, username)

            if current_username != username:
                print(f"{username} changed to {current_username}")
            if latest_date is not None:
                print(f"{current_username}: {latest_date}")
            else:
                print(f"{current_username}: No dates available")

            if latest_date is not None and latest_date < cutoff_date:
                abandoned_users_file.write(f"{current_username}, {username}, {user_id}\n")
                # Flush immediately so results survive an interrupted run.
                abandoned_users_file.flush()


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment