Skip to content

Instantly share code, notes, and snippets.

@japhib
Last active June 21, 2020 03:11
Show Gist options
  • Select an option

  • Save japhib/3ae60489d43086b80951014589bcca83 to your computer and use it in GitHub Desktop.

Select an option

Save japhib/3ae60489d43086b80951014589bcca83 to your computer and use it in GitHub Desktop.
Python script that pulls games from Steam and estimates their revenue
# Steam tags to analyse, one (display name, numeric tag id) pair per line.
# Tag id reference: https://partner.steamgames.com/doc/store/tags
# Games carrying a given tag: https://store.steampowered.com/search/?tags=<tag_id>
tags = [
    {'tag_name': label, 'tag_id': ident}
    for label, ident in (
        ('2D Fighter', 4736),
        ('2D Platformer', 5379),
        ('3D Fighter', 6506),
        ('3D Platformer', 5395),
        ('4X', 1670),
        ('Action Roguelike', 42804),
        ('Action RPG', 4231),
        ('Arena Shooter', 5547),
        ('Asynchronous Multiplayer', 17770),
        ('Auto Battler', 1084988),
        ('Automobile Sim', 1100687),
        ('Battle Royale', 176981),
        ('Beat em up', 4158),
        ('Board Game', 1770),
        ('Bullet Hell', 4885),
        ('Card Battler', 791774),
        ('Card Game', 1666),
        ('Chess', 4184),
        ('Choose Your Own Adventure', 4486),
        ('City Builder', 4328),
        ('Clicker', 379975),
        ('Collectathon', 5652),
        ('Colony Sim', 220585),
        ('Combat Racing', 4102),
        ('CRPG', 4474),
        ('Dating Sim', 9551),
        ('Deckbuilding', 32322),
        ('Dungeon Crawler', 1720),
        ('Farming Sim', 87918),
        ('FPS', 1663),
        ('God Game', 5300),
        ('Grand Strategy', 4364),
        ('Hero Shooter', 620519),
        ('Hidden Object', 1738),
        ('Idler', 615955),
        ('Immersive Sim', 9204),
        ('Interactive Fiction', 11014),
        ('JRPG', 4434),
        ('Life Sim', 10235),
        ('Looter Shooter', 353880),
        ('Match 3', 1665),
        ('Medical Sim', 1100688),
        ('Metroidvania', 1628),
        ('MMORPG', 1754),
        ('MOBA', 1718),
        ('Mystery Dungeon', 198631),
        ('On-Rails Shooter', 56690),
        ('Open World Survival Craft', 1100689),
        ('Outbreak Sim', 1100686),
        ('Parkour', 4036),
        ('Party-Based RPG', 10695),
        ('Pinball', 6621),
        ('Point & Click', 1698),
        ('Political Sim', 26921),
        ('Precision Platformer', 3877),
        ('Procedural Generation', 5125),
        ('Puzzle Platformer', 5537),
        ('Real Time Tactics', 3813),
        ('Rhythm', 1752),
        ('Roguelike', 1716),
        ('Roguelite', 3959),
        ('Roguevania', 922563),
        ('RTS', 1676),
        ('Runner', 8666),
        ('Shoot Em Up', 4255),
        ('Sokoban', 1730),
        ('Solitaire', 13070),
        ('Souls-like', 29482),
        ('Space Sim', 16598),
        ('Spectacle fighter', 4777),
        ('Spelling', 71389),
        ('Strategy RPG', 17305),
        ('Survival Horror', 3978),
        ('Tactical RPG', 21725),
        ('Text-based', 31275),
        ('Third-Person Shooter', 3814),
        ('Time Management', 16689),
        ('Top-Down Shooter', 4637),
        ('Tower Defense', 1645),
        ('Trading Card Game', 9271),
        ('Traditional Roguelike', 454187),
        ('Trivia', 10437),
        ('Turn-Based Strategy', 1741),
        ('Turn-Based Tactics', 14139),
        ('Twin Stick Shooter', 4758),
        ('Typing', 1674),
        ('Visual Novel', 3799),
        ('Voxel', 1732),
        ('Walking Simulator', 5900),
        ('Wargame', 4684),
        ('Word Game', 24003),
    )
]
import sys
import requests
import json
import math
import traceback
import statistics
from bs4 import BeautifulSoup
from os import path
from datetime import datetime
from pprint import pprint
# Output mode: True appends one tab-separated row per game,
# False appends a multi-line human-readable record instead.
csv = True

# App ids that analyze_app() refuses to process.
blacklist = {202170, 212890, 97330, 97350, 838110}
blacklist.update(range(97360, 97390))    # 97360 .. 97389
blacklist.update(range(208830, 208845))  # 208830 .. 208844

# Running tallies for the tag currently being processed; get_tag_data()
# resets all of them before it starts a new tag.
totalgames = medrev = playtimeavg = 0
games_with_zero_rev = games_past_one_year = 0
revenues = []
playtimes = []
def request_or_file(filename, url, use_json=True):
    """Return the body of *url*, using *filename* as an on-disk cache.

    If *filename* exists and holds a usable (non-empty, parseable) value,
    that value is returned and no request is made. Otherwise *url* is
    fetched with ``requests.get``, a ``.`` is printed as a progress marker,
    and the raw response text is stored in *filename* for next time.

    Args:
        filename: Path of the cache file for this URL.
        url: Address to fetch when the cache has no usable entry.
        use_json: When true (default) the text is decoded with
            ``json.loads`` and the decoded object is returned; when false
            the raw text is returned.

    Returns:
        The decoded JSON object or the raw text, depending on *use_json*.

    Raises:
        ValueError: *use_json* is true and the fetched body is not JSON.
        requests.RequestException: the request failed or timed out.
    """
    import os  # local import: the module itself only imports ``os.path``

    if path.exists(filename):
        try:
            with open(filename, 'r', encoding='utf-8') as f:
                cached_text = f.read()
            ret = json.loads(cached_text) if use_json else cached_text
        except ValueError:
            # Empty, truncated or mis-encoded cache entry (JSONDecodeError and
            # UnicodeDecodeError are both ValueErrors): treat it as a miss and
            # refetch instead of failing on this entry forever.
            ret = None
        if ret:
            return ret

    # Without a timeout a stalled connection would hang the whole run.
    from_request = requests.get(url, timeout=30)
    print('.')
    body = from_request.text

    # Decode BEFORE touching the cache file, so a body that is not valid JSON
    # cannot leave an empty or garbage cache entry behind.
    ret = json.loads(body) if use_json else body

    # Only successful responses are cached; an error page is still returned
    # to the caller (as before) but is not remembered.
    if from_request.ok:
        cache_dir = path.dirname(filename)
        if cache_dir:
            os.makedirs(cache_dir, exist_ok=True)
        with open(filename, 'w', encoding='utf-8') as f:
            f.write(body)
    return ret
def get_app_data(appid):
    """Return the store ``appdetails`` JSON for *appid*.

    The response is cached by request_or_file() in
    ``steamapps/<appid>.json``.
    """
    return request_or_file(
        'steamapps/{}.json'.format(appid),
        'https://store.steampowered.com/api/appdetails?appids={}'.format(appid),
    )
def get_reviews(appid):
    """Return the store ``appreviews`` JSON for *appid*.

    The response is cached by request_or_file() in
    ``reviews/<appid>.json``.
    """
    cache_file = 'reviews/{}.json'.format(appid)
    endpoint = 'https://store.steampowered.com/appreviews/{}?json=1'.format(appid)
    return request_or_file(cache_file, endpoint)
def _parse_release_date(release_date_str):
    """Parse a store release-date string, trying each accepted layout in order."""
    # The same three layouts, in the same order, that were tried before.
    for layout in ('%b %d, %Y', '%b %Y', '%d %b, %Y'):
        try:
            return datetime.strptime(release_date_str, layout)
        except ValueError:
            continue
    raise ValueError('unrecognised release date: {!r}'.format(release_date_str))


def analyze_app(appid, output):
    """Append one report record for *appid* to *output* and update the tallies.

    Nothing is appended (and None is returned) when:

    * *appid* is falsy or its integer value is in ``blacklist``;
    * the app details flag the release as ``coming_soon``;
    * the app was released more than 365 days ago (this also increments
      ``games_past_one_year``);
    * anything raises while the app is processed; the error is printed with
      a traceback and the app is skipped.

    Otherwise a record (a tab-separated row when ``csv`` is true, a
    multi-line text block when not) is appended to *output*, ``totalgames``
    is incremented, the revenue estimate is added to ``revenues`` and, when
    at least 20 reviews were returned, the mean playtime is added to
    ``playtimes``.

    Args:
        appid: App id. It is used as-is as the key into the appdetails
            response, so presumably a string; TODO confirm against callers.
        output: List that receives the formatted record.
    """
    # Only names that are rebound need ``global``; ``revenues`` and
    # ``playtimes`` are mutated in place.
    global totalgames
    global games_with_zero_rev
    global games_past_one_year

    if not appid or int(appid) in blacklist:
        return

    # ``except Exception`` (not a bare ``except``) is deliberate throughout:
    # a bare clause also catches the SystemExit raised by ``sys.exit(0)`` in
    # the inner handler, which made Ctrl-C unable to stop the script.
    try:
        app_data = get_app_data(appid)
        try:
            details = app_data.get(appid).get('data')
            if details.get('release_date').get('coming_soon'):
                return
        except KeyboardInterrupt:
            sys.exit(0)
        except Exception:
            print('error checking pre-release state for {}'.format(appid))
            traceback.print_exc()
            return

        name = details.get('name')
        release_date_str = details.get('release_date').get('date')
        release_date = _parse_release_date(release_date_str)

        # Clamp to one day so the per-year extrapolation never divides by zero.
        days_since_release = max((datetime.today() - release_date).days, 1)

        # ``initial`` is divided by 100 before use; no price block means 0.
        price_overview = details.get('price_overview')
        price = price_overview.get('initial') / 100 if price_overview else 0

        review_data = get_reviews(appid)
        summary = review_data.get('query_summary')
        review_summary = summary.get('review_score_desc')
        total_reviews = summary.get('total_reviews')
        pos_reviews = summary.get('total_positive')
        review_percent_positive = pos_reviews / total_reviews * 100 if total_reviews > 0 else 0

        # Heuristic revenue estimate; 65 and .38 are the original author's
        # multipliers (their derivation is not stated in this file).
        estimated_rev = total_reviews * 65 * price * .38
        estimated_rev_one_year = estimated_rev / (days_since_release / 365)
        if days_since_release < 90:
            # Damp the extrapolation for releases younger than 90 days.
            estimated_rev_one_year *= .3

        if total_reviews == 0:
            games_with_zero_rev += 1

        if days_since_release > 365:
            # Older releases are only counted, never reported; get_tag_data()
            # uses this counter to decide when to stop paging.
            games_past_one_year += 1
            return

        # Per-review playtime of the returned review sample, divided by 60.
        playtime_samples = [
            review.get('author').get('playtime_forever') / 60
            for review in review_data.get('reviews')
        ]
        num_reviews = len(playtime_samples)
        review_avg_playtime = sum(playtime_samples) / num_reviews if num_reviews > 0 else 0

        if csv:
            output.append('{}\t{}\t{}\t{}\t{}\t{}\t{:.0f}\t{:.2f}\t{:.2f}\t{}\t{}\t{}'
                .format(
                    appid,
                    name,
                    release_date_str,
                    review_summary,
                    pos_reviews,
                    total_reviews,
                    review_percent_positive,
                    review_avg_playtime,
                    price,
                    days_since_release,
                    estimated_rev,
                    estimated_rev_one_year
                ))
        else:
            output.append('appId: {}\nName: {}\nRelease date: {}\nReviews: {} ({}/{} positive, {:.0f}%)\nAverage playtime: {:.2f}\nPrice: {:.2f}\nEstimated revenue: {} after {} days (estimated {} after one year)'
                .format(
                    appid,
                    name,
                    release_date_str,
                    review_summary,
                    pos_reviews,
                    total_reviews,
                    review_percent_positive,
                    review_avg_playtime,
                    price,
                    estimated_rev,
                    days_since_release,
                    estimated_rev_one_year
                ))

        totalgames += 1
        revenues.append(estimated_rev)
        # Small samples are excluded from the playtime statistics.
        if num_reviews >= 20:
            playtimes.append(review_avg_playtime)
    except KeyboardInterrupt:
        sys.exit(0)
    except Exception:
        print('Error processing appid {}'.format(appid))
        traceback.print_exc()
        return
def get_tag_data(tag):
    """Analyse the games listed under one Steam tag and report the results.

    Resets the module-level tallies, walks up to five search-result pages
    (100 results each, sorted by ``Released_DESC``) for the tag, passes every
    app id found to analyze_app(), writes the collected records to
    ``tags/<tag-name>.tsv`` and prints one tab-separated summary line
    (tag id, tag name, game count, median estimated revenue, number of games
    with zero reviews).

    Paging stops early once more than 10 games older than one year have been
    seen.

    Args:
        tag: Mapping with the keys ``'tag_name'`` and ``'tag_id'``.
    """
    import os  # local import: the module itself only imports ``os.path``

    global totalgames
    global medrev
    global playtimeavg
    global revenues
    global playtimes
    global games_with_zero_rev
    global games_past_one_year

    tag_name = tag['tag_name']
    tag_id = tag['tag_id']
    # print(tag_name)
    tag_output = []

    # Start every tag from a clean slate.
    totalgames = 0
    medrev = 0
    playtimeavg = 0
    revenues = []
    playtimes = []
    games_with_zero_rev = 0
    games_past_one_year = 0

    start = 0
    count = 100

    if csv:
        tag_output.append('appId\tName\tRelease date\tReview String\tPositive Reviews\tTotal Reviews\tPercent Positive Reviews\tAverage playtime\tPrice\tDays since release\tEstimated revenue\tEstimated revenue after one year')

    seen_enough_old_games = False
    for page_num in range(0, 5):
        # Offset derives from ``count`` so the two can never drift apart.
        offset = start + count * page_num
        game_list_filename = 'games/games_list_{}_{}_{}'.format(
            tag_id,
            offset,
            count
        )
        game_list_url = 'https://store.steampowered.com/search/results/?query&start={}&count={}&sort_by=Released_DESC&tags={}&category1=998&snr=1_7_7_240_7'.format(
            offset,
            count,
            tag_id
        )
        game_list = request_or_file(game_list_filename, game_list_url, use_json=False)
        soup = BeautifulSoup(game_list, 'html.parser')
        games = soup.find_all('a', attrs={'class': 'search_result_row'})
        if not games:
            continue

        for a in games:
            appid = a.get('data-ds-appid')
            if not appid:
                # A row without the attribute used to crash the whole run on
                # ``None.split``; presumably such rows are bundles/packages
                # (TODO confirm), so skip them.
                continue
            # The attribute may hold several comma-separated ids; a single id
            # simply yields a one-element list.
            for appid_single in appid.split(','):
                analyze_app(appid_single, tag_output)
            if games_past_one_year > 10:
                seen_enough_old_games = True
                break
        if seen_enough_old_games:
            break

    filename = 'tags/' + tag_name.lower().replace(' ', '-') + '.tsv'
    os.makedirs('tags', exist_ok=True)
    # Explicit UTF-8: game names are not limited to the platform code page.
    with open(filename, 'w', encoding='utf-8') as f:
        f.write('\n'.join(tag_output))

    medrev = statistics.median(revenues) if revenues else 0
    playtimeavg = statistics.median(playtimes) if playtimes else 0
    print('{}\t{}\t{}\t{}\t{}'.format(
        tag_id,
        tag_name,
        totalgames,
        medrev,
        games_with_zero_rev
    ))
# Script entry point: process every configured tag, in list order.
if __name__ == '__main__':
    for tag_entry in tags:
        get_tag_data(tag_entry)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment