Last active
March 23, 2020 15:16
-
-
Save jhejderup/80bab0c079f47ac7d2778baac6db8e25 to your computer and use it in GitHub Desktop.
Praezi
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| ## | |
| ## dep: pip3 install kafka-python | |
| ## run python3 cg-stats.py | |
| from kafka import KafkaConsumer, KafkaProducer, TopicPartition | |
| from kafka.errors import KafkaError | |
| import json | |
# Consumer for the 'praezi.callgraphs' topic. Values are decoded from UTF-8
# JSON. Auto-commit is disabled, so offsets only advance through the explicit
# consumer.commit() call in the loop below.
consumer = KafkaConsumer(
    'praezi.callgraphs',
    bootstrap_servers=['localhost:30001', 'localhost:30002', 'localhost:30003'],
    auto_offset_reset='earliest',
    enable_auto_commit=False,
    group_id='praezi.cgsworkers',
    value_deserializer=lambda m: json.loads(m.decode('utf-8')),
    api_version=(2, 2, 0),
    max_poll_interval_ms=3600000,
)

# Running totals, printed after every consumed message.
all_topic = 0          # every message seen on the topic
num_cgs = 0            # status 'OK' with at least one node and one edge
num_fails_marked = 0   # status 'FAIL' that still carries nodes and edges

try:
    while True:
        # poll() defaults to timeout_ms=0, which returns immediately and turns
        # this loop into a busy spin; wait up to one second for a record instead.
        msg_pack = consumer.poll(timeout_ms=1000, max_records=1)
        if not msg_pack:
            # Nothing fetched: skip the (blocking) commit round trip as well.
            continue

        for messages in msg_pack.values():
            for message in messages:
                all_topic += 1
                value = message.value
                status = value['status']
                # Only look at the call graph for the two statuses that are
                # counted, so messages with another status need no 'cg' key.
                if status == 'FAIL' or status == 'OK':
                    cg = value['cg']
                    if len(cg['nodes']) > 0 and len(cg['edges']) > 0:
                        if status == 'FAIL':
                            num_fails_marked += 1
                        else:
                            num_cgs += 1
                print("Stats [ all = {} non-empty = {} non-empty-failed = {} ]".format(all_topic, num_cgs, num_fails_marked))

        # Commit only after the batch has been tallied.
        consumer.commit()
except KeyboardInterrupt:
    # Ctrl-C is the expected way to stop this otherwise endless loop.
    pass
finally:
    consumer.close()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/python3 | |
| # -*- coding: utf-8 -*- | |
| """Extract toolchain information from docs.rs | |
| Deps: pip3 install requests beautifulsoup4 | |
| Run: python3 docsrs.py <crate_name> <crate_version> | |
| """ | |
| import sys | |
| import requests | |
| from bs4 import BeautifulSoup | |
# Exactly two arguments are required: the crate name and the crate version.
# An explicit check is used instead of `assert`, which is removed under -O.
if len(sys.argv) != 3:
    sys.exit("usage: python3 docsrs.py <crate_name> <crate_version>")

# Builds page for the requested crate release on docs.rs.
URL = "https://docs.rs/crate/{}/{}/builds".format(sys.argv[1], sys.argv[2])

# Bound the request so a stalled connection cannot hang the script, and fail
# loudly on an HTTP error instead of parsing an error page into zero rows.
page = requests.get(URL, timeout=30)
page.raise_for_status()

soup = BeautifulSoup(page.content, 'html.parser')
# Every <a class="release"> element is treated as one build entry below.
toolchains = soup.find_all('a', class_='release')
def isSuccess(clazz):
    """Return True when *clazz* is exactly the string ``"fa fa-check"``.

    The comparison is an exact string match: any other value, including the
    same class names in a different order or with extra whitespace, yields
    False.
    """
    return clazz == "fa fa-check"
# Print one CSV line per build entry:
#   crate name, crate version, success flag, compiler text, build date text
for release_link in toolchains:
    grid = release_link.find('div', class_='pure-g')
    # BeautifulSoup returns the class attribute as a list of names; join them
    # back into one space-separated string for isSuccess().
    icon_classes = grid.find('i')['class']
    icon_class_string = ' '.join(icon_classes)
    compiler_cell = grid.find('div', class_='pure-u-1 pure-u-sm-10-24')
    date_cell = grid.find('div', class_='pure-u-1 pure-u-sm-3-24 date')
    print(
        "{},{},{},{},{}".format(
            sys.argv[1],
            sys.argv[2],
            isSuccess(icon_class_string),
            compiler_cell.text,
            date_cell.text,
        )
    )
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Build one praezi.packages message per JSON document found in the crates.io
# index and publish the resulting list to Kafka.
git clone https://github.com/rust-lang/crates.io-index.git
cd crates.io-index/
rm config.json  # delete config file

# Skip hidden directories (.git and friends) and the output file itself: the
# redirect creates process_list.txt inside the tree being scanned, so an
# unfiltered `find . -type f` can feed it back into jq and emit entries with
# null name/version.
find . -path './.*' -prune -o -type f ! -name process_list.txt -print \
  | parallel "jq -c '{creation_date:{secs_since_epoch:0,nanos_since_epoch:0},crates:[{Package:{name: .name, version: .vers}}]}' {}" \
  > process_list.txt

kafka-console-producer --broker-list localhost:30001 --topic praezi.packages < process_list.txt
kafkacat -b localhost:30001 -t praezi.packages  # verify that everything was added
Author
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
crate names grouped by number of buildable versions:
cat docsrs.csv | grep True | awk -F"," '{print $1}' | sort | uniq -c | sort -k1,1nr -k2 > docsrs_groupby_releases.txt

get downloads and number of releases per missing crate:
python3 get_stats.py docsrs_groupby_releases.txt compiled-packages.txt > missing_packages.txt

get number of dependents per package for missing crates:
python3 /home/jhejderup/praezi-stats/py-src/resolve-missing-deps.py crates.io-index/ missing_packages_list.txt

filter by number of releases:
cat missing_packages.txt| awk -F"," '{if($2==$2+0 && $2 < 2) print $1,$2,$3}' > gt10_releases.txt

filter by downloads:
cat missing_packages.txt| awk -F"," '{if($3==$3+0 && $3 > 1000000) print $1,$2,$3}' > gt10_releases.txt