Created
July 26, 2019 14:13
-
-
Save rflamary/4d92e7c03657523584783a2d409b4e3f to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| from bs4 import BeautifulSoup | |
| import urllib | |
def parse_scholar_page(url, paper_list=False):
    """Download a scholar profile page and extract its citation statistics.

    The page is fetched with ``urllib.request.urlopen`` and parsed with
    BeautifulSoup's ``html.parser`` backend. Elements are located through
    their ids/classes (``gsc_prf_in``, ``gsc_rsb_st``, ``gsc_g_t``, ...).

    Args:
        url: Address of the profile page to download.
        paper_list: When true, the rows of the papers table are parsed as
            well and returned under the ``'papers_list'`` key.

    Returns:
        A dict with the keys ``'name'``, ``'citations'``, ``'h_index'``,
        ``'years_list'`` and ``'citations_list'``. When ``paper_list`` is
        true it also holds ``'papers_list'``, a list of dicts with the keys
        ``'title'``, ``'authors'``, ``'published_in'``, ``'year'`` and
        ``'citations'``. Numeric fields that cannot be parsed are 0.

    Raises:
        AttributeError: If an expected element is missing from the page
            (``soup.find`` returned ``None``).
        IndexError: If the statistics table or a paper row has fewer
            entries than expected.
    """
    # ``import urllib`` alone does not load the ``request`` submodule, so it
    # is imported explicitly here to make ``urllib.request`` always available.
    import urllib.request

    def to_int(text):
        """Return ``int(text)``, or 0 when the text is not an integer."""
        try:
            return int(text)
        except ValueError:
            # Empty cells (for example a paper without citations) count as 0.
            return 0

    # Close the HTTP response as soon as the body has been read.
    with urllib.request.urlopen(url) as response:
        web_content = response.read()
    soup = BeautifulSoup(web_content, 'html.parser')

    # Citation statistics: rows 1 and 2 of the table are read as the total
    # citation count and the h-index (row 0 is skipped).
    name = soup.find('div', {"id": "gsc_prf_in"}).text
    stats_rows = soup.find('table', {"id": "gsc_rsb_st"}).find_all('tr')
    citations = to_int(stats_rows[1].find('td', {"class": "gsc_rsb_std"}).text)
    h_index = to_int(stats_rows[2].find('td', {"class": "gsc_rsb_std"}).text)

    # Citation graph: one label span and one count span per bar.
    years_list = [
        to_int(span.text)
        for span in soup.find_all("span", {"class": "gsc_g_t"})
    ]
    citations_list = [
        to_int(span.text)
        for span in soup.find_all("span", {"class": "gsc_g_al"})
    ]

    res = {
        'name': name,
        'citations': citations,
        'h_index': h_index,
        'years_list': years_list,
        'citations_list': citations_list,
    }

    if paper_list:
        papers_list = []
        for row in soup.find_all("tr", {"class": "gsc_a_tr"}):
            infos = row.find_all('div', {"class": "gs_gray"})
            details = infos[1].text

            # The year is the piece after the last comma. When that piece is
            # not an integer, keep the whole text as the venue instead of
            # silently dropping its last part.
            venue, _, tail = details.rpartition(',')
            try:
                year = int(tail)
            except ValueError:
                year = 0
                venue = details

            papers_list.append({
                'title': row.find('a', {"class": "gsc_a_at"}).text,
                'authors': infos[0].text,
                'published_in': venue,
                'year': year,
                'citations': to_int(
                    row.find('a', {"class": "gsc_a_ac gs_ibl"}).text
                ),
            })
        res['papers_list'] = papers_list

    return res
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment