Last active
December 18, 2020 15:44
-
-
Save mesailde/dd6d24d0c27ddcb903ed to your computer and use it in GitHub Desktop.
Script para extração de CVs Lattes a partir de um arquivo CSV
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python | |
| # -*- encoding: utf-8 -*- | |
| import os, sys, re, time, traceback, csv, base64, io, zipfile | |
| import suds | |
| import suds.client | |
| from optparse import OptionParser | |
| from retry import retry | |
class WSCurriculo(suds.client.Client):
    """SOAP client for the curriculum web service (WSCurriculo).

    Subclass of ``suds.client.Client`` bound to a fixed WSDL URL. Every
    remote call is wrapped in ``@retry(tries=3, delay=1)`` so transient
    failures are retried before the exception reaches the caller.
    """

    def __init__(self):
        suds.client.Client.__init__(
            self,
            'https://cnpqwsproxy.ufscar.br:7443/srvcurriculo/WSCurriculo?wsdl')

    @retry(tries=3, delay=1)
    def obterCV(self, idCNPq):
        """Return the content of the CV archive for *idCNPq*.

        The service answer is base64-decoded and opened as a zip archive;
        the bytes of the first archive member are returned.
        """
        zipped = base64.b64decode(
            self.service.getCurriculoCompactado(id=idCNPq))
        # Close the archive deterministically instead of relying on the GC.
        with zipfile.ZipFile(io.BytesIO(zipped)) as xmlz:
            return xmlz.read(xmlz.namelist()[0])

    def obterIdCNPq(self, *args):
        """Look up a CNPq identifier.

        Call as ``obterIdCNPq(cpf)`` or
        ``obterIdCNPq(nomeCompleto, dataNascimento)``.

        Raises:
            ValueError: when called with anything other than 1 or 2
                positional arguments. This check runs outside the retried
                call, so a caller bug fails immediately instead of being
                retried.
        """
        if len(args) == 1:
            cpf, nomeCompleto, dataNascimento = args[0], '', ''
        elif len(args) == 2:
            cpf = ''
            nomeCompleto, dataNascimento = args
        else:
            raise ValueError('obterIdCNPq deve receber 1 ou 2 parâmetros (cpf ou nomeCompleto e dataNascimento)')
        return self._consultarIdCNPq(cpf, nomeCompleto, dataNascimento)

    @retry(tries=3, delay=1)
    def _consultarIdCNPq(self, cpf, nomeCompleto, dataNascimento):
        """Perform the (retried) remote identifier lookup."""
        return self.service.getIdentificadorCNPq(
            cpf=cpf, nomeCompleto=nomeCompleto, dataNascimento=dataNascimento)

    @retry(tries=3, delay=1)
    def obterOcorrencia(self, idCNPq):
        """Return what the service's ``getOcorrenciaCV`` reports for *idCNPq*."""
        return self.service.getOcorrenciaCV(id=idCNPq)
def onlyNumbers(s):
    """Return *s* with every non-digit character stripped out."""
    non_digits = re.compile(r'[^\d]')
    return non_digits.sub('', s)
def process(filename, destdir='.', outfile=None, verbose=False):
    """Download the CV of every person listed in a CSV file.

    The input is a ';'-delimited CSV with a header row. Each person is
    identified by the "cpf" column or, when that value is empty or absent,
    by the "nome" and "nascimento" columns. The identifier returned by the
    web service is stored under "idCNPq" and the CV is written to
    ``<destdir>/lattes<idCNPq>.xml``. People whose identifier cannot be
    obtained, or whose CV cannot be fetched, are reported on stderr and
    skipped.

    Args:
        filename: path of the input CSV file.
        destdir: directory that receives the XML files.
        outfile: optional path of an output CSV holding the input columns
            plus "idCNPq"; only rows with a resolved identifier are written.
        verbose: when true, progress messages are written to stderr.

    Raises:
        SystemExit: with status 1 when the input has neither a "cpf" column
            nor both "nome" and "nascimento" columns.
    """
    idCNPqField = 'idCNPq'
    cpfField = 'cpf'
    nomeNascimentoFields = ('nome', 'nascimento')

    with open(filename, 'r') as csvin:
        csvreader = csv.DictReader(csvin, delimiter=';')
        # DictReader.fieldnames is None for an empty file; treat it as
        # "no columns" so the friendly error below is shown.
        fieldnames = list(csvreader.fieldnames or [])
        hasCpf = cpfField in fieldnames
        hasNomeNascimento = set(nomeNascimentoFields).issubset(fieldnames)
        if not hasCpf and not hasNomeNascimento:
            sys.stderr.write(u'Forneça um arquivo de entrada contendo:\n')
            sys.stderr.write(u' * Uma coluna chamada "%s" com o CPF de cada pessoa; e/ou\n' % cpfField)
            sys.stderr.write(u' * Colunas "%s" e "%s" com o nome completo e data de nascimento\n' % nomeNascimentoFields)
            sys.exit(1)

        # Connect and create the output only once the input is known to be
        # usable, so invalid input neither hits the network nor truncates
        # an existing output file.
        ws = WSCurriculo()
        csvout = open(outfile, 'w') if outfile is not None else None
        try:
            csvwriter = None
            if csvout is not None:
                csvwriter = csv.DictWriter(csvout, fieldnames=fieldnames + [idCNPqField], delimiter=';', quoting=csv.QUOTE_ALL)
                csvwriter.writeheader()

            for row in csvreader:
                # Short rows yield None for missing cells; treat as empty.
                cpf = onlyNumbers(row.get(cpfField) or '')
                if cpf:
                    persondata = (cpf,)
                else:
                    persondata = tuple(row.get(x) or '' for x in nomeNascimentoFields)

                if verbose:
                    sys.stderr.write(u'=> Obtendo idCNPq de %s\n' % repr(persondata))
                try:
                    row[idCNPqField] = ws.obterIdCNPq(*persondata)
                except Exception:
                    # Best effort: a failed lookup is reported as "not found".
                    # KeyboardInterrupt/SystemExit still propagate.
                    row[idCNPqField] = None
                    if verbose:
                        traceback.print_exc()
                if row[idCNPqField] is None:
                    sys.stderr.write(u'=> idCNPQ não encontrado para %s!\n' % repr(persondata))
                    continue

                if csvwriter is not None:
                    csvwriter.writerow(row)

                if verbose:
                    sys.stderr.write(u'=> Obtendo CV do idCNPq %s\n' % row[idCNPqField])
                try:
                    cv = ws.obterCV(row[idCNPqField])
                except Exception:
                    # Ask the service why the CV is unavailable, if it can say.
                    ocorrencia = None
                    try:
                        ocorrencia = ws.obterOcorrencia(row[idCNPqField])
                    except Exception:
                        # print_exc() takes no exception argument; it reports
                        # the exception currently being handled.
                        traceback.print_exc()
                    sys.stderr.write(u'Impossível obter CV do idCNPq %s: %s\n' % (row[idCNPqField], repr(ocorrencia)))
                    continue

                with open(os.path.join(destdir, 'lattes%s.xml' % row[idCNPqField]), 'wb') as xmlout:
                    xmlout.write(cv)
        finally:
            # Always release the output file, even on errors or interrupts.
            if csvout is not None:
                csvout.close()
def main():
    """Command-line entry point: parse the options and run ``process``.

    Expects exactly one positional argument (the input CSV); otherwise the
    option parser reports an error and exits.
    """
    cli = OptionParser(usage=u'uso: %prog [opções] entrada.csv')
    cli.add_option(
        '-o', '--output',
        dest='outfile',
        metavar='ARQUIVO',
        help=u'Grava um CSV com idCNPq no ARQUIVO',
    )
    cli.add_option(
        '-d', '--destdir',
        dest='destdir',
        metavar='DIRETORIO',
        default='.',
        help=u'Grava os CVs no DIRETORIO',
    )
    cli.add_option(
        '-v', '--verbose',
        dest='verbose',
        action='store_true',
        default=False,
    )

    opts, positional = cli.parse_args()
    if len(positional) != 1:
        # OptionParser.error() prints the message and exits.
        cli.error(u'Forneça um arquivo CSV de entrada')

    (input_csv,) = positional
    process(
        input_csv,
        destdir=opts.destdir,
        outfile=opts.outfile,
        verbose=opts.verbose,
    )
# Run the command-line interface only when this file is executed as a
# script; importing it as a module has no side effects.
if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment