Last active
April 17, 2021 01:56
-
-
Save delannoy/ad959e9f1bf4510f919a088e72c4f87a to your computer and use it in GitHub Desktop.
Return arbitrary `brilcalc lumi` query as a pandas.DataFrame
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python3 | |
| import os | |
| import pandas | |
| import shlex | |
def brilcalcLumiQuery(outputstyle:str='csv', cerntime:bool=False, tssec:bool=True, unit:str='/fb', **kwargs) -> str:
    '''
    Format a `brilcalc lumi` command line.

    The four named arguments control the output format; every other option is given as a keyword
    argument (see the per-option comments below for the accepted names and example values).
    Keywords that are not listed below are ignored.
    - A keyword set to True emits its bare flag.
    - A keyword set to False or None emits nothing.
    - Any other value is converted with str() and shell-quoted.
    See [https://cms-service-lumi.web.cern.ch/cms-service-lumi/brilwsdoc.html#brilcalc] for more info.

    Returns the command as a single string, options separated by single spaces.
    '''
    def option(kw:str, flag:str) -> list:
        '''Return the command-line tokens for keyword `kw`, or [] when it is absent or switched off.'''
        val = kwargs.get(kw)
        if val is None or val is False: return []  # absent, or a switch explicitly turned off
        if val is True: return [flag]  # boolean switch: flag without a value
        return [flag, shlex.quote(str(val))]  # quote so spaces/braces survive the shell

    tokens = ['brilcalc', 'lumi']
    # SELECTIONS
    tokens += option('f', '-f')  # f=6666
    tokens += option('fill', '-f')  # fill=6666
    tokens += option('r', '-r')  # r=325000
    tokens += option('run', '-r')  # run=325000
    tokens += option('i', '-i')  # i='{325000:[[0,10],[100,200]]}'
    tokens += option('ls', '-i')  # ls='{325000:[[0,10],[100,200]]}'
    tokens += option('begin', '--begin')  # begin='05/10/18 21:21:40'
    tokens += option('end', '--end')  # end='05/10/18 22:43:52'
    year = kwargs.get('year')  # year=2018
    if year is not None:  # year=None means "no calendar-year selection"
        YY = f'{int(year) % 100:02d}'  # two-digit year, as used in the begin/end examples above
        tokens.append(f'--begin "01/01/{YY} 00:00:00" --end "12/31/{YY} 23:59:59"')
    # FILTERS
    tokens += option('b', '-b')  # b='STABLE BEAMS' # Choices: ["STABLE BEAMS", "FLAT TOP","ADJUST","SQUEEZE"]
    tokens += option('beamstatus', '-b')  # beamstatus='STABLE BEAMS' # Choices: ["STABLE BEAMS", "FLAT TOP","ADJUST","SQUEEZE"]
    tokens += option('amodetag', '--amodetag')  # amodetag='PROTPHYS' # Choices ["PROTPHYS","IONPHYS", "PAPHYS"]
    tokens += option('machinemode', '--amodetag')  # machinemode='PROTPHYS' # Choices ["PROTPHYS","IONPHYS", "PAPHYS"]
    tokens += option('beamenergy', '--beamenergy')  # beamenergy=6500 # in GeV
    # OUTPUT
    tokens += option('o', '-o')  # o='file.csv' # output csv filename (incompatible with brilcalcDataFrame())
    tokens += option('outputfile', '-o')  # outputfile='file.csv' # output csv filename (incompatible with brilcalcDataFrame())
    if outputstyle: tokens += ['--output-style', shlex.quote(str(outputstyle))]  # outputstyle='csv' # stdout output format. Choices: ["tab","csv","html"] (brilcalcDataFrame() requires 'csv')
    tokens += option('n', '-n')  # n=10 # apply the scalefactor 1./n to the output
    tokens += option('scalefactor', '-n')  # scalefactor=10 # apply the scalefactor 1./n to the output
    if cerntime: tokens.append('--cerntime')  # cerntime=False # display results in cern local time (brilcalcDataFrame() requires False)
    if tssec: tokens.append('--tssec')  # tssec=True # display result time field in unix timestamp (brilcalcDataFrame() requires True)
    # LUMI OPTIONS
    tokens += option('filedata', '--filedata')  # filedata='/brildata/vdmdata18/6666' # input file(s) in hdf5 format
    tokens += option('byls', '--byls')  # byls=True # show luminosity and average pileup by lumi section
    tokens += option('xing', '--xing')  # xing=True # show luminosity by lumi section and per bunch crossing
    tokens += option('xingId', '--xingId')  # xingId='1,2,3' # select bunch(es) by id
    tokens += option('xingTr', '--xingTr')  # xingTr=0.85 # select bunch(es) with luminosity above the specified fraction of the max bx lumi
    tokens += option('xingMin', '--xingMin')  # xingMin=1 # select bunch(es) with luminosity greater than the specified value
    tokens += option('normtag', '--normtag')  # normtag='normtag_PHYSICS.json' # apply calibration/correction function defined by a tag
    if unit: tokens += ['-u', shlex.quote(str(unit))]  # unit='/fb' # show luminosity in the specified unit and scale the output value accordingly (unit=False omits the option)
    tokens += option('type', '--type')  # type='pltzero' # show results from the selected luminometer. Choices: ["hfoc","hfet","bcm1f","bcm1fsi","bcm1futca","pltzero","pltslink","dt","pxl","ramses","radmon"]
    tokens += option('without_correction', '--without-correction')  # without_correction=True # show raw data taken by a specific luminometer (requires 'type' option)
    tokens += option('datatag', '--datatag')  # datatag='v1' # specify version of lumi and beam data
    tokens += option('precision', '--precision')  # precision='9f' # define the luminosity value output format (f=float, e=scientific notation) and precision (integer)
    tokens += option('hltpath', '--hltpath')  # hltpath='HLT_ZeroBias_v6' # hlt path name or pattern
    tokens += option('ignore_mask', '--ignore-mask')  # ignore_mask=True # switch off the effect of L1 bit masks
    tokens += option('without_checkjson', '--without-checkjson')  # without_checkjson=True # switch off cross-checking with the json selection
    tokens += option('minBiasXsec', '--minBiasXsec')  # minBiasXsec=80000.0 # specify minimum bias cross section (ub) to use when calculating the average pileup column for the --byls output
    return ' '.join(tokens)
def brilcalcLumiPOG(year:int=None, golden:str='delivered', **kwargs) -> str:
    '''
    Format brilcalc query string with LumiPOG recommendations.

    year:   calendar year used for the begin/end selection; omitted from the query when None.
            For "legacy"/"prelegacy" it must be one of 2015-2018, because it selects the certification JSON file.
    golden: one of ["delivered","legacy","prelegacy"].
            "delivered" filters on STABLE BEAMS / PROTPHYS / 6500 GeV and uses normtag_BRIL.json.
            "legacy" and "prelegacy" use normtag_PHYSICS.json and restrict to the matching certification JSON.
    kwargs: extra brilcalcLumiQuery() keywords; they are applied last, so they override the presets above.

    Raises ValueError for an unknown `golden`, or for "legacy"/"prelegacy" with a year that has no JSON file listed here.
    See [https://twiki.cern.ch/twiki/bin/view/CMS/TWikiLUM] for more info.
    '''
    lumipog = '/cvmfs/cms-bril.cern.ch/cms-lumi-pog/Normtags'
    normtagBRIL = f'{lumipog}/normtag_BRIL.json'
    normtagPHYSICS = f'{lumipog}/normtag_PHYSICS.json'
    COMMDQM = '/afs/cern.ch/cms/CAF/CMSCOMM/COMM_DQM/certification'
    reReco2015 = f'{COMMDQM}/Collisions15/13TeV/Reprocessing/Cert_13TeV_16Dec2015ReReco_Collisions15_25ns_JSON_v2.txt'
    certification = {
        2015: {'legacy': reReco2015,
               'prelegacy': reReco2015},  # 2015 uses the same file for both
        2016: {'legacy': f'{COMMDQM}/Collisions16/13TeV/Legacy_2016/Cert_271036-284044_13TeV_Legacy2016_Collisions16_JSON.txt',
               'prelegacy': f'{COMMDQM}/Collisions16/13TeV/ReReco/Final/Cert_271036-284044_13TeV_ReReco_07Aug2017_Collisions16_JSON.txt'},
        2017: {'legacy': f'{COMMDQM}/Collisions17/13TeV/Legacy_2017/Cert_294927-306462_13TeV_UL2017_Collisions17_GoldenJSON.txt',
               'prelegacy': f'{COMMDQM}/Collisions17/13TeV/ReReco/Cert_294927-306462_13TeV_EOY2017ReReco_Collisions17_JSON_v1.txt'},
        2018: {'legacy': f'{COMMDQM}/Collisions18/13TeV/Legacy_2018/Cert_314472-325175_13TeV_Legacy2018_Collisions18_JSON.txt',
               'prelegacy': f'{COMMDQM}/Collisions18/13TeV/ReReco/Cert_314472-325175_13TeV_17SeptEarlyReReco2018ABC_PromptEraD_Collisions18_JSON.txt'},
    }
    if golden == 'delivered':
        options = {'beamstatus': 'STABLE BEAMS', 'amodetag': 'PROTPHYS', 'beamenergy': 6500, 'normtag': normtagBRIL}
    elif golden in ('legacy', 'prelegacy'):
        if year not in certification:
            raise ValueError(f'no certification JSON known for year={year!r}; expected one of {sorted(certification)}')
        options = {'normtag': normtagPHYSICS, 'i': certification[year][golden]}
    else:
        raise ValueError(f'golden={golden!r} is not one of ["delivered","legacy","prelegacy"]')
    if year is not None:
        options['year'] = year  # only select a calendar year when one was actually requested
    options.update(kwargs)  # caller-supplied options take precedence over the presets
    return brilcalcLumiQuery(**options)
def _toNumericIfPossible(column:pandas.Series) -> pandas.Series:
    '''Return `column` converted to a numeric dtype, or unchanged when some value is not numeric.'''
    try:
        return pandas.to_numeric(column)
    except (ValueError, TypeError):
        return column

def brilcalcDataFrame(query:str, perFill:bool=False, unit:str='/fb') -> pandas.DataFrame:
    '''
    Run `query` in a shell and return its csv output as a pandas.DataFrame.

    query:   complete command line; it is printed and then executed through os.popen().
    perFill: if True, group rows per fill instead of per run (only meaningful when querying multiple fills).
             Uses the 'run', 'nls', 'ncms', 'delivered(unit)' and 'recorded(unit)' columns.
    unit:    unit string appearing in the delivered/recorded column names; only used when perFill is True.

    The second output line is taken as the column header; all other lines starting with '#' are skipped.
    A 'run:fill' column is split into numeric 'run' and 'fill' columns.
    A 'dt' column (UTC datetime) is inserted before 'time', which must hold unix timestamps in seconds.
    If the command prints fewer than two lines, the raw list of output lines is returned instead of a DataFrame.
    '''
    print(query)
    with os.popen(query) as pipe:  # context manager closes the pipe and waits for the command to finish
        brilcalc = pipe.read().splitlines()
    if len(brilcalc) < 2:  # no header line: hand back whatever was printed (e.g. an error message)
        return brilcalc
    columns = brilcalc[1][1:].split(',')  # drop leading '#' from header line and split by commas into a list of strings
    data = [line.split(',') for line in brilcalc if line.strip() and not line.startswith('#')]  # skip blank lines and lines starting with '#' (norm tag version, column headers, and summary)
    df = pandas.DataFrame(data=data, columns=columns)
    if 'run:fill' in df.columns:
        runFill = df['run:fill'].str.split(':', expand=True).rename(columns={0:'run', 1:'fill'})  # split 'run:fill' into a two-column dataframe
        df = runFill.join(df).drop(columns='run:fill')  # prepend 'run' and 'fill' columns and drop 'run:fill' column
    df = df.apply(_toNumericIfPossible)  # convert each column to int64 or float64 where every value parses as a number
    df.insert(df.columns.get_loc('time'), 'dt', pandas.to_datetime(df['time'], unit='s', utc=True))  # convert df['time'] to datetime64 dtype and insert as new 'dt' column
    if perFill:
        aggDict = {'dt':'min', 'time':'min', 'run':list, 'nls':list, 'ncms':list, f'delivered({unit})':'sum', f'recorded({unit})':'sum'}
        df = df.groupby('fill').agg(aggDict).reset_index()
    return df
def examples():
    '''Run three sample queries: PLT per-lumi-section data for one fill, per-bunch data for one run, and ZeroBias HLT-path data for one run.'''
    # fill 6666, stable beams only, PLT luminometer, one row per lumi section
    pltQuery = brilcalcLumiQuery(fill=6666, beamstatus='STABLE BEAMS', type='pltzero', byls=True)
    pltFill6666 = brilcalcDataFrame(pltQuery)
    # run 325000, per bunch crossing, keeping bunches above 85% of the max bunch luminosity
    perBunchQuery = brilcalcLumiQuery(run=325000, beamstatus='STABLE BEAMS', xing=True, xingTr=0.85)
    run325000perBunch = brilcalcDataFrame(perBunchQuery)
    # run 325000 for a single HLT path; unit=False leaves out the -u option
    zeroBiasQuery = brilcalcLumiQuery(r=325000, byls=True, unit=False, minBiasXsec=80000, type='hfet', precision='2f', hltpath='HLT_ZeroBias_v6')
    run325000ZeroBias = brilcalcDataFrame(zeroBiasQuery)
def run2(golden:str='delivered', unit:str='/fb') -> pandas.DataFrame:
    '''
    Calculate total delivered/recorded lumi for run2 to reproduce results [here](https://twiki.cern.ch/twiki/bin/view/CMS/TWikiLUM#SummaryTable)

    golden: one of ["delivered","legacy","prelegacy"]; "delivered" sums the delivered column, the other two sum the recorded column.
    unit:   unit string used in the column name and in the printed totals.

    Prints the total per year, for all four years, and for 2016-2018, then returns the concatenated per-year DataFrame.
    Raises ValueError for an unknown `golden` before any query is run.
    '''
    quantity = {'delivered': 'delivered', 'legacy': 'recorded', 'prelegacy': 'recorded'}
    if golden not in quantity:  # fail before launching four slow queries
        raise ValueError(f'golden={golden!r} is not one of ["delivered","legacy","prelegacy"]')
    col = f'{quantity[golden]}({unit})'
    # NOTE(review): `unit` only reaches the query if brilcalcLumiPOG passes extra keywords on to brilcalcLumiQuery;
    # otherwise the query keeps its default unit and `col` matches only for that unit.
    perYear = {year: brilcalcDataFrame(brilcalcLumiPOG(year=year, golden=golden, unit=unit)) for year in (2015, 2016, 2017, 2018)}
    df = pandas.concat(list(perYear.values()))
    for year, yearly in perYear.items():
        print(f'total {year} {golden} lumi: {yearly[col].sum()} 1{unit}')
    print(f'total Run2 {golden} lumi: {df[col].sum()} 1{unit}')
    df2016_2018 = df[df['dt'].dt.year.between(2016, 2018)]  # whole calendar years, including 2018-12-31
    print(f'total 2016-2018 {golden} lumi: {df2016_2018[col].sum()} 1{unit}')
    return df
def intLumiPerFill(unit:str='/fb') -> pandas.DataFrame:
    '''
    Query brilcalc for all run2, group into fills, and add columns for cumulative delivered/recorded lumi.

    unit: unit string used in the delivered/recorded column names and in the names of the two added columns.

    Returns one row per fill for 2015-2018, in query order, with a fresh 0..n-1 index.
    The added columns are 'intDelivered(unit)' and 'intRecorded(unit)'.
    '''
    # NOTE(review): `unit` only reaches the query if brilcalcLumiPOG passes extra keywords on to brilcalcLumiQuery;
    # otherwise the query keeps its default unit and the column names match only for that unit.
    perYear = [brilcalcDataFrame(brilcalcLumiPOG(year=year, golden='delivered', unit=unit), perFill=True, unit=unit) for year in (2015, 2016, 2017, 2018)]
    df = pandas.concat(perYear, ignore_index=True)  # one concat instead of growing a frame in a loop; unique row labels
    df[f'intDelivered({unit})'] = df[f'delivered({unit})'].cumsum()
    df[f'intRecorded({unit})'] = df[f'recorded({unit})'].cumsum()
    return df
def main():
    '''Print the run2 totals for each of the three selections, in order: delivered, legacy, prelegacy.'''
    for selection in ('delivered', 'legacy', 'prelegacy'):
        run2(golden=selection)


# Run the summary only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment