Skip to content

Instantly share code, notes, and snippets.

@delannoy
Last active April 17, 2021 01:56
Show Gist options
  • Select an option

  • Save delannoy/ad959e9f1bf4510f919a088e72c4f87a to your computer and use it in GitHub Desktop.

Select an option

Save delannoy/ad959e9f1bf4510f919a088e72c4f87a to your computer and use it in GitHub Desktop.
Return arbitrary `brilcalc lumi` query as a pandas.DataFrame
#!/usr/bin/env python3
import os
import pandas
import shlex
def brilcalcLumiQuery(outputstyle:str='csv', cerntime:bool=False, tssec:bool=True, unit:str='/fb', **kwargs) -> str:
    '''
    Format a `brilcalc lumi` command line. Keyword arguments are expected to specify the query.
    Each supported keyword maps onto one brilcalc flag (short and long spellings are both accepted, e.g. f/fill, r/run, i/ls).
    A value of True emits the bare flag, False or None emits nothing, and any other value is stringified and shell-quoted.
    Keywords that are not listed below are silently ignored.
    Returns the command as a single string (no trailing whitespace), meant to be executed through a shell.
    Raises ValueError if 'year' is given but cannot be converted to an integer.
    See [https://cms-service-lumi.web.cern.ch/cms-service-lumi/brilwsdoc.html#brilcalc] for more info.
    '''
    def kwArgs(kw:str, flag:str) -> list:
        '''Return the command-line tokens for keyword `kw` (an empty list when it is absent or switched off).'''
        val = kwargs.get(kw)
        if val is None: return []
        if isinstance(val, bool): return [flag] if val else [] # a False switch must not emit its flag
        return [flag, shlex.quote(str(val))]
    args = ['brilcalc', 'lumi']
    # SELECTIONS
    args += kwArgs('f', '-f') # f=6666
    args += kwArgs('fill', '-f') # fill=6666
    args += kwArgs('r', '-r') # r=325000
    args += kwArgs('run', '-r') # run=325000
    args += kwArgs('i', '-i') # i='{325000:[[0,10],[100,200]]}'
    args += kwArgs('ls', '-i') # ls='{325000:[[0,10],[100,200]]}'
    args += kwArgs('begin', '--begin') # begin='05/10/18 21:21:40'
    args += kwArgs('end', '--end') # end='05/10/18 22:43:52'
    year = kwargs.get('year')
    if year is not None:
        YY = f'{int(year) % 100:02d}' # two-digit year, as used by the MM/DD/YY timestamps below
        args.append(f'--begin "01/01/{YY} 00:00:00" --end "12/31/{YY} 23:59:59"') # year=2018
    # FILTERS
    args += kwArgs('b', '-b') # b='STABLE BEAMS' # Choices: ["STABLE BEAMS", "FLAT TOP","ADJUST","SQUEEZE"]
    args += kwArgs('beamstatus', '-b') # beamstatus='STABLE BEAMS' # Choices: ["STABLE BEAMS", "FLAT TOP","ADJUST","SQUEEZE"]
    args += kwArgs('amodetag', '--amodetag') # amodetag='PROTPHYS' # Choices ["PROTPHYS","IONPHYS", "PAPHYS"]
    args += kwArgs('machinemode', '--amodetag') # machinemode='PROTPHYS' # Choices ["PROTPHYS","IONPHYS", "PAPHYS"]
    args += kwArgs('beamenergy', '--beamenergy') # beamenergy=6500 # in GeV
    # OUTPUT
    args += kwArgs('o', '-o') # o='file.csv' # output csv filename (incompatible with brilcalcDataFrame())
    args += kwArgs('outputfile', '-o') # outputfile='file.csv' # output csv filename (incompatible with brilcalcDataFrame())
    if outputstyle: args += ['--output-style', shlex.quote(str(outputstyle))] # outputstyle='csv' # stdout output format. Choices: ["tab","csv","html"] (brilcalcDataFrame() requires 'csv')
    args += kwArgs('n', '-n') # n=10 # apply the scalefactor 1./n to the output
    args += kwArgs('scalefactor', '-n') # scalefactor=10 # apply the scalefactor 1./n to the output
    if cerntime: args.append('--cerntime') # cerntime=False # display results in cern local time (brilcalcDataFrame() requires False)
    if tssec: args.append('--tssec') # tssec=True # display result time field in unix timestamp (brilcalcDataFrame() requires True)
    # LUMI OPTIONS
    args += kwArgs('filedata', '--filedata') # filedata='/brildata/vdmdata18/6666' # input file(s) in hdf5 format
    args += kwArgs('byls', '--byls') # byls=True # show luminosity and average pileup by lumi section
    args += kwArgs('xing', '--xing') # xing=True # show luminosity by lumi section and per bunch crossing
    args += kwArgs('xingId', '--xingId') # xingId='1,2,3' # select bunch(es) by id
    args += kwArgs('xingTr', '--xingTr') # xingTr=0.85 # select bunch(es) with luminosity above the specified fraction of the max bx lumi
    args += kwArgs('xingMin', '--xingMin') # xingMin=1 # select bunch(es) with luminosity greater than the specified value
    args += kwArgs('normtag', '--normtag') # normtag='normtag_PHYSICS.json' # apply calibration/correction function defined by a tag
    if unit: args += ['-u', shlex.quote(str(unit))] # unit='/fb' # show luminosity in the specified unit and scale the output value accordingly (pass unit=False to omit the flag)
    args += kwArgs('type', '--type') # type='pltzero' # show results from the selected luminometer. Choices: ["hfoc","hfet","bcm1f","bcm1fsi","bcm1futca","pltzero","pltslink","dt","pxl","ramses","radmon"]
    args += kwArgs('without_correction', '--without-correction') # without_correction=True # show raw data taken by a specific luminometer (requires 'type' option)
    args += kwArgs('datatag', '--datatag') # datatag='v1' # specify version of lumi and beam data
    args += kwArgs('precision', '--precision') # precision='9f' # define the luminosity value output format (f=float, e=scientific notation) and precision (integer)
    args += kwArgs('hltpath', '--hltpath') # hltpath='HLT_ZeroBias_v6' # hlt path name or pattern
    args += kwArgs('ignore_mask', '--ignore-mask') # ignore_mask=True # switch off the effect of L1 bit masks
    args += kwArgs('without_checkjson', '--without-checkjson') # without_checkjson=True # switch off cross-checking with the json selection
    args += kwArgs('minBiasXsec', '--minBiasXsec') # minBiasXsec=80000.0 # specify minimum bias cross section (ub) to use when calculating the average pileup column for the --byls output
    return ' '.join(args)
def brilcalcLumiPOG(year:int=None, golden:str='delivered', **kwargs) -> str:
    '''
    Format brilcalc query string with LumiPOG recommendations
    The "golden" argument expects one of the following strings: ["delivered","legacy","prelegacy"].
    "delivered" selects stable-beam proton physics at 6500 GeV with the BRIL normtag;
    "legacy"/"prelegacy" apply the PHYSICS normtag together with the certification JSON for the given year (2015-2018).
    When 'year' is None, no year restriction is added to the query (only possible with golden="delivered").
    Any additional keyword arguments are forwarded to brilcalcLumiQuery() and take precedence over the defaults chosen here.
    Raises ValueError for an unknown "golden" value, or when "legacy"/"prelegacy" is requested for a year without a known JSON file.
    See [https://twiki.cern.ch/twiki/bin/view/CMS/TWikiLUM] for more info.
    '''
    if golden not in ('delivered', 'legacy', 'prelegacy'):
        raise ValueError(f'golden must be one of "delivered", "legacy", "prelegacy" (got {golden!r})')
    lumipog = '/cvmfs/cms-bril.cern.ch/cms-lumi-pog/Normtags'
    normtagBRIL = f'{lumipog}/normtag_BRIL.json'
    normtagPHYSICS = f'{lumipog}/normtag_PHYSICS.json'
    COMMDQM = '/afs/cern.ch/cms/CAF/CMSCOMM/COMM_DQM/certification'
    # certification JSON files per year; 2015 uses the same file for both selections
    goldenJSON = {
        2015: {
            'legacy': f'{COMMDQM}/Collisions15/13TeV/Reprocessing/Cert_13TeV_16Dec2015ReReco_Collisions15_25ns_JSON_v2.txt',
            'prelegacy': f'{COMMDQM}/Collisions15/13TeV/Reprocessing/Cert_13TeV_16Dec2015ReReco_Collisions15_25ns_JSON_v2.txt',
        },
        2016: {
            'legacy': f'{COMMDQM}/Collisions16/13TeV/Legacy_2016/Cert_271036-284044_13TeV_Legacy2016_Collisions16_JSON.txt',
            'prelegacy': f'{COMMDQM}/Collisions16/13TeV/ReReco/Final/Cert_271036-284044_13TeV_ReReco_07Aug2017_Collisions16_JSON.txt',
        },
        2017: {
            'legacy': f'{COMMDQM}/Collisions17/13TeV/Legacy_2017/Cert_294927-306462_13TeV_UL2017_Collisions17_GoldenJSON.txt',
            'prelegacy': f'{COMMDQM}/Collisions17/13TeV/ReReco/Cert_294927-306462_13TeV_EOY2017ReReco_Collisions17_JSON_v1.txt',
        },
        2018: {
            'legacy': f'{COMMDQM}/Collisions18/13TeV/Legacy_2018/Cert_314472-325175_13TeV_Legacy2018_Collisions18_JSON.txt',
            'prelegacy': f'{COMMDQM}/Collisions18/13TeV/ReReco/Cert_314472-325175_13TeV_17SeptEarlyReReco2018ABC_PromptEraD_Collisions18_JSON.txt',
        },
    }
    if golden == 'delivered':
        options = dict(beamstatus='STABLE BEAMS', amodetag='PROTPHYS', beamenergy=6500, normtag=normtagBRIL)
    else:
        if year not in goldenJSON:
            raise ValueError(f'no {golden} certification JSON is known for year {year!r} (supported: {sorted(goldenJSON)})')
        options = dict(normtag=normtagPHYSICS, i=goldenJSON[year][golden])
    if year is not None: options['year'] = year # never hand a literal None to the query formatter
    options.update(kwargs) # caller-supplied options win over the recommendations above
    return brilcalcLumiQuery(**options)
def brilcalcDataFrame(query:str, perFill:bool=False, unit:str='/fb') -> pandas.DataFrame:
    '''
    Query brilcalc and return as a pandas.DataFrame.
    The query is printed, executed through the shell, and its stdout is parsed as csv: the second output line is taken as
    the column header, and every other line starting with '#' (norm tag version, summary) is skipped, as are blank lines.
    A 'run:fill' column is split into separate 'run' and 'fill' columns, columns are converted to numeric dtypes where possible,
    and a 'dt' (UTC datetime) column is inserted in front of 'time' when 'time' holds numeric unix timestamps.
    If 'perFill' argument is set to True, data will be grouped per-fill as opposed to per-run (only works when querying multiple fills);
    this requires the 'dt', 'time', 'run', 'nls', 'ncms', 'delivered(unit)' and 'recorded(unit)' columns, so 'unit' must match the query.
    If the command produced fewer than two lines of output, the raw list of output lines is returned instead of a DataFrame.
    '''
    def toNumeric(column):
        '''Convert a column to a numeric dtype, or hand it back untouched if it does not parse.'''
        try: return pandas.to_numeric(column)
        except (ValueError, TypeError): return column
    print(query)
    with os.popen(query) as pipe: # close the pipe once the output has been read
        brilcalc = pipe.read().splitlines()
    if len(brilcalc) < 2: # no header line available: nothing to parse
        return brilcalc
    columns = brilcalc[1][1:].split(',') # drop leading '#' from header line and split by commas into a list of strings
    data = [line.split(',') for line in brilcalc if line and not line.startswith('#')] # skip blank lines and lines starting with '#' (norm tag version, column headers, and summary)
    df = pandas.DataFrame(data=data, columns=columns)
    if 'run:fill' in df.columns:
        runFill = df['run:fill'].str.split(':',expand=True).rename(columns={0:'run', 1:'fill'}) # split 'run:fill' into a two-column dataframe
        df = runFill.join(df).drop(columns='run:fill') # prepend 'run' and 'fill' columns and drop 'run:fill' column
    df = df.apply(toNumeric) # infer dtype and convert columns to int64 or float64 accordingly
    if 'time' in df.columns and pandas.api.types.is_numeric_dtype(df['time']): # only unix timestamps (--tssec) can be converted
        df.insert(df.columns.get_loc('time'), 'dt', pandas.to_datetime(df['time'], unit='s', utc=True)) # convert df['time'] to datetime64 dtype and insert as new 'dt' column
    if perFill:
        aggDict = {'dt':'min','time':'min','run':list,'nls':list,'ncms':list,f'delivered({unit})':'sum',f'recorded({unit})':'sum'}
        df = df.groupby('fill').agg(aggDict).reset_index()
    return df
def examples():
    '''Run three sample brilcalc queries through brilcalcDataFrame(); the resulting frames are not returned.'''
    stableBeams = 'STABLE BEAMS'
    # PLT luminosity per lumi section for fill 6666
    fillQuery = brilcalcLumiQuery(fill=6666, beamstatus=stableBeams, type='pltzero', byls=True)
    brilcalcDataFrame(fillQuery)
    # per-bunch luminosity for run 325000, keeping bunches above 85% of the max bx lumi
    bunchQuery = brilcalcLumiQuery(run=325000, beamstatus=stableBeams, xing=True, xingTr=0.85)
    brilcalcDataFrame(bunchQuery)
    # HFET luminosity per lumi section for the HLT_ZeroBias_v6 path in run 325000 (no unit flag)
    zeroBiasQuery = brilcalcLumiQuery(r=325000, byls=True, unit=False, minBiasXsec=80000, type='hfet', precision='2f', hltpath='HLT_ZeroBias_v6')
    brilcalcDataFrame(zeroBiasQuery)
def run2(golden:str='delivered', unit:str='/fb') -> pandas.DataFrame:
    '''
    Calculate total delivered/recorded lumi for run2 to reproduce results [here](https://twiki.cern.ch/twiki/bin/view/CMS/TWikiLUM#SummaryTable)
    The "golden" argument expects one of the following strings: ["delivered","legacy","prelegacy"].
    "delivered" sums the 'delivered(unit)' column; "legacy" and "prelegacy" sum the 'recorded(unit)' column.
    'unit' is passed along to brilcalcLumiPOG() and is used to pick the column to sum.
    Prints the total for each year (2015-2018), for all four years, and for 2016-2018, and returns the concatenated DataFrame.
    Raises ValueError for an unknown "golden" value (before any query is run).
    '''
    lumiColumn = {'delivered': f'delivered({unit})', 'legacy': f'recorded({unit})', 'prelegacy': f'recorded({unit})'}
    if golden not in lumiColumn:
        raise ValueError(f'golden must be one of "delivered", "legacy", "prelegacy" (got {golden!r})')
    col = lumiColumn[golden]
    # one query per year, kept in chronological order
    perYear = [(year, brilcalcDataFrame(brilcalcLumiPOG(year=year, golden=golden, unit=unit))) for year in (2015, 2016, 2017, 2018)]
    df = pandas.concat([frame for _, frame in perYear])
    for year, frame in perYear:
        print(f'total {year} {golden} lumi: {frame[col].sum()} 1{unit}')
    print(f'total Run2 {golden} lumi: {df[col].sum()} 1{unit}')
    df2016_2018 = df[(df['dt'] > '2016-01-01') & (df['dt'] < '2018-12-31')]
    print(f'total 2016-2018 {golden} lumi: {df2016_2018[col].sum()} 1{unit}')
    return df
def intLumiPerFill(unit:str='/fb') -> pandas.DataFrame:
    '''
    Query brilcalc for all run2, group into fills, and add columns for cumulative delivered/recorded lumi
    One "delivered" query is run per year (2015-2018) with perFill=True; the per-year frames are concatenated in
    chronological order with a fresh 0..N-1 index, and 'intDelivered(unit)' / 'intRecorded(unit)' hold the running sums.
    'unit' is passed along to brilcalcLumiPOG() and brilcalcDataFrame() and is used to name the lumi columns.
    '''
    # collect the per-year frames first and concatenate once, rather than growing a DataFrame inside the loop
    frames = [brilcalcDataFrame(brilcalcLumiPOG(year=year, golden='delivered', unit=unit), perFill=True, unit=unit) for year in (2015, 2016, 2017, 2018)]
    df = pandas.concat(frames, ignore_index=True) # per-year frames each start at index 0; renumber to avoid duplicate labels
    df[[f'intDelivered({unit})',f'intRecorded({unit})']] = df[[f'delivered({unit})',f'recorded({unit})']].cumsum()
    return df
def main():
    '''Print the run2 luminosity summary for each selection: delivered, then legacy, then prelegacy.'''
    for selection in ('delivered', 'legacy', 'prelegacy'):
        run2(golden=selection)

# only run the summary when executed as a script, not when imported
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment