Skip to content

Instantly share code, notes, and snippets.

@marquisthunder
Created November 7, 2017 03:43
Show Gist options
  • Select an option

  • Save marquisthunder/4f69af360bb29785a2ce84af1de3e40d to your computer and use it in GitHub Desktop.

Select an option

Save marquisthunder/4f69af360bb29785a2ce84af1de3e40d to your computer and use it in GitHub Desktop.
[pmml]
def pmml_validation(pmml, timeout=None):
    """
    Upload a PMML document to the scoring service and report whether it was accepted.

    The document is sent with an HTTP PUT to ``<validate_url>/<random uuid4>``
    using a ``text/xml`` content type. ``validate_url`` is a module-level name
    defined outside this function (presumably an Openscoring model endpoint --
    confirm against the deployment). Nothing is written to disk here; the only
    result is the boolean below.

    :param pmml: the PMML document itself, either as text (encoded to UTF-8
        before sending) or as already-encoded bytes.
    :param timeout: optional timeout forwarded to ``requests.put``. The default
        ``None`` keeps the previous behaviour of waiting indefinitely.
    :return: ``True`` when the server answers 200 or 201, ``False`` otherwise.
    """
    headers = {'Content-type': 'text/xml'}
    # Accept pre-encoded payloads as-is; bytes(bytes_obj, encoding=...) would raise TypeError.
    data = pmml if isinstance(pmml, bytes) else bytes(pmml, encoding='utf-8')
    # A fresh UUID per call so repeated uploads do not collide on the same resource id.
    rt = requests.put('{}/{}'.format(validate_url, uuid.uuid4()),
                      headers=headers, data=data, timeout=timeout)
    return rt.status_code in (200, 201)
def _as_list(node):
    """Return *node* as a list; xmltodict yields a bare mapping for a lone child element."""
    if node is None:
        return []
    return node if isinstance(node, list) else [node]


def verify_pmml(pmml, verifydata, retry=3):
    """
    Embed a ModelVerification section into a PMML file and validate the result.

    Reads ``<pmml>.pmml``, builds ``VerificationFields`` from the data
    dictionary and the model's output fields, attaches a sample of
    ``verifydata`` as an ``InlineTable``, and submits the serialized document
    through ``pmml_validation``. On the first accepted attempt the document is
    written to ``<pmml>-verified.pmml`` and the function returns.

    :param pmml: path stem of the PMML file (without the ``.pmml`` suffix).
    :param verifydata: anything ``DataFrame(data=...)`` accepts. Column names
        are presumably expected to match the ``@column`` names generated below
        (data field names, output field names or ``<feature>_<value>``) --
        confirm against the caller.
    :param retry: maximum number of attempts; capped at the number of rows.
    :raises KeyError: if no ``*Model`` element exists under ``<PMML>``.
    :raises RuntimeError: if no attempt was accepted (including zero attempts).
    """
    with codecs.open("{}.pmml".format(pmml), "r", encoding="utf-8") as inpmml:
        bd = xmltodict.parse(inpmml.read(), encoding="utf-8")

    document = bd["PMML"]
    dataFields = [OrderedDict([('@field', dataField['@name']),
                               ('@column', dataField['@name'])])
                  for dataField in _as_list(document["DataDictionary"]["DataField"])]

    # The first top-level key containing "Model" is taken as the model element.
    modelType = next((tag for tag in document if "Model" in tag), None)
    if modelType is None:
        raise KeyError("no *Model element found under <PMML> in {}.pmml".format(pmml))
    model = document[modelType]

    # Rebase Output under modelType: for segmented models the Output lives on
    # the last segment's RegressionModel and is moved up to the enclosing model.
    # NOTE(review): the moved Output is appended after the existing children;
    # confirm the consumer tolerates that element order.
    if "Segmentation" in model:
        lastSegment = _as_list(model["Segmentation"]["Segment"])[-1]
        model["Output"] = lastSegment["RegressionModel"].pop("Output")

    predFields = [OrderedDict([('@field', outField['@name']),
                               ('@column', "{}_{}".format(outField['@feature'], outField['@value'])
                                if '@value' in outField else outField['@name'])])
                  for outField in _as_list(model['Output']['OutputField'])]
    allFields = dataFields + predFields
    verificationFields = OrderedDict([('VerificationField', allFields)])

    # Convert once, up front, so the attempt cap counts rows (len() of a
    # column dict would count columns) and the frame is not rebuilt per attempt.
    frame = DataFrame(data=verifydata)
    attempts = min(retry, len(frame))
    if len(frame) > 3:
        splitter = KFold(n_splits=len(frame) // 2)
        folds = [testIdx for _, testIdx in splitter.split(frame)]
    else:
        folds = None

    for attempt in range(attempts):
        if folds is not None:
            # First attempt uses fold 0; later attempts move on to other folds
            # instead of resubmitting the identical sample.
            item = frame.iloc[folds[attempt % len(folds)]]
        else:
            # Small inputs: one row per attempt, walking from the last
            # eligible row down to row 0.
            row = attempts - 1 - attempt
            item = frame.iloc[row:row + 1]

        # 'records' is the full spelling; abbreviations such as 'row' are
        # rejected by pandas >= 2.0.
        rows = [OrderedDict(inst.items()) for inst in item.to_dict(orient='records')]
        inlineTable = OrderedDict([('row', rows)])
        model["ModelVerification"] = OrderedDict([
            ('@recordCount', str(len(rows))),
            ('@fieldCount', str(len(allFields))),
            ('VerificationFields', verificationFields),
            ('InlineTable', inlineTable),
        ])
        serialized = xmltodict.unparse(bd, encoding="utf-8", pretty=True)

        # Validate the augmented document itself, not the file stem.
        if pmml_validation(serialized):
            with codecs.open("{}-verified.pmml".format(pmml), "w", encoding="utf-8") as outpmml:
                outpmml.write(serialized)
            print("verification done")
            return

    raise RuntimeError("The JPMML-SkLearn conversion application has failed.\n"
                       "predict_prob precision mismatch")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment