Skip to content

Instantly share code, notes, and snippets.

@wesslen
Created November 17, 2024 20:11
Show Gist options
  • Select an option

  • Save wesslen/77cb22d955fc7c40218e2a3fdb7ecf65 to your computer and use it in GitHub Desktop.

Select an option

Save wesslen/77cb22d955fc7c40218e2a3fdb7ecf65 to your computer and use it in GitHub Desktop.
mcmc-elo-prompt-tournament.ipynb
Display the source blob
Display the rendered blob
Raw
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": [],
"authorship_tag": "ABX9TyMjENF3jS/HrjU5gAnnm+OL",
"include_colab_link": true
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
},
"widgets": {
"application/vnd.jupyter.widget-state+json": {
"f1687c78b82044f9861546ff33513c03": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HBoxModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HBoxModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HBoxView",
"box_style": "",
"children": [
"IPY_MODEL_00dd2af8d8f34ad3998153680e7e2ff8",
"IPY_MODEL_5c6b3fe72e9043b4a2f8e1f2a544e445"
],
"layout": "IPY_MODEL_80c6300ec0e44fd7a92c34aebf78bfbc"
}
},
"00dd2af8d8f34ad3998153680e7e2ff8": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ButtonModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "ButtonModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "ButtonView",
"button_style": "",
"description": "Choose A",
"disabled": false,
"icon": "",
"layout": "IPY_MODEL_f15d537a748f4c978683d34fa0c5890b",
"style": "IPY_MODEL_b43e5e46d77b4caab5538caeeddf6315",
"tooltip": ""
}
},
"5c6b3fe72e9043b4a2f8e1f2a544e445": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ButtonModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "ButtonModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "ButtonView",
"button_style": "",
"description": "Choose B",
"disabled": false,
"icon": "",
"layout": "IPY_MODEL_a99c1b3d3e7d4d12a6c898f82652c170",
"style": "IPY_MODEL_cd5deecd8de64888bda7709f93fc8678",
"tooltip": ""
}
},
"80c6300ec0e44fd7a92c34aebf78bfbc": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"f15d537a748f4c978683d34fa0c5890b": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"b43e5e46d77b4caab5538caeeddf6315": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ButtonStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "ButtonStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"button_color": null,
"font_weight": ""
}
},
"a99c1b3d3e7d4d12a6c898f82652c170": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"cd5deecd8de64888bda7709f93fc8678": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ButtonStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "ButtonStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"button_color": null,
"font_weight": ""
}
},
"898bbcc9208848da850f727238b9435e": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HBoxModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HBoxModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HBoxView",
"box_style": "",
"children": [
"IPY_MODEL_917c2f23b1b84cb3b45789267f9efa4d",
"IPY_MODEL_b6f6374543624c40ab6eabfeda62bdf8"
],
"layout": "IPY_MODEL_dc7986e83f9f4cd69265bd3f6c46a7ea"
}
},
"917c2f23b1b84cb3b45789267f9efa4d": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ButtonModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "ButtonModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "ButtonView",
"button_style": "",
"description": "Choose A",
"disabled": false,
"icon": "",
"layout": "IPY_MODEL_0c4f64ce7c4d475ea4e1ab3bc718309e",
"style": "IPY_MODEL_e6c02811c55844d3b98dbf931e29bce8",
"tooltip": ""
}
},
"b6f6374543624c40ab6eabfeda62bdf8": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ButtonModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "ButtonModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "ButtonView",
"button_style": "",
"description": "Choose B",
"disabled": false,
"icon": "",
"layout": "IPY_MODEL_73575953daf04cceab37e8cb34c49a96",
"style": "IPY_MODEL_1dca3aaade1c47d18b57f463d5c4ae6e",
"tooltip": ""
}
},
"dc7986e83f9f4cd69265bd3f6c46a7ea": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"0c4f64ce7c4d475ea4e1ab3bc718309e": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"e6c02811c55844d3b98dbf931e29bce8": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ButtonStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "ButtonStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"button_color": null,
"font_weight": ""
}
},
"73575953daf04cceab37e8cb34c49a96": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"1dca3aaade1c47d18b57f463d5c4ae6e": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ButtonStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "ButtonStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"button_color": null,
"font_weight": ""
}
},
"2d2cf0f7fd534250a0dabb8d2f47c182": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HBoxModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HBoxModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HBoxView",
"box_style": "",
"children": [
"IPY_MODEL_775b6a052bd04ce992aec3867c3e067c",
"IPY_MODEL_853e39c321c1492ea166d2fe50f070d1"
],
"layout": "IPY_MODEL_ca2b670185d34e268091dad81942d6fe"
}
},
"775b6a052bd04ce992aec3867c3e067c": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ButtonModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "ButtonModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "ButtonView",
"button_style": "",
"description": "Choose A",
"disabled": false,
"icon": "",
"layout": "IPY_MODEL_906d5d4a8dcc434dad8af3a50e42a957",
"style": "IPY_MODEL_38638d7eccc740fd8fa3fd3ee63579f4",
"tooltip": ""
}
},
"853e39c321c1492ea166d2fe50f070d1": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ButtonModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "ButtonModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "ButtonView",
"button_style": "",
"description": "Choose B",
"disabled": false,
"icon": "",
"layout": "IPY_MODEL_8b1359cc7b4947f1ba39145553ce196f",
"style": "IPY_MODEL_d7c72493913d412e91652740556aedc3",
"tooltip": ""
}
},
"ca2b670185d34e268091dad81942d6fe": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"906d5d4a8dcc434dad8af3a50e42a957": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"38638d7eccc740fd8fa3fd3ee63579f4": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ButtonStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "ButtonStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"button_color": null,
"font_weight": ""
}
},
"8b1359cc7b4947f1ba39145553ce196f": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"d7c72493913d412e91652740556aedc3": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ButtonStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "ButtonStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"button_color": null,
"font_weight": ""
}
},
"0eef207e7f0e4c5e98da0029baf58c60": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HBoxModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HBoxModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HBoxView",
"box_style": "",
"children": [
"IPY_MODEL_a3b1e471ae6f4f458aa9bd1f72a2abfe",
"IPY_MODEL_080f55a633864117a7d73b65d5ad37cb"
],
"layout": "IPY_MODEL_506aba7d82594dd9b12be82f995f7546"
}
},
"a3b1e471ae6f4f458aa9bd1f72a2abfe": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ButtonModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "ButtonModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "ButtonView",
"button_style": "",
"description": "Choose A",
"disabled": false,
"icon": "",
"layout": "IPY_MODEL_30c7b72a36644a5d85cb2a8c4be8e79c",
"style": "IPY_MODEL_b6eaa052e26f4a0ea71b32a4b8eac871",
"tooltip": ""
}
},
"080f55a633864117a7d73b65d5ad37cb": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ButtonModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "ButtonModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "ButtonView",
"button_style": "",
"description": "Choose B",
"disabled": false,
"icon": "",
"layout": "IPY_MODEL_3cb97a70f85842399d7ce9c7cd7046a6",
"style": "IPY_MODEL_7c236e9a26d74e908154cb17e013d279",
"tooltip": ""
}
},
"506aba7d82594dd9b12be82f995f7546": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"30c7b72a36644a5d85cb2a8c4be8e79c": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"b6eaa052e26f4a0ea71b32a4b8eac871": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ButtonStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "ButtonStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"button_color": null,
"font_weight": ""
}
},
"3cb97a70f85842399d7ce9c7cd7046a6": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"7c236e9a26d74e908154cb17e013d279": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ButtonStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "ButtonStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"button_color": null,
"font_weight": ""
}
},
"760bf461bae34871b0775669558a0c50": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HBoxModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HBoxModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HBoxView",
"box_style": "",
"children": [
"IPY_MODEL_19ef9fc5a8d643e897c88526fa7ceb66",
"IPY_MODEL_7e86436da1bd4b70a4d76646bdd2b68f"
],
"layout": "IPY_MODEL_1fa4701f793947a9817bbeb4c051900d"
}
},
"19ef9fc5a8d643e897c88526fa7ceb66": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ButtonModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "ButtonModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "ButtonView",
"button_style": "",
"description": "Choose A",
"disabled": false,
"icon": "",
"layout": "IPY_MODEL_f8e30cb90fcb48deb851db881e267912",
"style": "IPY_MODEL_eacc6723d110440cb2f2c9b6cd05ee37",
"tooltip": ""
}
},
"7e86436da1bd4b70a4d76646bdd2b68f": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ButtonModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "ButtonModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "ButtonView",
"button_style": "",
"description": "Choose B",
"disabled": false,
"icon": "",
"layout": "IPY_MODEL_c1a40fa15a534c7da4cd8f0816e34138",
"style": "IPY_MODEL_fc495f4d82884a4c856bd8e534bc2b97",
"tooltip": ""
}
},
"1fa4701f793947a9817bbeb4c051900d": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"f8e30cb90fcb48deb851db881e267912": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"eacc6723d110440cb2f2c9b6cd05ee37": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ButtonStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "ButtonStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"button_color": null,
"font_weight": ""
}
},
"c1a40fa15a534c7da4cd8f0816e34138": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"fc495f4d82884a4c856bd8e534bc2b97": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ButtonStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "ButtonStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"button_color": null,
"font_weight": ""
}
},
"82973072c9b147debfa730177b775f09": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HBoxModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HBoxModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HBoxView",
"box_style": "",
"children": [
"IPY_MODEL_f8e123e03af444058d06bd448aa5ae74",
"IPY_MODEL_47846536bf544f9298eeebd25e236e16"
],
"layout": "IPY_MODEL_302c2c096b064cc2b3741a38ee328acd"
}
},
"f8e123e03af444058d06bd448aa5ae74": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ButtonModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "ButtonModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "ButtonView",
"button_style": "",
"description": "Choose A",
"disabled": false,
"icon": "",
"layout": "IPY_MODEL_377ee6bc07b54c5cbd6e7bf8e5ce77fd",
"style": "IPY_MODEL_9519471aaa924ee6bf40183b8cd32875",
"tooltip": ""
}
},
"47846536bf544f9298eeebd25e236e16": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ButtonModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "ButtonModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "ButtonView",
"button_style": "",
"description": "Choose B",
"disabled": false,
"icon": "",
"layout": "IPY_MODEL_40acca99ea764aaf8ba18ec217623b2d",
"style": "IPY_MODEL_cd8594659a604985aa606180e6d7bcb9",
"tooltip": ""
}
},
"302c2c096b064cc2b3741a38ee328acd": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"377ee6bc07b54c5cbd6e7bf8e5ce77fd": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"9519471aaa924ee6bf40183b8cd32875": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ButtonStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "ButtonStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"button_color": null,
"font_weight": ""
}
},
"40acca99ea764aaf8ba18ec217623b2d": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"cd8594659a604985aa606180e6d7bcb9": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ButtonStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "ButtonStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"button_color": null,
"font_weight": ""
}
}
}
}
},
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
"<a href=\"https://colab.research.google.com/gist/wesslen/77cb22d955fc7c40218e2a3fdb7ecf65/mcmc-elo-prompt-tournament.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "88b6odtAn6Wg",
"outputId": "8cf1f640-3802-48cb-d358-ac02d5979bb9"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"\u001b[2mUsing Python 3.10.12 environment at /usr\u001b[0m\n",
"\u001b[2mAudited \u001b[1m3 packages\u001b[0m \u001b[2min 81ms\u001b[0m\u001b[0m\n"
]
}
],
"source": [
"!uv pip install --system anthropic jinja2 ipywidgets"
]
},
{
"cell_type": "code",
"source": [
"import os\n",
"from google.colab import userdata\n",
"\n",
"os.environ['ANTHROPIC_API_KEY'] = userdata.get('ANTHROPIC_API_KEY')"
],
"metadata": {
"id": "NbutMZKyoDNu"
},
"execution_count": 10,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Import required packages (installed by the first cell)\n",
"import anthropic\n",
"import numpy as np\n",
"from dataclasses import dataclass\n",
"from typing import List, Tuple, Optional, Dict\n",
"import matplotlib.pyplot as plt\n",
"from jinja2 import Template\n",
"import ipywidgets as widgets\n",
"from IPython.display import display, HTML, clear_output\n",
"import json\n",
"import os\n",
"from datetime import datetime\n",
"\n",
"@dataclass\n",
"class PromptTemplate:\n",
" \"\"\"Represents a template for generating prompts\"\"\"\n",
" id: int\n",
" name: str\n",
" template: str\n",
" variables: Dict[str, str]\n",
" current_rating: float = 1500.0\n",
" rating_uncertainty: float = 350.0\n",
"\n",
"@dataclass\n",
"class GeneratedResponse:\n",
" \"\"\"Represents a single response generated from a prompt template\"\"\"\n",
" content: str\n",
" prompt_id: int\n",
" variables_used: Dict[str, str]\n",
" full_prompt: str\n",
"\n",
"class PromptManager:\n",
" \"\"\"Manages prompt templates and generation\"\"\"\n",
"\n",
" def __init__(self):\n",
" self.templates = [\n",
" PromptTemplate(\n",
" id=0,\n",
" name=\"concise_definition\",\n",
" template=\"\"\"Define {{ concept }} in exactly one sentence,\n",
" emphasizing its {{ aspect }}.\"\"\",\n",
" variables={\n",
" \"concept\": \"artificial intelligence\",\n",
" \"aspect\": \"practical applications\"\n",
" }\n",
" ),\n",
" PromptTemplate(\n",
" id=1,\n",
" name=\"analogy_based\",\n",
" template=\"\"\"Create one sentence that explains {{ concept }}\n",
" by comparing it to {{ comparison_object }}.\"\"\",\n",
" variables={\n",
" \"concept\": \"artificial intelligence\",\n",
" \"comparison_object\": \"a human brain\"\n",
" }\n",
" )\n",
" ]\n",
"\n",
" def render_prompt(self, template: PromptTemplate,\n",
" variables: Optional[Dict[str, str]] = None) -> str:\n",
" \"\"\"Render a prompt template with given variables\"\"\"\n",
" vars_to_use = variables if variables else template.variables\n",
" return Template(template.template).render(**vars_to_use)\n",
"\n",
"class ClaudeManager:\n",
" \"\"\"Manages interactions with Claude API\"\"\"\n",
"\n",
" def __init__(self, api_key: str):\n",
" self.client = anthropic.Anthropic(api_key=api_key)\n",
"\n",
" def generate_response(self, prompt: str) -> str:\n",
" \"\"\"Generate a response from Claude\"\"\"\n",
" try:\n",
" response = self.client.messages.create(\n",
" model=\"claude-3-opus-20240229\",\n",
" max_tokens=1024,\n",
" messages=[{\"role\": \"user\", \"content\": prompt}]\n",
" )\n",
" return response.content[0].text\n",
" except Exception as e:\n",
" print(f\"Error generating response: {e}\")\n",
" return \"\"\n",
"\n",
"class MCMCPromptRatingSystem:\n",
" \"\"\"MCMC system for rating prompt templates based on their responses\"\"\"\n",
"\n",
" def __init__(self, templates: List[PromptTemplate],\n",
" temperature: float = 50.0,\n",
" num_samples: int = 1000):\n",
" self.templates = templates\n",
" self.temperature = temperature\n",
" self.num_samples = num_samples\n",
" self.rating_samples = {t.id: [] for t in templates}\n",
"\n",
" def _proposal_step(self, current_ratings: dict) -> Tuple[dict, int, int]:\n",
" \"\"\"Generate proposal by randomly selecting two templates and adjusting their ratings\"\"\"\n",
" proposed_ratings = current_ratings.copy()\n",
"\n",
" # Select two random templates\n",
" template_ids = list(current_ratings.keys())\n",
" i, j = np.random.choice(template_ids, size=2, replace=False)\n",
"\n",
" # Generate proposed rating changes\n",
" delta = np.random.normal(0, self.temperature)\n",
" proposed_ratings[i] += delta\n",
" proposed_ratings[j] -= delta\n",
"\n",
" return proposed_ratings, i, j\n",
"\n",
" def _acceptance_probability(self, current_ratings: dict,\n",
" proposed_ratings: dict,\n",
" i: int, j: int,\n",
" preferences: List[Tuple[int, int]]) -> float:\n",
"        \"\"\"Return the likelihood ratio exp(proposed_ll - current_ll) used as the acceptance probability (not capped at 1)\"\"\"\n",
" def likelihood(ratings: dict) -> float:\n",
" log_lik = 0\n",
" for winner_id, loser_id in preferences:\n",
" rating_diff = ratings[winner_id] - ratings[loser_id]\n",
" prob = 1 / (1 + np.exp(-rating_diff / 400))\n",
" log_lik += np.log(prob)\n",
" return log_lik\n",
"\n",
" current_ll = likelihood(current_ratings)\n",
" proposed_ll = likelihood(proposed_ratings)\n",
"\n",
" return np.exp(proposed_ll - current_ll)\n",
"\n",
" def run_mcmc(self, preferences: List[Tuple[int, int]]) -> dict:\n",
" \"\"\"Run MCMC sampling to generate rating distributions\"\"\"\n",
" current_ratings = {t.id: t.current_rating for t in self.templates}\n",
"\n",
" for _ in range(self.num_samples):\n",
" proposed_ratings, i, j = self._proposal_step(current_ratings)\n",
" accept_prob = self._acceptance_probability(\n",
" current_ratings, proposed_ratings, i, j, preferences)\n",
"\n",
" if np.random.random() < accept_prob:\n",
" current_ratings = proposed_ratings\n",
"\n",
" for template_id in current_ratings:\n",
" self.rating_samples[template_id].append(current_ratings[template_id])\n",
"\n",
" return self.rating_samples\n",
"\n",
" def plot_rating_distributions(self):\n",
" \"\"\"Plot the rating distributions for prompt templates\"\"\"\n",
" plt.figure(figsize=(10, 6))\n",
" colors = ['blue', 'red'] # One color for each template\n",
"\n",
" for idx, (template_id, samples) in enumerate(self.rating_samples.items()):\n",
" template = next(t for t in self.templates if t.id == template_id)\n",
" plt.hist(samples, bins=30, alpha=0.3, color=colors[idx],\n",
" label=f'Template: {template.name}')\n",
"\n",
" plt.xlabel('Rating')\n",
" plt.ylabel('Frequency')\n",
" plt.title('Prompt Template Rating Distributions')\n",
" plt.legend()\n",
" plt.show()\n",
"\n",
"class InteractivePromptEvaluator:\n",
" \"\"\"Manages interactive collection of preferences between prompt templates\"\"\"\n",
"\n",
" def __init__(self, claude_manager: ClaudeManager, prompt_manager: PromptManager):\n",
" self.claude_manager = claude_manager\n",
" self.prompt_manager = prompt_manager\n",
" self.responses: Dict[int, List[GeneratedResponse]] = {\n",
" t.id: [] for t in prompt_manager.templates\n",
" }\n",
" self.preferences: List[Tuple[int, int]] = []\n",
" self.current_pair: Optional[Tuple[GeneratedResponse, GeneratedResponse]] = None\n",
"\n",
" def generate_responses(self, num_variations: int = 3):\n",
" \"\"\"Generate responses for each template with slight variations\"\"\"\n",
" for template in self.prompt_manager.templates:\n",
" for _ in range(num_variations):\n",
"                # Copy the template's default variables and, if present, swap 'concept' for a randomly chosen one\n",
" vars_copy = template.variables.copy()\n",
" if 'concept' in vars_copy:\n",
" concepts = ['machine learning', 'neural networks', 'deep learning']\n",
" vars_copy['concept'] = np.random.choice(concepts)\n",
"\n",
" prompt = self.prompt_manager.render_prompt(template, vars_copy)\n",
" content = self.claude_manager.generate_response(prompt)\n",
"\n",
" response = GeneratedResponse(\n",
" content=content,\n",
" prompt_id=template.id,\n",
" variables_used=vars_copy,\n",
" full_prompt=prompt\n",
" )\n",
" self.responses[template.id].append(response)\n",
"\n",
" def create_comparison_widget(self):\n",
" \"\"\"Create widget for comparing responses from different templates\"\"\"\n",
" # Randomly select responses from different templates\n",
" template_ids = list(self.responses.keys())\n",
" t1, t2 = np.random.choice(template_ids, size=2, replace=False)\n",
"\n",
" resp_1 = np.random.choice(self.responses[t1])\n",
" resp_2 = np.random.choice(self.responses[t2])\n",
"\n",
" self.current_pair = (resp_1, resp_2)\n",
"\n",
" display(HTML(f\"\"\"\n",
" <h3>Compare these responses:</h3>\n",
" <div style='margin: 10px; padding: 10px; border: 1px solid #ccc;'>\n",
" <p><b>Response A (Template: {self.prompt_manager.templates[t1].name})</b></p>\n",
" <p>{resp_1.content}</p>\n",
" <p><i>Generated from: {resp_1.full_prompt}</i></p>\n",
" </div>\n",
" <div style='margin: 10px; padding: 10px; border: 1px solid #ccc;'>\n",
" <p><b>Response B (Template: {self.prompt_manager.templates[t2].name})</b></p>\n",
" <p>{resp_2.content}</p>\n",
" <p><i>Generated from: {resp_2.full_prompt}</i></p>\n",
" </div>\n",
" \"\"\"))\n",
"\n",
" button_a = widgets.Button(description='Choose A')\n",
" button_b = widgets.Button(description='Choose B')\n",
"\n",
" button_a.on_click(lambda _: self._handle_choice('A'))\n",
" button_b.on_click(lambda _: self._handle_choice('B'))\n",
"\n",
" display(widgets.HBox([button_a, button_b]))\n",
"\n",
" def _handle_choice(self, choice: str):\n",
" \"\"\"Handle user's choice between two responses\"\"\"\n",
" if self.current_pair:\n",
" winner = self.current_pair[0] if choice == 'A' else self.current_pair[1]\n",
" loser = self.current_pair[1] if choice == 'A' else self.current_pair[0]\n",
" self.preferences.append((winner.prompt_id, loser.prompt_id))\n",
" clear_output(wait=True)\n",
" print(f\"Recorded preference for template: {self.prompt_manager.templates[winner.prompt_id].name}\")\n",
" self.current_pair = None\n",
"\n",
"def run_interactive_session():\n",
" \"\"\"Main function to run an interactive prompt evaluation session\"\"\"\n",
"\n",
" # Setup\n",
" api_key = os.getenv('ANTHROPIC_API_KEY')\n",
" if not api_key:\n",
" api_key = input(\"Enter your Anthropic API key: \")\n",
" os.environ['ANTHROPIC_API_KEY'] = api_key\n",
"\n",
" claude_manager = ClaudeManager(api_key)\n",
" prompt_manager = PromptManager()\n",
" evaluator = InteractivePromptEvaluator(claude_manager, prompt_manager)\n",
"\n",
" # Generate initial responses\n",
" print(\"Generating responses...\")\n",
" evaluator.generate_responses(num_variations=3)\n",
"\n",
" # Collect preferences\n",
" num_comparisons = 6 # Number of comparisons to make\n",
" print(f\"\\nPlease make {num_comparisons} comparisons:\")\n",
"\n",
" for i in range(num_comparisons):\n",
" print(f\"\\nComparison {i+1}/{num_comparisons}\")\n",
" evaluator.create_comparison_widget()\n",
" input(\"Press Enter after making your choice...\")\n",
" clear_output(wait=True)\n",
"\n",
" # Analyze results with MCMC\n",
" mcmc_system = MCMCPromptRatingSystem(prompt_manager.templates)\n",
" rating_samples = mcmc_system.run_mcmc(evaluator.preferences)\n",
"\n",
" # Display results\n",
" print(\"\\nResults:\")\n",
" for template in prompt_manager.templates:\n",
" samples = rating_samples[template.id]\n",
" mean = np.mean(samples)\n",
" ci_lower = np.percentile(samples, 2.5)\n",
" ci_upper = np.percentile(samples, 97.5)\n",
"\n",
" print(f\"\\nTemplate: {template.name}\")\n",
" print(f\"Mean rating: {mean:.1f}\")\n",
" print(f\"95% credible interval: ({ci_lower:.1f}, {ci_upper:.1f})\")\n",
"\n",
" mcmc_system.plot_rating_distributions()\n",
"\n",
"# Run in Colab\n",
"if __name__ == \"__main__\":\n",
" print(\"Running interactive prompt evaluation session...\")\n",
" run_interactive_session()"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 433,
"referenced_widgets": [
"f1687c78b82044f9861546ff33513c03",
"00dd2af8d8f34ad3998153680e7e2ff8",
"5c6b3fe72e9043b4a2f8e1f2a544e445",
"80c6300ec0e44fd7a92c34aebf78bfbc",
"f15d537a748f4c978683d34fa0c5890b",
"b43e5e46d77b4caab5538caeeddf6315",
"a99c1b3d3e7d4d12a6c898f82652c170",
"cd5deecd8de64888bda7709f93fc8678",
"898bbcc9208848da850f727238b9435e",
"917c2f23b1b84cb3b45789267f9efa4d",
"b6f6374543624c40ab6eabfeda62bdf8",
"dc7986e83f9f4cd69265bd3f6c46a7ea",
"0c4f64ce7c4d475ea4e1ab3bc718309e",
"e6c02811c55844d3b98dbf931e29bce8",
"73575953daf04cceab37e8cb34c49a96",
"1dca3aaade1c47d18b57f463d5c4ae6e",
"2d2cf0f7fd534250a0dabb8d2f47c182",
"775b6a052bd04ce992aec3867c3e067c",
"853e39c321c1492ea166d2fe50f070d1",
"ca2b670185d34e268091dad81942d6fe",
"906d5d4a8dcc434dad8af3a50e42a957",
"38638d7eccc740fd8fa3fd3ee63579f4",
"8b1359cc7b4947f1ba39145553ce196f",
"d7c72493913d412e91652740556aedc3",
"0eef207e7f0e4c5e98da0029baf58c60",
"a3b1e471ae6f4f458aa9bd1f72a2abfe",
"080f55a633864117a7d73b65d5ad37cb",
"506aba7d82594dd9b12be82f995f7546",
"30c7b72a36644a5d85cb2a8c4be8e79c",
"b6eaa052e26f4a0ea71b32a4b8eac871",
"3cb97a70f85842399d7ce9c7cd7046a6",
"7c236e9a26d74e908154cb17e013d279",
"760bf461bae34871b0775669558a0c50",
"19ef9fc5a8d643e897c88526fa7ceb66",
"7e86436da1bd4b70a4d76646bdd2b68f",
"1fa4701f793947a9817bbeb4c051900d",
"f8e30cb90fcb48deb851db881e267912",
"eacc6723d110440cb2f2c9b6cd05ee37",
"c1a40fa15a534c7da4cd8f0816e34138",
"fc495f4d82884a4c856bd8e534bc2b97",
"82973072c9b147debfa730177b775f09",
"f8e123e03af444058d06bd448aa5ae74",
"47846536bf544f9298eeebd25e236e16",
"302c2c096b064cc2b3741a38ee328acd",
"377ee6bc07b54c5cbd6e7bf8e5ce77fd",
"9519471aaa924ee6bf40183b8cd32875",
"40acca99ea764aaf8ba18ec217623b2d",
"cd8594659a604985aa606180e6d7bcb9"
]
},
"id": "LRVBE937n6rr",
"outputId": "53d7cc8d-c2cf-48fd-e70a-45ef7d40c168"
},
"execution_count": 12,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Recorded preference for template: analogy_based\n"
]
}
]
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment