Created
October 27, 2024 17:57
-
-
Save wesslen/cb9f5782864e9eceb8ade832fa59798d to your computer and use it in GitHub Desktop.
fireworks-text-to-sql-berka.ipynb
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { | |
| "nbformat": 4, | |
| "nbformat_minor": 0, | |
| "metadata": { | |
| "colab": { | |
| "provenance": [], | |
| "authorship_tag": "ABX9TyPKPzoItc2NYH5LTuiJJ/ya", | |
| "include_colab_link": true | |
| }, | |
| "kernelspec": { | |
| "name": "python3", | |
| "display_name": "Python 3" | |
| }, | |
| "language_info": { | |
| "name": "python" | |
| } | |
| }, | |
| "cells": [ | |
| { | |
| "cell_type": "markdown", | |
| "metadata": { | |
| "id": "view-in-github", | |
| "colab_type": "text" | |
| }, | |
| "source": [ | |
| "<a href=\"https://colab.research.google.com/gist/wesslen/cb9f5782864e9eceb8ade832fa59798d/fireworks-text-to-sql-berka.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 1, | |
| "metadata": { | |
| "id": "Pc3RwwE64pby" | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "%%capture\n", | |
| "!pip install --upgrade fireworks-ai" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "source": [ | |
| "schema = \"\"\"\\\n", | |
| "Table 1 name: Account\n", | |
| "Column|\tDescription|\tNotes\n", | |
| "0|account_id|\tIdentification of the account|\n", | |
| "1|district_id|\tLocation of the branch|\n", | |
| "2|date|\tDate of the account's creation|\tIn the form: YYMMDD\n", | |
| "3|frequency|\tFrequency of statement issuance|\t\"POPLATEK MESICNE\" - Monthly Issuance\n", | |
| "\"POPLATEK TYDNE\" - Weekly Issuance\n", | |
| "\"POPLATEK PO OBRATU\" - Issuance After Transaction\n", | |
| "\\\n", | |
| "Table 2 name: Loan\n", | |
| "Column|Description|Notes\n", | |
| "0|disp_id|\tRecord Identifier|\n", | |
| "1|loan_id|\tRecord Identifier|\n", | |
| "2|account_id|\tAccount Identifier|\n", | |
| "3|date|\tDate when loan was granted|\tIn the form: YYMMDD\n", | |
| "4|amount|\tAmount of Loan|\n", | |
| "5|duration|\tDuration of Loan|\n", | |
| "6|payments|Monthly Payments on Loan|\n", | |
| "7|status\t|Status in paying off the loan|\t'A' stands for contract finished, no problems\n", | |
| "'B' stands for contract finished, loan not payed\n", | |
| "'C' stands for running contract, OK thus-far\n", | |
| "'D' stands for running contract, client in debt\n", | |
| "\"\"\"\n", | |
| "entity_description = \"\"\"Each account has both static characteristics (e.g. date of creation, address of the branch) given in relation \"account\" and dynamic characteristics (e.g. payments debited or credited, balances) given in relations \"permanent order\" and \"transaction\".\n", | |
| "Relation \"client\" describes characteristics of persons who can manipulate with the accounts.\n", | |
| "One client can have more accounts, more clients can manipulate with single account; clients and accounts are related together in relation \"disposition\".\n", | |
| "Relations \"loan\" and \"credit card\" describe some services which the bank offers to its clients;\n", | |
| "More than one credit card can be issued to an account,\n", | |
| "At most one loan can be granted for an account.\n", | |
| "Relation \"demographic data\" gives some publicly available information about the districts (e.g. the unemployment rate); additional information about the clients can be deduced from this.\"\"\"" | |
| ], | |
| "metadata": { | |
| "execution": { | |
| "iopub.status.busy": "2024-09-10T14:23:07.191228Z", | |
| "iopub.execute_input": "2024-09-10T14:23:07.191976Z", | |
| "iopub.status.idle": "2024-09-10T14:23:07.198359Z", | |
| "shell.execute_reply.started": "2024-09-10T14:23:07.191929Z", | |
| "shell.execute_reply": "2024-09-10T14:23:07.19732Z" | |
| }, | |
| "trusted": true, | |
| "id": "0fekWf5i0kNz" | |
| }, | |
| "execution_count": 2, | |
| "outputs": [] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "source": [ | |
| "template=f\"\"\"\n", | |
| "First, you will be presented with the metadata of the dataset which includes the schema and some examples of what values look like:\n", | |
| "<dataset_metadata>\n", | |
| "{schema}\n", | |
| "</dataset_metadata>\n", | |
| "<entity_relationsip_description>\n", | |
| "{entity_description}\n", | |
| "</entity_relationsip_description>\n", | |
| "\n", | |
| "\n", | |
| "Based on this information, generate 5 different tasks that a financial data analyst might want to perform on this dataset.\n", | |
| "For each task:\n", | |
| "1. Describe the task in a clear and concise manner.\n", | |
| "2. Explain why this task is important in the context of financial data analysis.\n", | |
| "3. Generate a SQL query that would accomplish the task using the given dataset.\n", | |
| "\"\"\"\n", | |
| "user = template\n", | |
| "system = f\"\"\"You are an AI assistant acting as a data analyst in the financial industry.\n", | |
| "Your task is to generate common queries that a financial data analyst might consider running on a given relational data,\n", | |
| "explain the importance of each task, and provide a corresponding SQL query.\n", | |
| "Write a sqlite query to answer the question. Follow instructions exactly. Do not print out instructions. Only output your response in JSON format. Do not include new line in the JSON, make sure it could be read directly as a JSON file.\n", | |
| "\"\"\"" | |
| ], | |
| "metadata": { | |
| "height": 30, | |
| "execution": { | |
| "iopub.status.busy": "2024-09-10T14:23:07.200445Z", | |
| "iopub.execute_input": "2024-09-10T14:23:07.200795Z", | |
| "iopub.status.idle": "2024-09-10T14:23:07.215916Z", | |
| "shell.execute_reply.started": "2024-09-10T14:23:07.200761Z", | |
| "shell.execute_reply": "2024-09-10T14:23:07.215139Z" | |
| }, | |
| "trusted": true, | |
| "id": "vTKcBJMH0kNz" | |
| }, | |
| "execution_count": 26, | |
| "outputs": [] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "source": [ | |
| "from fireworks.client import Fireworks\n", | |
| "\n", | |
| "client = Fireworks(api_key=API_KEY)\n", | |
| "all_results = []\n", | |
| "\n", | |
| "response = client.chat.completions.create(\n", | |
| " model=\"accounts/fireworks/models/qwen2p5-72b-instruct\",\n", | |
| " messages=[\n", | |
| " {\"role\": \"system\", \"content\": system},\n", | |
| " {\"role\": \"user\", \"content\": user}\n", | |
| " ],\n", | |
| ")\n", | |
| "\n", | |
| "# Append model name to response content\n", | |
| "response_content = response.choices[0].message.content" | |
| ], | |
| "metadata": { | |
| "id": "r2nEVFoxCnPH" | |
| }, | |
| "execution_count": 39, | |
| "outputs": [] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "source": [ | |
| "print(response_content)" | |
| ], | |
| "metadata": { | |
| "colab": { | |
| "base_uri": "https://localhost:8080/" | |
| }, | |
| "id": "IgtJGa5IC2C_", | |
| "outputId": "669b67c0-6e5e-4916-f726-9851ee75c8f2" | |
| }, | |
| "execution_count": 40, | |
| "outputs": [ | |
| { | |
| "output_type": "stream", | |
| "name": "stdout", | |
| "text": [ | |
| "{\"tasks\":[{\"task\":\"Identify the number of accounts created each month in the year 1993.\",\"importance\":\"This task helps in understanding the growth trend of the bank's customer base over time, which is crucial for strategic planning and resource allocation.\",\"query\":\"SELECT STRFTIME('%Y-%m', '1993'||SUBSTR(date, 3, 4), 'unixepoch') AS month, COUNT(account_id) AS num_accounts FROM Account WHERE SUBSTR(date, 1, 2) = '93' GROUP BY month ORDER BY month\"},{\"task\":\"List all accounts that have a loan with a status of 'D' (running contract, client in debt) in descending order of loan amount.\",\"importance\":\"This task is important for risk management and to identify accounts that may require additional monitoring or intervention to prevent further debt accumulation.\",\"query\":\"SELECT a.account_id, l.amount, l.status FROM Account a JOIN Loan l ON a.account_id = l.account_id WHERE l.status = 'D' ORDER BY l.amount DESC\"},{\"task\":\"Calculate the total amount of loans granted per district in the first quarter of 1994.\",\"importance\":\"This information can help in assessing lending practices across different regions and identifying areas with higher demand for loans, which can guide future marketing efforts and regional strategies.\",\"query\":\"SELECT a.district_id, SUM(l.amount) AS total_loans FROM Account a JOIN Loan l ON a.account_id = l.account_id WHERE l.date BETWEEN '940101' AND '940331' GROUP BY a.district_id ORDER BY total_loans DESC\"},{\"task\":\"Find the number of loans that were fully paid off without any issues ('A' status) and the number that were not paid off ('B' status) for each account.\",\"importance\":\"This analysis provides insights into the creditworthiness of accounts and helps in evaluating the effectiveness of the bank's lending policies and credit assessment processes.\",\"query\":\"SELECT a.account_id, SUM(CASE WHEN l.status = 'A' THEN 1 ELSE 0 END) AS paid_off, SUM(CASE WHEN l.status = 'B' THEN 1 ELSE 0 END) AS not_paid FROM Account a JOIN Loan l ON a.account_id = l.account_id GROUP BY a.account_id ORDER BY a.account_id\"},{\"task\":\"Determine the average loan duration and average monthly payment for accounts that have a weekly statement issuance frequency.\",\"importance\":\"Understanding the loan characteristics of accounts with different statement issuance frequencies can help in tailoring financial products and services to meet the specific needs of these customers.\",\"query\":\"SELECT AVG(l.duration) AS avg_duration, AVG(l.payments) AS avg_monthly_payment FROM Account a JOIN Loan l ON a.account_id = l.account_id WHERE a.frequency = 'POPLATEK TYDNE'\"}]}\n" | |
| ] | |
| } | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "source": [ | |
| "models = [\n", | |
| " \"accounts/fireworks/models/llama-v3p1-405b-instruct\",\n", | |
| " \"accounts/fireworks/models/llama-v3p1-70b-instruct\",\n", | |
| " \"accounts/fireworks/models/mixtral-8x22b-instruct\",\n", | |
| " # \"accounts/fireworks/models/starcoder-16b\", # needs to use completion task\n", | |
| " \"accounts/fireworks/models/qwen2p5-72b-instruct\",\n", | |
| "\n", | |
| " # \"accounts/fireworks/models/codegemma-7b\",\n", | |
| " # \"accounts/fireworks/models/code-llama-34b-instruct\",\n", | |
| " # \"accounts/fireworks/models/dolphin-2p6-mixtral-8x7b\",\n", | |
| " # \"accounts/fireworks/models/deepseek-coder-v2-instruct\",\n", | |
| " # \"accounts/fireworks/models/deepseek-coder-33b-instruct\",\n", | |
| " # \"accounts/fireworks/models/phind-code-llama-34b-v1\",\n", | |
| " # \"accounts/fireworks/models/starcoder2-15b\"\n", | |
| "]" | |
| ], | |
| "metadata": { | |
| "id": "Rh3sqJsJ75g9" | |
| }, | |
| "execution_count": 12, | |
| "outputs": [] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "source": [ | |
| "import json\n", | |
| "\n", | |
| "from google.colab import userdata\n", | |
| "API_KEY = userdata.get('FIREWORKS_API_KEY')\n", | |
| "\n", | |
| "from fireworks.client import Fireworks\n", | |
| "\n", | |
| "def run_models_and_save_to_jsonl(models, system_prompt, user_prompt, api_key, filename=\"results.jsonl\"):\n", | |
| " \"\"\"\n", | |
| " Loops through a list of models, sends a chat completion request,\n", | |
| " appends the model name to the response, and saves all results to a JSONL file.\n", | |
| "\n", | |
| " Args:\n", | |
| " models (list): A list of model names.\n", | |
| " system_prompt (str): The system prompt for the chat completion.\n", | |
| " user_prompt (str): The user prompt for the chat completion.\n", | |
| " api_key (str): Your Fireworks API key.\n", | |
| " filename (str, optional): The name of the JSONL file to save the results to.\n", | |
| " Defaults to \"results.jsonl\".\n", | |
| " \"\"\"\n", | |
| "\n", | |
| " from fireworks.client import Fireworks\n", | |
| "\n", | |
| " client = Fireworks(api_key=api_key)\n", | |
| " all_results = []\n", | |
| "\n", | |
| " for model_name in models:\n", | |
| " try:\n", | |
| " response = client.chat.completions.create(\n", | |
| " model=model_name,\n", | |
| " messages=[\n", | |
| " {\"role\": \"system\", \"content\": system_prompt},\n", | |
| " {\"role\": \"user\", \"content\": user_prompt}\n", | |
| " ],\n", | |
| " )\n", | |
| "\n", | |
| " # Append model name to response content\n", | |
| " response_content = response.choices[0].message.content\n", | |
| "\n", | |
| " data = json.loads(response_content)\n", | |
| "\n", | |
| " for ex in data:\n", | |
| " ex[\"model\"] = \"mixtral\"\n", | |
| " all_results.append(ex)\n", | |
| "\n", | |
| " # add in five second wait\n", | |
| "\n", | |
| " except Exception as e:\n", | |
| " print(f\"Error processing model {model_name}: {e}\")\n", | |
| " all_results.append(f\"Error processing model {model_name}: {e}\")\n", | |
| "\n", | |
| "# Save results to JSONL\n", | |
| " with open(filename, 'w', encoding='utf-8') as jsonlfile:\n", | |
| " for result in all_results:\n", | |
| " json.dump(result, jsonlfile) # Write each result as a JSON object\n", | |
| " jsonlfile.write('\\n') # Add a newline to separate JSON objects" | |
| ], | |
| "metadata": { | |
| "id": "i5Y9NDc3594-" | |
| }, | |
| "execution_count": 35, | |
| "outputs": [] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "source": [ | |
| "run_models_and_save_to_jsonl(models, system, user, API_KEY)" | |
| ], | |
| "metadata": { | |
| "colab": { | |
| "base_uri": "https://localhost:8080/" | |
| }, | |
| "id": "IN5cLqNr-38t", | |
| "outputId": "64c7236f-9c68-4501-c83b-848c76381982" | |
| }, | |
| "execution_count": 36, | |
| "outputs": [ | |
| { | |
| "output_type": "stream", | |
| "name": "stdout", | |
| "text": [ | |
| "Error processing model accounts/fireworks/models/llama-v3p1-405b-instruct: Expecting value: line 1 column 1 (char 0)\n", | |
| "Error processing model accounts/fireworks/models/llama-v3p1-70b-instruct: 'str' object does not support item assignment\n", | |
| "Error processing model accounts/fireworks/models/starcoder-16b: {'error': {'object': 'error', 'type': 'invalid_request_error', 'message': 'model is missing conversation_config, please use /completions API instead'}}\n", | |
| "Error processing model accounts/fireworks/models/qwen2p5-72b-instruct: 'str' object does not support item assignment\n" | |
| ] | |
| } | |
| ] | |
| } | |
| ] | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment