Last active
December 27, 2024 21:49
-
-
Save calilisantos/2fa0f64f650c41b27233cd898d0fe597 to your computer and use it in GitHub Desktop.
Snippet to get size information from an Azure Blob Storage account
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { | |
| "cells": [ | |
| { | |
| "cell_type": "markdown", | |
| "metadata": { | |
| "application/vnd.databricks.v1+cell": { | |
| "cellMetadata": {}, | |
| "inputWidgets": {}, | |
| "nuid": "01699b13-1e4c-4058-b860-916ba3656dc8", | |
| "showTitle": false, | |
| "tableResultSettingsMap": {}, | |
| "title": "" | |
| } | |
| }, | |
| "source": [ | |
| "## Dependencies" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 0, | |
| "metadata": { | |
| "application/vnd.databricks.v1+cell": { | |
| "cellMetadata": {}, | |
| "inputWidgets": {}, | |
| "nuid": "80af41f1-2069-4411-bd69-6c3992cb62e1", | |
| "showTitle": false, | |
| "tableResultSettingsMap": {}, | |
| "title": "" | |
| } | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "%sh\n", | |
| "pip install azure-storage-blob azure-identity aiohttp" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": { | |
| "application/vnd.databricks.v1+cell": { | |
| "cellMetadata": {}, | |
| "inputWidgets": {}, | |
| "nuid": "ec176ee7-bd02-4cfb-a6ef-cf19ea83950d", | |
| "showTitle": false, | |
| "tableResultSettingsMap": {}, | |
| "title": "" | |
| } | |
| }, | |
| "source": [ | |
| "## Setup" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 0, | |
| "metadata": { | |
| "application/vnd.databricks.v1+cell": { | |
| "cellMetadata": {}, | |
| "inputWidgets": {}, | |
| "nuid": "f28e52e8-328f-4d5c-883a-549db0bf0c79", | |
| "showTitle": false, | |
| "tableResultSettingsMap": {}, | |
| "title": "" | |
| } | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "import asyncio\n", | |
| "from azure.identity.aio import ClientSecretCredential\n", | |
| "from azure.storage.blob.aio import BlobServiceClient, ContainerClient\n", | |
| "\n", | |
| "client_credential = ClientSecretCredential(\n", | |
| " tenant_id=\"seu_tenant_id\",\n", | |
| " client_id=\"seu_client_id\",\n", | |
| " client_secret=\"seu_client_secret\"\n", | |
| ")\n", | |
| "storage = \"seu_storage\"\n", | |
| "storage_service = f'https://{storage}.blob.core.windows.net/'\n", | |
| "\n", | |
| "blob_client = BlobServiceClient(storage_service, credential=client_credential)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": { | |
| "application/vnd.databricks.v1+cell": { | |
| "cellMetadata": {}, | |
| "inputWidgets": {}, | |
| "nuid": "83236a49-9ca5-4b98-8e28-f3e9af451db1", | |
| "showTitle": false, | |
| "tableResultSettingsMap": {}, | |
| "title": "" | |
| } | |
| }, | |
| "source": [ | |
| "## list_containers function\n", | |
| "* #### Getting all container names from a storage account" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 0, | |
| "metadata": { | |
| "application/vnd.databricks.v1+cell": { | |
| "cellMetadata": {}, | |
| "inputWidgets": {}, | |
| "nuid": "4ab51738-db3f-4890-acf9-be9ab22139f1", | |
| "showTitle": false, | |
| "tableResultSettingsMap": {}, | |
| "title": "" | |
| } | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "async def list_containers(blob_service_client: BlobServiceClient):\n", | |
| " containers = []\n", | |
| " async for container in blob_service_client.list_containers():\n", | |
| " containers.append(container.name)\n", | |
| "\n", | |
| " yield containers\n", | |
| "\n", | |
| "containers_info = [container async for container in list_containers(blob_service_client=blob_client)]\n", | |
| "print(containers_info[0])" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": { | |
| "application/vnd.databricks.v1+cell": { | |
| "cellMetadata": {}, | |
| "inputWidgets": {}, | |
| "nuid": "03ea5a21-c748-4c47-922a-5e8474fe3ccc", | |
| "showTitle": false, | |
| "tableResultSettingsMap": {}, | |
| "title": "" | |
| } | |
| }, | |
| "source": [ | |
| "## Container client instance" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 0, | |
| "metadata": { | |
| "application/vnd.databricks.v1+cell": { | |
| "cellMetadata": {}, | |
| "inputWidgets": {}, | |
| "nuid": "cf31db9e-f0d8-4279-bb0a-7215fd3062a7", | |
| "showTitle": false, | |
| "tableResultSettingsMap": {}, | |
| "title": "" | |
| } | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "container_client = ContainerClient(\n", | |
| " account_url=storage_service,\n", | |
| " credential=client_credential,\n", | |
| " container_name=containers_info[0][-1]\n", | |
| ")" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": { | |
| "application/vnd.databricks.v1+cell": { | |
| "cellMetadata": {}, | |
| "inputWidgets": {}, | |
| "nuid": "8ae24562-573f-4e17-9928-87271654229d", | |
| "showTitle": false, | |
| "tableResultSettingsMap": {}, | |
| "title": "" | |
| } | |
| }, | |
| "source": [ | |
| "## list_from_blobs function\n", | |
| "* #### Getting the total size of all blobs whose names start with a given path, using a container client" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 0, | |
| "metadata": { | |
| "application/vnd.databricks.v1+cell": { | |
| "cellMetadata": {}, | |
| "inputWidgets": {}, | |
| "nuid": "3831682a-44c1-491d-be98-f057b8954f70", | |
| "showTitle": false, | |
| "tableResultSettingsMap": {}, | |
| "title": "" | |
| } | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "async def list_from_blobs(container_client, blob_path):\n", | |
| " size = 0\n", | |
| " async for blob in container_client.list_blobs(name_starts_with=blob_path):\n", | |
| " size += int(blob.size)\n", | |
| " \n", | |
| " yield {'blob_name': blob_path, 'size': size}\n", | |
| "\n", | |
| "blob_dir = \"seu_blob\"\n", | |
| "blob_info = [blob async for blob in list_from_blobs(container_client=container_client, blob_path=blob_dir)]\n", | |
| "blob_info" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": { | |
| "application/vnd.databricks.v1+cell": { | |
| "cellMetadata": {}, | |
| "inputWidgets": {}, | |
| "nuid": "c5e32507-978b-47d6-80e1-14256aaaab6d", | |
| "showTitle": false, | |
| "tableResultSettingsMap": {}, | |
| "title": "" | |
| } | |
| }, | |
| "source": [ | |
| "## list_from_container function\n", | |
| "* #### Getting the name and size of every blob from a container client" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 0, | |
| "metadata": { | |
| "application/vnd.databricks.v1+cell": { | |
| "cellMetadata": {}, | |
| "inputWidgets": {}, | |
| "nuid": "ed4906be-6408-4866-ae3a-0d06f03720b6", | |
| "showTitle": false, | |
| "tableResultSettingsMap": {}, | |
| "title": "" | |
| } | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "async def list_from_container(container_client):\n", | |
| " blobs_info = []\n", | |
| " async for blob in container_client.list_blobs():\n", | |
| " blobs_info.append({'blob_name': str(blob.name), 'size': int(blob.size)})\n", | |
| " \n", | |
| " yield blobs_info\n", | |
| "\n", | |
| "blobs_info = [blob async for blob in list_from_container(container_client=container_client)]\n", | |
| "blobs_info[0]" | |
| ] | |
| } | |
| ], | |
| "metadata": { | |
| "application/vnd.databricks.v1+notebook": { | |
| "computePreferences": null, | |
| "dashboards": [], | |
| "environmentMetadata": { | |
| "base_environment": "", | |
| "client": "1" | |
| }, | |
| "language": "python", | |
| "notebookMetadata": { | |
| "pythonIndentUnit": 4 | |
| }, | |
| "notebookName": "azure_blobs_info", | |
| "widgets": {} | |
| } | |
| }, | |
| "nbformat": 4, | |
| "nbformat_minor": 0 | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment