Last active
November 27, 2017 17:28
-
-
Save n01r/3e83919258768a22015c41a9c0bb0b2c to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { | |
| "cells": [ | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "# Memory requirement calculator for PIConGPU <a class=\"tocSkip\">" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "## Imports" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 3, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "import h5py as h5\n", | |
| "import numpy as np\n", | |
| "import matplotlib as mpl\n", | |
| "from matplotlib import pyplot as plt\n", | |
| "from mpl_toolkits.mplot3d import Axes3D\n", | |
| "from matplotlib.colors import LogNorm\n", | |
| "%matplotlib inline\n", | |
| "import sys\n", | |
| "import os\n", | |
| "from scipy import constants as sc" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "## Override Matplotlib Defaults" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 4, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
# Notebook-wide Matplotlib defaults: every text element uses size 20,
# lines are drawn thick, and legends are drawn without a frame.
params = dict([
    # text sizes
    ('font.size', 20),
    # line appearance
    ('lines.linewidth', 3),
    # legend appearance
    ('legend.fontsize', 20),
    ('legend.frameon', False),
    ('legend.numpoints', 1),
    # tick labels
    ('xtick.labelsize', 20),
    ('ytick.labelsize', 20),
    # default figure size
    ('figure.figsize', [12, 8]),
    # axis labels
    ('axes.labelsize', 20),
])
mpl.rcParams.update(params)
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "# Implementation" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "## Field Memory" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 43, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
def mem_req_by_fields(Lx, Ly, Lz, FieldTMPSlots=1,
                      SC_size=(8, 8, 4), guard_size_SC=(1, 1, 1)):
    """ Memory reserved for fields on each GPU

    Params:

        Lx, Ly, Lz : local (per GPU) grid size in cells in x, y, z
        FieldTMPSlots : number of temporary field slots that are stored in
            addition to the 3 components each of E, B and J
        SC_size : super cell size in cells in x, y, z (default: 8, 8, 4)
        guard_size_SC : guard size in super cells in x, y, z (default: 1, 1, 1)

    Returns:
        req_mem required memory {unit: bytes}
    """
    # Force 64-bit integers: the platform default integer of numpy can be
    # 32 bit wide, which is too small for byte counts beyond 2 GiB.
    supercell = np.asarray(SC_size, dtype=np.int64)
    guard = np.asarray(guard_size_SC, dtype=np.int64)

    # NOTE(review): the guard is added once per axis here. If guards exist on
    # both sides of the local domain, the factor should be 2 - confirm.
    local_cells = (Lx + supercell[0] * guard[0]) \
        * (Ly + supercell[1] * guard[1]) \
        * (Lz + supercell[2] * guard[2])

    # size of a data entry in bytes
    data_size = np.float32().itemsize
    # number of fields: 3 * 3 = x,y,z for E,B,J (plus the temporary slots)
    num_fields = 3 * 3 + FieldTMPSlots
    req_mem = data_size * num_fields * local_cells
    return req_mem
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "## Particles" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 35, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
def mem_req_by_particles(
    Lx,
    Ly,
    Lz,
    num_additional_attributes = 0,
    particles_per_cell = 2,
    bytes_per_additional_attribute = 4
):
    """ Memory reserved for all particles of a species on a GPU.
    We currently neglect the constant species memory.

    Params:

        Lx, Ly, Lz : size in cells (x, y, z) of the volume that is filled
            with particles of this species
        num_additional_attributes : number of additional attributes like e.g. `boundElectrons`
        particles_per_cell : number of particles of this species in each cell
        bytes_per_additional_attribute : size of one additional attribute in
            bytes; the default of 4 is an assumption - check if that's really
            the case for the attributes in use

    Returns:
        req_mem required memory {unit: bytes} per GPU and species
    """
    # memory required by the standard particle attributes {unit: bytes};
    # 64-bit integers are forced because the platform default integer of
    # numpy can be 32 bit wide, which is too small for the resulting byte counts
    standard_attribute_mem = np.array([
        3 * 4, # momentum
        3 * 4, # position
        1 * 8, # multimask
        1 * 8, # cell index in supercell {lcellId_t}
        1 * 8  # weighting
    ], dtype=np.int64)

    additional_mem = num_additional_attributes * bytes_per_additional_attribute

    local_cells = Lx * Ly * Lz

    # bytes per particle times the number of particles in the volume
    req_mem = local_cells * (np.sum(standard_attribute_mem) + additional_mem) * particles_per_cell

    return req_mem
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "## RNG states" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 9, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
def mem_req_by_rng(Lx,Ly,Lz):
    """ Memory reserved on each GPU for the states of the random number
    generator (the RNG we use is: MRG32ka).

    Params:

        Lx, Ly, Lz : grid size in cells in x, y, z; a state is counted for
            every one of these cells

    Returns:
        req_mem required memory {unit: bytes} per GPU
    """
    # 6 * 8 = 48 bytes of generator state are budgeted per cell
    bytes_per_cell = 6 * 8
    return bytes_per_cell * (Lx * Ly * Lz)
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "# Cu 30nm foil setup" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "## *Spaghetti* case" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 44, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Memory requirement per GPU for fields: 3072.3515625 MB\n", | |
| "Memory requirement per GPU and species:\n", | |
| "e: 2657.578125 MB\n", | |
| "H: 5.83984375 MB\n", | |
| "C: 5.83984375 MB\n", | |
| "Cu: 99.27734375 MB\n", | |
| "Memory requirement per GPU for RNG states: 3018.75 MB\n", | |
| "Sum of required GPU memory: 8859.63671875 MB\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
# size of the volume handled by one GPU, in cells
Lx, Ly, Lz = 184, 5600, 64

# size of the target area inside that volume, in cells
target_x, target_y, target_z = Lx, 17, Lz

# --- fields: computed for the full per-GPU volume ---
field_gpu = mem_req_by_fields(Lx, Ly, Lz, FieldTMPSlots=2)
print("Memory requirement per GPU for fields: ", field_gpu / (1024 * 1024), "MB")

# --- particles: only the target area contributes here ---
e_gpu = mem_req_by_particles(
    target_x, target_y, target_z,
    num_additional_attributes=0,
    particles_per_cell=29 * 10,
)
# H and C are counted for a single cell in y only
H_gpu = mem_req_by_particles(
    target_x, 1, target_z,
    num_additional_attributes=1,
    particles_per_cell=10,
)
C_gpu = mem_req_by_particles(
    target_x, 1, target_z,
    num_additional_attributes=1,
    particles_per_cell=10,
)
Cu_gpu = mem_req_by_particles(
    target_x, target_y, target_z,
    num_additional_attributes=1,
    particles_per_cell=10,
)

print("Memory requirement per GPU and species:")
for species_label, species_mem in (
    ("e: ", e_gpu),
    ("H: ", H_gpu),
    ("C: ", C_gpu),
    ("Cu: ", Cu_gpu),
):
    print(species_label, species_mem / (1024 * 1024), "MB")

# --- RNG states: computed for the full per-GPU volume ---
rng_gpu = mem_req_by_rng(Lx, Ly, Lz)
print("Memory requirement per GPU for RNG states: ", rng_gpu / (1024 * 1024), "MB")

# --- total over fields, all species and RNG states ---
mem_sum = sum((field_gpu, e_gpu, H_gpu, C_gpu, Cu_gpu, rng_gpu))
print("Sum of required GPU memory: ", mem_sum / (1024 * 1024), "MB")
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "In the *Spaghetti* case we waste a lot of memory per GPU on the exchange guards." | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "## `20 x 3 x 40` case" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 41, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Memory requirement per GPU for fields: 2903.29101562 MB\n", | |
| "Memory requirement per GPU and species:\n", | |
| "e: 7857.1875 MB\n", | |
| "H: 17.265625 MB\n", | |
| "C: 17.265625 MB\n", | |
| "Cu: 293.515625 MB\n", | |
| "Memory requirement per GPU for RNG states: 2970.75 MB\n", | |
| "Sum of required GPU memory: 14059.2753906 MB\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
# size of the volume handled by one GPU, in cells
Lx, Ly, Lz = 272, 1864, 128

# size of the target area inside that volume, in cells
target_x, target_y, target_z = Lx, 17, Lz

# --- fields: computed for the full per-GPU volume ---
field_gpu = mem_req_by_fields(Lx, Ly, Lz, FieldTMPSlots=2)
print("Memory requirement per GPU for fields: ", field_gpu / (1024 * 1024), "MB")

# --- particles: only the target area contributes here ---
e_gpu = mem_req_by_particles(
    target_x, target_y, target_z,
    num_additional_attributes=0,
    particles_per_cell=29 * 10,
)
# H and C are counted for a single cell in y only
H_gpu = mem_req_by_particles(
    target_x, 1, target_z,
    num_additional_attributes=1,
    particles_per_cell=10,
)
C_gpu = mem_req_by_particles(
    target_x, 1, target_z,
    num_additional_attributes=1,
    particles_per_cell=10,
)
Cu_gpu = mem_req_by_particles(
    target_x, target_y, target_z,
    num_additional_attributes=1,
    particles_per_cell=10,
)

print("Memory requirement per GPU and species:")
for species_label, species_mem in (
    ("e: ", e_gpu),
    ("H: ", H_gpu),
    ("C: ", C_gpu),
    ("Cu: ", Cu_gpu),
):
    print(species_label, species_mem / (1024 * 1024), "MB")

# --- RNG states: computed for the full per-GPU volume ---
rng_gpu = mem_req_by_rng(Lx, Ly, Lz)
print("Memory requirement per GPU for RNG states: ", rng_gpu / (1024 * 1024), "MB")

# --- total over fields, all species and RNG states ---
mem_sum = sum((field_gpu, e_gpu, H_gpu, C_gpu, Cu_gpu, rng_gpu))
print("Sum of required GPU memory: ", mem_sum / (1024 * 1024), "MB")
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [] | |
| } | |
| ], | |
| "metadata": { | |
| "kernelspec": { | |
| "display_name": "Python 3", | |
| "language": "python", | |
| "name": "python3" | |
| }, | |
| "language_info": { | |
| "codemirror_mode": { | |
| "name": "ipython", | |
| "version": 3 | |
| }, | |
| "file_extension": ".py", | |
| "mimetype": "text/x-python", | |
| "name": "python", | |
| "nbconvert_exporter": "python", | |
| "pygments_lexer": "ipython3", | |
| "version": "3.4.3" | |
| }, | |
| "toc": { | |
| "nav_menu": {}, | |
| "number_sections": true, | |
| "sideBar": true, | |
| "skip_h1_title": false, | |
| "toc_cell": false, | |
| "toc_position": {}, | |
| "toc_section_display": "block", | |
| "toc_window_display": true | |
| } | |
| }, | |
| "nbformat": 4, | |
| "nbformat_minor": 2 | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment