{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Memory requirement calculator for PIConGPU <a class=\"tocSkip\">"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Imports"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"import h5py as h5\n",
"import numpy as np\n",
"import matplotlib as mpl\n",
"from matplotlib import pyplot as plt\n",
"from mpl_toolkits.mplot3d import Axes3D\n",
"from matplotlib.colors import LogNorm\n",
"%matplotlib inline\n",
"import sys\n",
"import os\n",
"from scipy import constants as sc"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Override Matplotlib Defaults"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"# OVERWRITE DEFAULT PLOTTING PARAMETERS\n",
"params = {\n",
" 'font.size' : 20,\n",
" 'lines.linewidth' : 3,\n",
" 'legend.fontsize' : 20,\n",
" 'legend.frameon' : False,\n",
" 'legend.numpoints': 1,\n",
" 'xtick.labelsize' : 20,\n",
" 'ytick.labelsize' : 20,\n",
" 'figure.figsize': [12,8],\n",
" 'axes.labelsize' : 20\n",
"}\n",
"mpl.rcParams.update(params)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Implementation"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Field Memory"
]
},
{
"cell_type": "code",
"execution_count": 43,
"metadata": {},
"outputs": [],
"source": [
"def mem_req_by_fields(Lx,Ly,Lz,FieldTMPSlots = 1):\n",
" \"\"\" Memory reserved for fields on each GPU\n",
" \n",
" Returns:\n",
" req_mem required memory {unit: bytes}\n",
" \"\"\"\n",
" # guard size in super cells in x, y, z\n",
" guard_size_SC = np.array([1, 1, 1])\n",
" # super cell size in cells in x, y, z\n",
" SC_size = np.array([8,8,4])\n",
" local_cells = (Lx + SC_size[0] * guard_size_SC[0]) \\\n",
" * (Ly + SC_size[1] * guard_size_SC[1]) \\\n",
" * (Lz + SC_size[2] * guard_size_SC[2])\n",
" \n",
" # size of a data entry in bytes\n",
" data_size = np.float32().itemsize\n",
" # number of fields: 3 * 3 = x,y,z for E,B,J\n",
" num_fields = 3 * 3 + FieldTMPSlots\n",
" req_mem = data_size * num_fields * local_cells\n",
" return req_mem"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Particles"
]
},
{
"cell_type": "code",
"execution_count": 35,
"metadata": {},
"outputs": [],
"source": [
"def mem_req_by_particles(\n",
" Lx,\n",
" Ly,\n",
" Lz,\n",
" num_additional_attributes = 0,\n",
" particles_per_cell = 2\n",
"):\n",
" \"\"\" Memory reserved for all particles of a species on a GPU.\n",
" We currently neglect the constant species memory.\n",
" \n",
" Params:\n",
" \n",
" num_additional_attributes : number of additional attributes like e.g. `boundElectrons`\n",
" \n",
" Returns:\n",
" req_mem required memory {unit: bytes} per GPU and species\n",
" \"\"\"\n",
" # memory required by the standard particle attributes\n",
" standard_attribute_mem = np.array([\n",
" 3 * 4, # momentum\n",
" 3 * 4, # position\n",
" 1 * 8, # multimask\n",
" 1 * 8, # cell index in supercell {lcellId_t}\n",
" 1 * 8 # weighting\n",
" ])\n",
" \n",
" additional_mem = num_additional_attributes * 4 # we assume 4 bytes here - check if that's really the case\n",
" \n",
" local_cells = Lx * Ly * Lz\n",
" \n",
" req_mem = local_cells * (np.sum(standard_attribute_mem) + additional_mem) * particles_per_cell\n",
" \n",
" return req_mem\n",
" "
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## RNG states"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"def mem_req_by_rng(Lx,Ly,Lz):\n",
" \"\"\" Memory reserved for the random number generator state on each GPU.\n",
" The RNG we use is: MRG32ka\n",
" \n",
" Returns:\n",
" req_mem required memory {unit: bytes} per GPU\n",
" \"\"\"\n",
" req_mem_per_cell = 6 * 8 # bytes\n",
" local_cells = Lx * Ly * Lz\n",
" req_mem = req_mem_per_cell * local_cells\n",
" return req_mem"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Cu 30nm foil setup"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## *Spaghetti* case"
]
},
{
"cell_type": "code",
"execution_count": 44,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Memory requirement per GPU for fields: 3072.3515625 MB\n",
"Memory requirement per GPU and species:\n",
"e: 2657.578125 MB\n",
"H: 5.83984375 MB\n",
"C: 5.83984375 MB\n",
"Cu: 99.27734375 MB\n",
"Memory requirement per GPU for RNG states: 3018.75 MB\n",
"Sum of required GPU memory: 8859.63671875 MB\n"
]
}
],
"source": [
"Lx = 184\n",
"Ly = 5600\n",
"Lz = 64\n",
"\n",
"target_x = Lx\n",
"target_y = 17\n",
"target_z = Lz\n",
"\n",
"# field memory per GPU\n",
"field_gpu = mem_req_by_fields(Lx, Ly, Lz, FieldTMPSlots=2)\n",
"print(\"Memory requirement per GPU for fields: \",field_gpu / (1024 * 1024),\"MB\")\n",
"# particle memory per GPU - only the target area contributes here\n",
"e_gpu = mem_req_by_particles(target_x, target_y, target_z, \n",
" num_additional_attributes=0, \n",
" particles_per_cell=29 * 10)\n",
"H_gpu = mem_req_by_particles(target_x, 1, target_z, \n",
" num_additional_attributes=1, \n",
" particles_per_cell=10)\n",
"C_gpu = mem_req_by_particles(target_x, 1, target_z, \n",
" num_additional_attributes=1, \n",
" particles_per_cell=10)\n",
"Cu_gpu = mem_req_by_particles(target_x, target_y, target_z, \n",
" num_additional_attributes=1, \n",
" particles_per_cell=10)\n",
"print(\"Memory requirement per GPU and species:\")\n",
"print(\"e: \", e_gpu / (1024 * 1024),\"MB\")\n",
"print(\"H: \", H_gpu / (1024 * 1024),\"MB\")\n",
"print(\"C: \", C_gpu / (1024 * 1024),\"MB\")\n",
"print(\"Cu: \",Cu_gpu / (1024 * 1024),\"MB\")\n",
"# RNG memory per GPU\n",
"rng_gpu = mem_req_by_rng(Lx, Ly, Lz)\n",
"print(\"Memory requirement per GPU for RNG states: \",rng_gpu / (1024 * 1024),\"MB\")\n",
"\n",
"mem_sum = field_gpu + e_gpu + H_gpu + C_gpu + Cu_gpu + rng_gpu\n",
"print(\"Sum of required GPU memory: \",mem_sum / (1024 * 1024),\"MB\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"In the *Spaghetti* case we waste a lot of memory per GPU on the exchange guards."
]
},
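{
"cell_type": "markdown",
"metadata": {},
"source": [
"A quick sanity check of this claim, as a sketch only: reusing the `8 x 8 x 4` supercell guard that `mem_req_by_fields` hard-codes and the *Spaghetti* domain size from above, the cell below estimates what fraction of the local cells, and hence of the field memory, sits in the exchange guard."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Rough guard-cell overhead of the field memory in the *Spaghetti* case.\n",
"# Sketch only: reuses the 8 x 8 x 4 supercell guard hard-coded in mem_req_by_fields.\n",
"Lx_s, Ly_s, Lz_s = 184, 5600, 64  # local copy of the Spaghetti domain size\n",
"core_cells = Lx_s * Ly_s * Lz_s\n",
"total_cells = (Lx_s + 8) * (Ly_s + 8) * (Lz_s + 4)\n",
"guard_fraction = 1 - core_cells / total_cells\n",
"field_mem = mem_req_by_fields(Lx_s, Ly_s, Lz_s, FieldTMPSlots=2)\n",
"print(\"cells in the exchange guard: \", 100 * guard_fraction, \"%\")\n",
"print(\"field memory in the guard:   \", guard_fraction * field_mem / (1024 * 1024), \"MB\")"
]
},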
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## `20 x 3 x 40` case"
]
},
{
"cell_type": "code",
"execution_count": 41,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Memory requirement per GPU for fields: 2903.29101562 MB\n",
"Memory requirement per GPU and species:\n",
"e: 7857.1875 MB\n",
"H: 17.265625 MB\n",
"C: 17.265625 MB\n",
"Cu: 293.515625 MB\n",
"Memory requirement per GPU for RNG states: 2970.75 MB\n",
"Sum of required GPU memory: 14059.2753906 MB\n"
]
}
],
"source": [
"Lx = 272\n",
"Ly = 1864\n",
"Lz = 128\n",
"\n",
"target_x = Lx\n",
"target_y = 17\n",
"target_z = Lz\n",
"\n",
"# field memory per GPU\n",
"field_gpu = mem_req_by_fields(Lx, Ly, Lz, FieldTMPSlots=2)\n",
"print(\"Memory requirement per GPU for fields: \",field_gpu / (1024 * 1024),\"MB\")\n",
"# particle memory per GPU - only the target area contributes here\n",
"e_gpu = mem_req_by_particles(target_x, target_y, target_z, \n",
" num_additional_attributes=0, \n",
" particles_per_cell=29 * 10)\n",
"H_gpu = mem_req_by_particles(target_x, 1, target_z, \n",
" num_additional_attributes=1, \n",
" particles_per_cell=10)\n",
"C_gpu = mem_req_by_particles(target_x, 1, target_z, \n",
" num_additional_attributes=1, \n",
" particles_per_cell=10)\n",
"Cu_gpu = mem_req_by_particles(target_x, target_y, target_z, \n",
" num_additional_attributes=1, \n",
" particles_per_cell=10)\n",
"print(\"Memory requirement per GPU and species:\")\n",
"print(\"e: \", e_gpu / (1024 * 1024),\"MB\")\n",
"print(\"H: \", H_gpu / (1024 * 1024),\"MB\")\n",
"print(\"C: \", C_gpu / (1024 * 1024),\"MB\")\n",
"print(\"Cu: \",Cu_gpu / (1024 * 1024),\"MB\")\n",
"rng_gpu = mem_req_by_rng(Lx, Ly, Lz)\n",
"print(\"Memory requirement per GPU for RNG states: \",rng_gpu / (1024 * 1024),\"MB\")\n",
"\n",
"mem_sum = field_gpu + e_gpu + H_gpu + C_gpu + Cu_gpu + rng_gpu\n",
"print(\"Sum of required GPU memory: \",mem_sum / (1024 * 1024),\"MB\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.4.3"
},
"toc": {
"nav_menu": {},
"number_sections": true,
"sideBar": true,
"skip_h1_title": false,
"toc_cell": false,
"toc_position": {},
"toc_section_display": "block",
"toc_window_display": true
}
},
"nbformat": 4,
"nbformat_minor": 2
}