Skip to content

Instantly share code, notes, and snippets.

@xrz000
Created June 19, 2025 06:31
Show Gist options
  • Select an option

  • Save xrz000/e1b37c85ea60c725b82aca379a40aa0b to your computer and use it in GitHub Desktop.

Select an option

Save xrz000/e1b37c85ea60c725b82aca379a40aa0b to your computer and use it in GitHub Desktop.
Benchmarking SEG-Y crossline loading times of several Python libraries
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "0f604580-f351-49ec-9ffe-bc89f6463b4f",
"metadata": {},
"outputs": [],
"source": [
"import time\n",
"import tqdm\n",
"import numpy as np"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "3ca3c792-2d33-465c-b608-06121734158e",
"metadata": {},
"outputs": [],
"source": [
"def test(slice_func=None, lines=()):\n",
" runtimes = []\n",
" trials = len(lines)\n",
" for trial in tqdm.trange(trials):\n",
" start_time = time.time()\n",
" lineidx = lines[trial]\n",
" slice_func(lineidx)\n",
" time_passed = time.time() - start_time\n",
" runtimes.append(time_passed)\n",
" total_runtime = sum(runtimes)\n",
" mean_runtime = np.mean(runtimes)\n",
" std_runtime = np.std(runtimes)\n",
" print(f\"{trials} trials, {total_runtime}s passed, avg {np.mean(runtimes)}s\")\n",
" return runtimes"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "818ba189-5fbf-4600-8c32-70ec0e329f05",
"metadata": {},
"outputs": [],
"source": [
"# Test SEG-Y file: 13.2 GB, shape [795, 2869, 1500] = (inlines, crosslines, samples per trace)\n",
"data_path = \"test.sgy\"\n",
"n_ilines = 2256 - 1461\n",
"n_xlines = 3179 - 310\n",
"np.random.seed(0)\n",
"lines = sorted(np.random.choice(np.arange(310, 3179), 100, replace=False))\n",
"results = {}"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "88d2d2a0-6006-4750-b73e-bb50f5f88f6d",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|████████████████████████████████████████████████████████████████████| 100/100 [00:14<00:00, 6.68it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"100 trials, 14.81632137298584s passed, avg 0.1481632137298584s\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n"
]
}
],
"source": [
"import segyio\n",
"\n",
"segyio_loader = segyio.open(data_path, \"r\")\n",
"\n",
"def sgyio_func(j):\n",
" return segyio_loader.xline[j]\n",
" \n",
"times = test(sgyio_func, lines=lines)\n",
"results[\"segyio\"] = times"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "6ce220fe-6ff7-4d69-b5c3-18f094688d37",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|███████████████████████████████████████████████████████████████████| 100/100 [00:00<00:00, 108.81it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"100 trials, 0.9151480197906494s passed, avg 0.009151480197906493s\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n"
]
}
],
"source": [
"times = test(sgyio_func, lines=lines)\n",
"results[\"segyio-cache\"] = times"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "b6ad2ebb-a381-452a-b0b7-e27739c438a9",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|████████████████████████████████████████████████████████████████████| 100/100 [00:30<00:00, 3.24it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"100 trials, 30.80193328857422s passed, avg 0.3080193328857422s\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n"
]
}
],
"source": [
"from mdio import MDIOReader, segy_to_mdio\n",
"\n",
"mdio_path = data_path[:data_path.rfind(\".\")] + \".mdio\"\n",
"mdio_loader = MDIOReader(mdio_path)\n",
"\n",
"def mdio_func(j):\n",
" xline_index = mdio_loader.coord_to_index(j, dimensions=\"crossline\").item()\n",
" return mdio_loader[:, xline_index, :]\n",
"\n",
"times = test(mdio_func, lines=lines)\n",
"results[\"mdio\"] = times"
]
},
{
"cell_type": "markdown",
"id": "b02060e3-9696-474f-88b1-c87d57f45e3f",
"metadata": {},
"source": [
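"Clear the operating system's file cache before running the segfast test: both segfast and segyio read the original SEG-Y file, so pages cached by the earlier segyio runs would otherwise make segfast's first (\"cold\") run look faster than a true read from disk.\n",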
"\n",
"* Windows: install [RAMMap](https://learn.microsoft.com/en-us/sysinternals/downloads/rammap) and run \"Empty->Empty Standby List\"\n",
"* Linux: as root, run `sync; echo 3 > /proc/sys/vm/drop_caches`"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "135520f0-28fe-451a-9c11-45fa967eddb4",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|████████████████████████████████████████████████████████████████████| 100/100 [00:15<00:00, 6.43it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"100 trials, 15.280656099319458s passed, avg 0.1528065609931946s\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n"
]
}
],
"source": [
"import segfast\n",
"\n",
"segfast_loader = segfast.open(data_path, engine='memmap')\n",
"\n",
"def segfast_func(j):\n",
" j = j - 310\n",
" index = np.arange(j, j + n_ilines * n_xlines, n_xlines)\n",
" return segfast_loader.load_traces(index)\n",
"\n",
"times = test(segfast_func, lines=lines)\n",
"results[\"segfast\"] = times"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "aa9d6fe3-2328-452a-b25a-c21d161e6ebb",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|███████████████████████████████████████████████████████████████████| 100/100 [00:00<00:00, 260.85it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"100 trials, 0.38136839866638184s passed, avg 0.0038136839866638184s\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n"
]
}
],
"source": [
"times = test(segfast_func, lines=lines)\n",
"results[\"segfast-cache\"] = times"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "b5be96fd-09c2-463c-b359-aaaa9cd28bb1",
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "daa1e2b1e94b43aea5bdbafe2559b02a",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Processing Chunks: 0%| | 0.00/23.0 [00:00<?, ? trace-chunks/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "34f7d8522f834d2fb7231c26ecd8b4d9",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Reading header: 0%| | 0.00/100k [00:00<?, ? traces/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "682b09a3a6d74b87bafa73cf7d0d7454",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Reading header: 0%| | 0.00/100k [00:00<?, ? traces/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "55db91c4ac3042319c97402109b3ae9e",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Reading header: 0%| | 0.00/100k [00:00<?, ? traces/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "5f4cefa0cb7a45b78190d0a3be18b224",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Reading header: 0%| | 0.00/100k [00:00<?, ? traces/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "06786eb54e544c1e83f4a07e85a2d102",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Reading header: 0%| | 0.00/100k [00:00<?, ? traces/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "04a8773e114e4cd5bcfb072165d54fa3",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Reading header: 0%| | 0.00/100k [00:00<?, ? traces/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "79c1691703644f5ab8081774cfaf9f43",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Reading header: 0%| | 0.00/100k [00:00<?, ? traces/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "1a5f67f73bd548c29c6cea9d046849fe",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Reading header: 0%| | 0.00/100k [00:00<?, ? traces/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "80af2c19f916434a8d2b161238b25a58",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Reading header: 0%| | 0.00/100k [00:00<?, ? traces/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "c0b62497911e4f79a1cd326d37ed8343",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Reading header: 0%| | 0.00/100k [00:00<?, ? traces/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "da93495322d7453fb57f4aaabfc5a183",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Reading header: 0%| | 0.00/100k [00:00<?, ? traces/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "6210d6ed5f0a48bcb9eee02287244294",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Reading header: 0%| | 0.00/100k [00:00<?, ? traces/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "09590a3eee46416e8f48b715fa000be4",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Reading header: 0%| | 0.00/100k [00:00<?, ? traces/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "2e6cc6f8fa59498fb3761b32d36830de",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Reading header: 0%| | 0.00/100k [00:00<?, ? traces/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "07086566c7ee4c20965c7d4ae313e603",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Reading header: 0%| | 0.00/100k [00:00<?, ? traces/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "0c08aff64d7c4d5f8cc0a9d021d6138c",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Reading header: 0%| | 0.00/100k [00:00<?, ? traces/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "6fb6a48b19634ce1a114c00ee5e991ed",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Reading header: 0%| | 0.00/100k [00:00<?, ? traces/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "2367b3c9ef3442dc80500e7c16b6527f",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Reading header: 0%| | 0.00/100k [00:00<?, ? traces/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "57e11794213743258b3f664623b37dc8",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Reading header: 0%| | 0.00/100k [00:00<?, ? traces/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "671676800aa14fadb6f881b9b399f655",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Reading header: 0%| | 0.00/100k [00:00<?, ? traces/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "4b17b6f4ffc54c3d89c11a6729f204a8",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Reading header: 0%| | 0.00/100k [00:00<?, ? traces/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "6eb0ab4a6e7140a089f692985d27c7f9",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Reading header: 0%| | 0.00/100k [00:00<?, ? traces/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "8d7822ba76f34bf1a0992c8ea76a2281",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Reading header: 0%| | 0.00/80.9k [00:00<?, ? traces/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|████████████████████████████████████████████████████████████████████| 100/100 [00:05<00:00, 18.29it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"100 trials, 5.4560887813568115s passed, avg 0.05456088781356812s\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n"
]
}
],
"source": [
"import xarray as xr\n",
"from segysak.progress import Progress\n",
"\n",
"segysak_loader = xr.open_dataset(data_path, dim_byte_fields={\"iline\": 9, \"xline\": 21})\n",
"\n",
"def segysak_func(j):\n",
" return segysak_loader.sel(xline=j).to_dataarray().to_numpy()[0, :, :]\n",
"\n",
"times = test(segysak_func, lines=lines)\n",
"results[\"segysak\"] = times"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "d721326a-e024-4854-b4a1-5fcc1c2a119c",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[Text(0, 0, '0.148163'),\n",
" Text(0, 0, '0.00915148'),\n",
" Text(0, 0, '0.308019'),\n",
" Text(0, 0, '0.152807'),\n",
" Text(0, 0, '0.00381368'),\n",
" Text(0, 0, '0.0545609')]"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "",
"text/plain": [
"<Figure size 819.2x614.4 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"import seaborn as sns\n",
"from functools import reduce\n",
"sns.set_theme(style=\"whitegrid\")\n",
"sns.set_theme(rc={\"figure.dpi\": 128})\n",
"\n",
"values = tuple([y for y in results.values()])\n",
"y = reduce(lambda a, b: a + b, values)\n",
"keys = list(results.keys())\n",
"keys = [[a] * len(b) for a, b in zip(keys, values)]\n",
"x = reduce(lambda a, b: a + b, keys)\n",
"\n",
"g = sns.barplot(x=x, y=y, log_scale=None)\n",
"g.bar_label(g.containers[0], fontsize=10)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.6"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment