Skip to content

Instantly share code, notes, and snippets.

@robkorv
Created January 6, 2023 10:56
Show Gist options
  • Select an option

  • Save robkorv/9aad865819d6acd71aede9213742fd4a to your computer and use it in GitHub Desktop.

Select an option

Save robkorv/9aad865819d6acd71aede9213742fd4a to your computer and use it in GitHub Desktop.
split large excel into multiple files with python's openpyxl
#!/usr/bin/env python3
import math
import pathlib
import openpyxl
file = pathlib.Path("filepath")
print(f"{file.name = }")
file_suffix = file.suffix
print(f"{file_suffix = }")
file_stem = file.stem
print(f"{file_stem = }")
file_parent = file.parent
print(f"{file_parent = }")
rows = 1000
print(f"{rows = }")
header_row = 4
print(f"{header_row = }")
data_start_row = 5
print(f"{data_start_row = }")
workbook = openpyxl.load_workbook(filename=file)
worksheet = workbook.active
[(header := row) for row in worksheet.iter_rows(4, 4, values_only=True)]
print(f"{header = }")
total_rows = worksheet.max_row - data_start_row
print(f"{total_rows = }")
total_files = math.ceil(total_rows / rows)
print(f"{total_files = }")
new_workbook = None
last_i = None
for i, row in enumerate(worksheet.iter_rows(data_start_row, values_only=True)):
if not i % rows:
if new_workbook:
file_number = i // rows
file_name = f"{file_stem}-{file_number}-{last_i + 1}-{i}-{file_suffix}"
print(f"{file_name = }")
file_path = file_parent.joinpath("split", file_name)
new_workbook.save(str(file_path))
new_workbook = openpyxl.Workbook()
new_worksheet = new_workbook.active
new_worksheet.append(header)
last_i = i
new_worksheet.append(row)
if new_workbook:
file_number = i // rows
file_name = f"{file_stem}-{file_number}-{last_i + 1}-{i}-{file_suffix}"
print(f"{file_name = }")
file_path = file_parent.joinpath("split", file_name)
new_workbook.save(str(file_path))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment