Skip to content

Instantly share code, notes, and snippets.

@beatzxbt
Created September 24, 2024 11:30
Show Gist options
  • Select an option

  • Save beatzxbt/d61a0ab0eae9d15deae6ca25392cb19d to your computer and use it in GitHub Desktop.

Select an option

Save beatzxbt/d61a0ab0eae9d15deae6ca25392cb19d to your computer and use it in GitHub Desktop.
high performance str -> float converter
import numpy as np
cimport numpy as np
from libc.stdlib cimport strtod
from cpython.unicode cimport PyUnicode_AsUTF8
from cython cimport boundscheck, wraparound
@boundscheck(False)
@wraparound(False)
cdef np.ndarray[np.float64_t, ndim=2] parse_list_strings_to_floats(list[list[str]] data):
    """
    Parse a rectangular list of lists of strings into a 2-D float64 array.

    Each string is converted with the C library's ``strtod``. The whole
    string must be consumed by the conversion (surrounding ASCII whitespace
    is tolerated); partially numeric input such as ``"1.5abc"`` is rejected
    instead of being silently truncated to ``1.5``.

    Parameters
    ----------
    data : list of list of str
        Input rows. Every inner list must have the same length as the
        first one.

    Returns
    -------
    np.ndarray
        Array of shape ``(n, m)`` and dtype ``float64``, where ``n`` is
        ``len(data)`` and ``m`` is ``len(data[0])``. An empty ``data``
        yields an array of shape ``(0, 0)``.

    Raises
    ------
    ValueError
        If the inner lists differ in length, or if an item is not a
        complete floating-point literal.
    """
    # Py_ssize_t is the native type of len() results and container indices;
    # a plain C int could truncate on very large inputs.
    cdef Py_ssize_t n = len(data)
    cdef Py_ssize_t m, i, j
    cdef np.ndarray[np.float64_t, ndim=2] result
    cdef list row
    cdef str item
    cdef const char* s
    cdef char* endptr
    cdef double value

    if n == 0:
        return np.empty((0, 0), dtype=np.float64)

    # The first row fixes the expected width of every other row.
    m = len(data[0])
    result = np.empty((n, m), dtype=np.float64)

    for i in range(n):
        row = data[i]
        # Checked before indexing: bounds checking is disabled for this
        # function, so row[j] below relies on this length guarantee.
        if len(row) != m:
            raise ValueError("All inner lists must have the same length.")
        for j in range(m):
            item = row[j]
            # The returned buffer is owned by `item`; it stays valid while
            # `item` is referenced, i.e. for the rest of this iteration.
            s = PyUnicode_AsUTF8(item)
            value = strtod(s, &endptr)
            if endptr != s:
                # Skip trailing ASCII whitespace: space (32) and
                # \t \n \v \f \r (9..13), mirroring strtod's own
                # tolerance of leading whitespace.
                while endptr[0] == 32 or 9 <= endptr[0] <= 13:
                    endptr += 1
            # endptr == s      -> nothing was parsed at all.
            # endptr[0] != 0   -> unparsed characters follow the number.
            if endptr == s or endptr[0] != 0:
                raise ValueError(f"Invalid float value: '{item}' at position ({i}, {j}).")
            result[i, j] = value
    return result
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment