-
-
Save tylov/8bfb5984864602ad52614f8ddea3544c to your computer and use it in GitHub Desktop.
| /* | |
| MIT License | |
| * | |
| * Copyright (c) 2025 Tyge Løvset | |
| * | |
| * Permission is hereby granted, free of charge, to any person obtaining a copy | |
| * of this software and associated documentation files (the "Software"), to deal | |
| * in the Software without restriction, including without limitation the rights | |
| * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |
| * copies of the Software, and to permit persons to whom the Software is | |
| * furnished to do so, subject to the following conditions: | |
| * | |
| * The above copyright notice and this permission notice shall be included in all | |
| * copies or substantial portions of the Software. | |
| * | |
| * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
| * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
| * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |
| * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |
| * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |
| * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |
| * SOFTWARE. | |
| */ | |
| /* | |
| This is a single header, C implementation of python numpy multidimensional array slicing. | |
| It supports both row- and column-major layout, transposing of md spans, and flat iteration. | |
| cspan.h is also a part of the STC library, https://github.com/stclib/STC | |
| Documentation: https://github.com/stclib/STC/blob/master/docs/cspan_api.md | |
| Examples: https://github.com/stclib/STC/blob/master/docs/cspan_api.md | |
| The python line: | |
| slice = sp4[2][3:][1:5:2][:] # reduces rank from 4 to 3 | |
| can be written in C as: | |
| Span3 slice = cspan_slice(&sp4, Span3, {2}, {3,c_END}, {1,5,2}, {c_ALL}); | |
| Compile-time check that number of arguments matches type of sp4. | |
| Default runtime argument bounds check and that possible rank-reduction is type-correct. | |
| There are also optimized common slicing constructors (for dimensions 1-4), e.g.: | |
| Span sub = cspan_subspan(&sp, 5, 3); // cspan_slice(&sp, Span, {5,8}); | |
| Span3 ms3 = cspan_submd4(&sp3, 1); // cspan_slice(&sp3, Span3, {1}, {c_ALL}, {c_ALL}); | |
| Span2 ms2 = cspan_submd4(&sp3, 1, 0); // cspan_slice(&sp3, Span2, {1}, {0}, {c_ALL}); | |
| The code generated and the implementation is fast and compact, while being | |
| generic and type-safe. For comparison, C++23 std::mdspan does not support | |
| slicing, subspan, submd, or flattened iteration. | |
| // Example: | |
| #include <stdio.h> | |
| #include <stdlib.h> | |
| #include "cspan.h" | |
| use_cspan3(DSpan, double); | |
| int main(void) { | |
| enum{nx=3, ny=4, nz=5}; | |
| double data[nx*ny*nz]; | |
| printf("\nMultidim span ms[3, 4, 5], fortran ordered"); | |
| DSpan3 ms = cspan_md_layout(c_COLMAJOR, data, nx, ny, nz); | |
| int idx = 0; | |
| for (int i = 0; i < ms.shape[0]; ++i) | |
| for (int j = 0; j < ms.shape[1]; ++j) | |
| for (int k = 0; k < ms.shape[2]; ++k) | |
| *cspan_at(&ms, i, j, k) = ++idx; | |
| cspan_transpose(&ms); | |
| printf(", transposed:\n\n"); | |
| cspan_print(DSpan3, "%.2f", ms); | |
| puts("Slicing:"); | |
| printf("ms[0, :, :]\n"); | |
| cspan_print(DSpan2, "%g", cspan_slice(&ms, DSpan2, {0}, {c_ALL}, {c_ALL})); | |
| printf("ms[:, 0, :]\n"); | |
| cspan_print(DSpan2, "%g", cspan_slice(&ms, DSpan2, {c_ALL}, {0}, {c_ALL})); | |
| printf("ms[:, :, 0]\n"); | |
| cspan_print(DSpan2, "%g", cspan_slice(&ms, DSpan2, {c_ALL}, {c_ALL}, {0})); | |
| } | |
| */ | |
| #ifndef CSPAN_H_INCLUDED | |
| #define CSPAN_H_INCLUDED | |
| #include <assert.h> | |
| #include <stddef.h> | |
| #include <string.h> | |
| #include <stdint.h> | |
| #include <stdbool.h> | |
| #define c_MACRO_OVERLOAD(name, ...) \ | |
| c_JOIN(c_JOIN0(name,_), c_NUMARGS(__VA_ARGS__))(__VA_ARGS__) | |
| #define c_TUPLE_AT_1(x,y,...) y | |
| #define c_JOIN0(a, b) a ## b | |
| #define c_JOIN(a, b) c_JOIN0(a, b) | |
| #define c_EXPAND(...) __VA_ARGS__ | |
| #define c_NUMARGS(...) _c_APPLY_ARG_N((__VA_ARGS__, _c_RSEQ_N)) | |
| #define _c_APPLY_ARG_N(args) c_EXPAND(_c_ARG_N args) | |
| #define _c_RSEQ_N 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 | |
| #define _c_ARG_N(_1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13, \ | |
| _14, _15, _16, N, ...) N | |
| #define c_static_assert(expr) (void)sizeof(int[(expr) ? 1 : -1]) | |
| #define c_countof(a) (intptr_t)(sizeof(a)/sizeof 0[a]) | |
| #define c_uless(a, b) ((size_t)(a) < (size_t)(b)) | |
| #define c_swap(T, xp, yp) do { T _t = *(xp); *(xp) = *(yp); *(yp) = _t; } while (0) | |
| #define c_make_array(T, ...) ((T[])__VA_ARGS__) | |
| #define c_make_array2d(T, N, ...) ((T[][N])__VA_ARGS__) | |
| #define c_each(it, C, cnt) \ | |
| C##_iter it = C##_begin(&cnt); it.ref; C##_next(&it) | |
| #ifndef CSPAN_INDEX_TYPE | |
| #define CSPAN_INDEX_TYPE int32_t | |
| #endif | |
| typedef CSPAN_INDEX_TYPE _istride; | |
| #define using_cspan use_cspan // [deprecated] | |
| #define using_cspan2 use_cspan2 // [deprecated] | |
| #define using_cspan3 use_cspan3 // [deprecated] | |
| #define using_cspan2_with_eq use_cspan2_with_eq // [deprecated] | |
| #define using_cspan3_with_eq use_cspan3_with_eq // [deprecated] | |
| #define use_cspan(...) c_MACRO_OVERLOAD(use_cspan, __VA_ARGS__) | |
| #define use_cspan_2(Self, T) \ | |
| use_cspan_3(Self, T, 1); \ | |
| static inline Self Self##_from_n(Self##_value* dataptr, intptr_t n) \ | |
| { return (Self)cspan_from_n(dataptr, n); } \ | |
| static inline const Self##_value* Self##_at(const Self* self, intptr_t idx) \ | |
| { return cspan_at(self, idx); } \ | |
| static inline Self##_value* Self##_at_mut(Self* self, intptr_t idx) \ | |
| { return cspan_at(self, idx); } \ | |
| struct stc_nostruct | |
| #define use_cspan_with_eq(...) c_MACRO_OVERLOAD(use_cspan_with_eq, __VA_ARGS__) | |
| #define use_cspan_with_eq_3(Self, T, i_eq) \ | |
| use_cspan_with_eq_4(Self, T, i_eq, 1); \ | |
| static inline Self Self##_from_n(Self##_value* dataptr, intptr_t n) \ | |
| { return (Self)cspan_from_n(dataptr, n); } \ | |
| struct stc_nostruct | |
| #define use_cspan_3(Self, T, RANK) \ | |
| typedef T Self##_value; \ | |
| typedef T Self##_raw; \ | |
| typedef struct { \ | |
| Self##_value *data; \ | |
| _istride shape[RANK]; \ | |
| cspan_tuple##RANK stride; \ | |
| } Self; \ | |
| \ | |
| typedef struct { \ | |
| Self##_value *ref; \ | |
| const Self *_s; \ | |
| _istride pos[RANK]; \ | |
| } Self##_iter; \ | |
| \ | |
| static inline Self Self##_slice_(Self##_value* d, const _istride shape[], const _istride stri[], \ | |
| const intptr_t args[][3], const int rank) { \ | |
| Self s; int outrank; \ | |
| s.data = d + _cspan_slice(s.shape, s.stride.d, &outrank, shape, stri, args, rank); \ | |
| assert(outrank == RANK); \ | |
| return s; \ | |
| } \ | |
| static inline Self##_iter Self##_begin(const Self* self) { \ | |
| return (Self##_iter){ \ | |
| .ref=RANK==1 && self->shape[0]==0 ? NULL : self->data, ._s=self}; \ | |
| } \ | |
| static inline Self##_iter Self##_end(const Self* self) { \ | |
| (void)self; \ | |
| return (Self##_iter){0}; \ | |
| } \ | |
| static inline void Self##_next(Self##_iter* it) { \ | |
| intptr_t off = it->_s->stride.d[RANK - 1]; \ | |
| bool done = _cspan_next##RANK(it->pos, it->_s->shape, it->_s->stride.d, RANK, &off); \ | |
| if (done) it->ref = NULL; else it->ref += off; \ | |
| } \ | |
| static inline intptr_t Self##_size(const Self* self) \ | |
| { return cspan_size(self); } \ | |
| static inline Self Self##_transposed(Self sp) \ | |
| { _cspan_transpose(sp.shape, sp.stride.d, cspan_rank(&sp)); return sp; } \ | |
| static inline Self Self##_swapped_axes(Self sp, int ax1, int ax2) \ | |
| { _cspan_swap_axes(sp.shape, sp.stride.d, cspan_rank(&sp), ax1, ax2); return sp; } \ | |
| struct stc_nostruct | |
| #define use_cspan_with_eq_4(Self, T, i_eq, RANK) \ | |
| use_cspan_3(Self, T, RANK); \ | |
| static inline bool Self##_eq(const Self* x, const Self* y) { \ | |
| if (memcmp(x->shape, y->shape, sizeof x->shape) != 0) \ | |
| return false; \ | |
| for (Self##_iter _i = Self##_begin(x), _j = Self##_begin(y); \ | |
| _i.ref != NULL; Self##_next(&_i), Self##_next(&_j)) \ | |
| { if (!(i_eq(_i.ref, _j.ref))) return false; } \ | |
| return true; \ | |
| } \ | |
| static inline bool Self##_equals(Self sp1, Self sp2) \ | |
| { return Self##_eq(&sp1, &sp2); } \ | |
| struct stc_nostruct | |
| #define use_cspan2(Self, T) use_cspan_2(Self, T); use_cspan_3(Self##2, T, 2) | |
| #define use_cspan3(Self, T) use_cspan2(Self, T); use_cspan_3(Self##3, T, 3) | |
| #define use_cspan2_with_eq(Self, T, eq) use_cspan_with_eq_3(Self, T, eq); \ | |
| use_cspan_with_eq_4(Self##2, T, eq, 2) | |
| #define use_cspan3_with_eq(Self, T, eq) use_cspan2_with_eq(Self, T, eq); \ | |
| use_cspan_with_eq_4(Self##3, T, eq, 3) | |
| #define use_cspan_tuple(N) typedef struct { _istride d[N]; } cspan_tuple##N | |
| use_cspan_tuple(1); use_cspan_tuple(2); | |
| use_cspan_tuple(3); use_cspan_tuple(4); | |
| use_cspan_tuple(5); use_cspan_tuple(6); | |
| use_cspan_tuple(7); use_cspan_tuple(8); | |
| // Construct a cspan from a pointer+size | |
| #define cspan_from_n(dataptr, n) \ | |
| {.data=dataptr, \ | |
| .shape={(_istride)(n)}, \ | |
| .stride=(cspan_tuple1){.d={1}}} | |
| // Create a 1d-span in the local lexical scope. N must be a compile-time constant. | |
| #define cspan_by_copy(dataptr, N) \ | |
| cspan_from_n(memcpy((char[(N)*sizeof *(dataptr)]){0}, dataptr, (N)*sizeof *(dataptr)), N) | |
| // Create a zeroed out 1d-span in the local lexical scope. N must be a compile-time constant. | |
| #define cspan_zeros(Span, N) \ | |
| ((Span)cspan_from_n((Span##_value[N]){0}, N)) | |
| // Create a global scope 1d-span from constant initializer list, otherwise like c_make(Span, ...). | |
| #define cspan_make(Span, ...) \ | |
| ((Span)cspan_from_n(c_make_array(Span##_value, __VA_ARGS__), \ | |
| sizeof((Span##_value[])__VA_ARGS__)/sizeof(Span##_value))) | |
| // Make 1d-span from a c-array. | |
| #define cspan_from_array(array) \ | |
| cspan_from_n(array, c_countof(array)) | |
| // Make 1d-span from a vec or stack container. | |
| #define cspan_from_vec(container) \ | |
| cspan_from_n((container)->data, (container)->size) | |
| // Make a 1d-sub-span from a 1d-span | |
| #define cspan_subspan(self, offset, count) \ | |
| {.data=cspan_at(self, offset), \ | |
| .shape={(_istride)(count)}, \ | |
| .stride=(self)->stride} | |
| // Accessors | |
| // | |
| #define cspan_size(self) _cspan_size((self)->shape, cspan_rank(self)) | |
| #define cspan_rank(self) c_countof((self)->shape) // constexpr | |
| #define cspan_at(self, ...) ((self)->data + cspan_index(self, __VA_ARGS__)) | |
| #define cspan_front(self) ((self)->data) | |
| #define cspan_back(self) ((self)->data + cspan_size(self) - 1) | |
| #define cspan_index(...) cspan_index_fn(__VA_ARGS__, c_COMMA_N(cspan_index_3d), c_COMMA_N(cspan_index_2d), \ | |
| c_COMMA_N(cspan_index_1d),)(__VA_ARGS__) | |
| #define cspan_index_fn(self, i,j,k,n, ...) c_TUPLE_AT_1(n, cspan_index_nd,) | |
| #define cspan_index_1d(self, i) (c_static_assert(cspan_rank(self) == 1), \ | |
| assert((i) < (self)->shape[0]), \ | |
| (i)*(self)->stride.d[0]) | |
| #define cspan_index_2d(self, i,j) (c_static_assert(cspan_rank(self) == 2), \ | |
| assert((i) < (self)->shape[0] && (j) < (self)->shape[1]), \ | |
| (i)*(self)->stride.d[0] + (j)*(self)->stride.d[1]) | |
| #define cspan_index_3d(self, i,j,k) (c_static_assert(cspan_rank(self) == 3), \ | |
| assert((i) < (self)->shape[0] && (j) < (self)->shape[1] && (k) < (self)->shape[2]), \ | |
| (i)*(self)->stride.d[0] + (j)*(self)->stride.d[1] + (k)*(self)->stride.d[2]) | |
| #define cspan_index_nd(self, ...) _cspan_index((self)->shape, (self)->stride.d, c_make_array(intptr_t, {__VA_ARGS__}), \ | |
| (c_static_assert(cspan_rank(self) == c_NUMARGS(__VA_ARGS__)), cspan_rank(self))) | |
| // Multi-dimensional span constructors | |
| // | |
| typedef enum {c_ROWMAJOR, c_COLMAJOR, c_STRIDED} cspan_layout; | |
| #define cspan_is_colmajor(self) \ | |
| _cspan_is_layout(c_COLMAJOR, (self)->shape, (self)->stride.d, cspan_rank(self)) | |
| #define cspan_is_rowmajor(self) \ | |
| _cspan_is_layout(c_ROWMAJOR, (self)->shape, (self)->stride.d, cspan_rank(self)) | |
| #define cspan_get_layout(self) \ | |
| (cspan_is_rowmajor(self) ? c_ROWMAJOR : cspan_is_colmajor(self) ? c_COLMAJOR : c_STRIDED) | |
| #define cspan_md(dataptr, ...) \ | |
| cspan_md_layout(c_ROWMAJOR, dataptr, __VA_ARGS__) | |
| // Span2 sp1 = cspan_md(data, 30, 50); | |
| // Span2 sp2 = {data, cspan_shape(15, 25), cspan_strides(50*2, 2)}; // every second in each dim | |
| #define cspan_shape(...) {__VA_ARGS__} | |
| #define cspan_strides(...) {.d={__VA_ARGS__}} | |
| #define cspan_md_layout(layout, dataptr, ...) \ | |
| {.data=dataptr, \ | |
| .shape={__VA_ARGS__}, \ | |
| .stride=*(c_JOIN(cspan_tuple,c_NUMARGS(__VA_ARGS__))*) \ | |
| _cspan_shape2stride(layout, c_make_array(_istride, {__VA_ARGS__}), c_NUMARGS(__VA_ARGS__))} | |
| // Transpose matrix | |
| #define cspan_transpose(self) \ | |
| _cspan_transpose((self)->shape, (self)->stride.d, cspan_rank(self)) | |
| // Swap two matrix axes | |
| #define cspan_swap_axes(self, ax1, ax2) \ | |
| _cspan_swap_axes((self)->shape, (self)->stride.d, cspan_rank(self), ax1, ax2) | |
| // Set all span elements to value. | |
| #define cspan_set_all(Span, self, value) do { \ | |
| Span##_value _v = value; \ | |
| for (c_each(_it, Span, *(self))) *_it.ref = _v; \ | |
| } while (0) | |
| // General slicing function. | |
| // | |
| #define c_END (_istride)(((size_t)1 << (sizeof(_istride)*8 - 1)) - 1) | |
| #define c_ALL 0,c_END | |
| #define cspan_slice(self, Outspan, ...) \ | |
| Outspan##_slice_((self)->data, (self)->shape, (self)->stride.d, \ | |
| c_make_array2d(const intptr_t, 3, {__VA_ARGS__}), \ | |
| (c_static_assert(cspan_rank(self) == sizeof((intptr_t[][3]){__VA_ARGS__})/sizeof(intptr_t[3])), cspan_rank(self))) | |
| // submd#(): Reduces rank, fully typesafe + range checked by default | |
| // int ms3[N1][N2][N3]; | |
| // int (*ms2)[N3] = ms3[1]; // traditional, lose range test/info. VLA. | |
| // Span3 ms3 = cspan_md(data, N1,N2,N3); // Uses cspan_md instead. | |
| // *cspan_at(&ms3, 1,1,1) = 42; | |
| // Span2 ms2 = cspan_slice(&ms3, Span2, {1}, {c_ALL}, {c_ALL}); | |
| // Span2 ms2 = cspan_submd3(&ms3, 1); // Same as line above, optimized. | |
| #define cspan_submd2(self, x) \ | |
| {.data=cspan_at(self, x, 0), \ | |
| .shape={(self)->shape[1]}, \ | |
| .stride=(cspan_tuple1){.d={(self)->stride.d[1]}}} | |
| #define cspan_submd3(...) c_MACRO_OVERLOAD(cspan_submd3, __VA_ARGS__) | |
| #define cspan_submd3_2(self, x) \ | |
| {.data=cspan_at(self, x, 0, 0), \ | |
| .shape={(self)->shape[1], (self)->shape[2]}, \ | |
| .stride=(cspan_tuple2){.d={(self)->stride.d[1], (self)->stride.d[2]}}} | |
| #define cspan_submd3_3(self, x, y) \ | |
| {.data=cspan_at(self, x, y, 0), \ | |
| .shape={(self)->shape[2]}, \ | |
| .stride=(cspan_tuple1){.d={(self)->stride.d[2]}}} | |
| #define cspan_submd4(...) c_MACRO_OVERLOAD(cspan_submd4, __VA_ARGS__) | |
| #define cspan_submd4_2(self, x) \ | |
| {.data=cspan_at(self, x, 0, 0, 0), \ | |
| .shape={(self)->shape[1], (self)->shape[2], (self)->shape[3]}, \ | |
| .stride=(cspan_tuple3){.d={(self)->stride.d[1], (self)->stride.d[2], (self)->stride.d[3]}}} | |
| #define cspan_submd4_3(self, x, y) \ | |
| {.data=cspan_at(self, x, y, 0, 0), \ | |
| .shape={(self)->shape[2], (self)->shape[3]}, \ | |
| .stride=(cspan_tuple2){.d={(self)->stride.d[2], (self)->stride.d[3]}}} | |
| #define cspan_submd4_4(self, x, y, z) \ | |
| {.data=cspan_at(self, x, y, z, 0), \ | |
| .shape={(self)->shape[3]}, \ | |
| .stride=(cspan_tuple1){.d={(self)->stride.d[3]}}} | |
| #define cspan_print(...) c_MACRO_OVERLOAD(cspan_print, __VA_ARGS__) | |
| #define cspan_print_3(Span, fmt, span) \ | |
| cspan_print_4(Span, fmt, span, stdout) | |
| #define cspan_print_4(Span, fmt, span, fp) \ | |
| cspan_print_5(Span, fmt, span, fp, "[]") | |
| #define cspan_print_5(Span, fmt, span, fp, brackets) \ | |
| cspan_print_6(Span, fmt, span, fp, brackets, c_EXPAND) | |
| #define cspan_print_complex(Span, prec, span, fp) \ | |
| cspan_print_6(Span, "%." #prec "f%+." #prec "fi", span, fp, "[]", cspan_CMPLX_FLD) | |
| #define cspan_CMPLX_FLD(x) creal(x), cimag(x) | |
| #define cspan_print_6(Span, fmt, span, fp, brackets, field) do { \ | |
| const Span _s = span; \ | |
| const char *_f = fmt, *_b = brackets; \ | |
| FILE* _fp = fp; \ | |
| int _w, _max = 0; \ | |
| char _res[2][20], _fld[64]; \ | |
| for (c_each(_it, Span, _s)) { \ | |
| _w = snprintf(NULL, 0ULL, _f, field(_it.ref[0])); \ | |
| if (_w > _max) _max = _w; \ | |
| } \ | |
| for (c_each(_it, Span, _s)) { \ | |
| _cspan_print_assist(_it.pos, _s.shape, cspan_rank(&_s), _b, _res); \ | |
| _w = _max + (_it.pos[cspan_rank(&_s) - 1] > 0); \ | |
| snprintf(_fld, sizeof _fld, _f, field(_it.ref[0])); \ | |
| fprintf(_fp, "%s%*s%s", _res[0], _w, _fld, _res[1]); \ | |
| } \ | |
| } while (0) | |
| /* ----- PRIVATE ----- */ | |
| static inline intptr_t _cspan_size(const _istride shape[], int rank) { | |
| intptr_t size = shape[0]; | |
| while (--rank) size *= shape[rank]; | |
| return size; | |
| } | |
| static inline void _cspan_swap_axes(_istride shape[], _istride stride[], | |
| int rank, int ax1, int ax2) { | |
| (void)rank; | |
| assert(c_uless(ax1, rank) & c_uless(ax2, rank)); | |
| c_swap(_istride, shape + ax1, shape + ax2); | |
| c_swap(_istride, stride + ax1, stride + ax2); | |
| } | |
| static inline void _cspan_transpose(_istride shape[], _istride stride[], int rank) { | |
| for (int i = 0; i < --rank; ++i) { | |
| c_swap(_istride, shape + i, shape + rank); | |
| c_swap(_istride, stride + i, stride + rank); | |
| } | |
| } | |
| static inline intptr_t _cspan_index(const _istride shape[], const _istride stride[], | |
| const intptr_t args[], int rank) { | |
| intptr_t off = 0; | |
| (void)shape; | |
| while (rank-- != 0) { | |
| assert(args[rank] < shape[rank]); | |
| off += args[rank]*stride[rank]; | |
| } | |
| return off; | |
| } | |
| void _cspan_print_assist(_istride pos[], const _istride shape[], const int rank, | |
| const char* brackets, char result[2][20]); | |
| bool _cspan_nextN(_istride pos[], const _istride shape[], const _istride stride[], int rank, intptr_t* off); | |
| #define _cspan_next1(pos, shape, stride, rank, off) (++pos[0] == shape[0]) | |
| #define _cspan_next2(pos, shape, stride, rank, off) (++pos[1] == shape[1] && \ | |
| (pos[1] = 0, *off += stride[0] - (intptr_t)shape[1]*stride[1], ++pos[0] == shape[0])) | |
| #define _cspan_next3(pos, shape, stride, rank, off) (++pos[2] == shape[2] && \ | |
| (pos[2] = 0, *off += stride[1] - (intptr_t)shape[2]*stride[2], ++pos[1] == shape[1]) && \ | |
| (pos[1] = 0, *off += stride[0] - (intptr_t)shape[1]*stride[1], ++pos[0] == shape[0])) | |
| #define _cspan_next4 _cspan_nextN | |
| #define _cspan_next5 _cspan_nextN | |
| #define _cspan_next6 _cspan_nextN | |
| #define _cspan_next7 _cspan_nextN | |
| #define _cspan_next8 _cspan_nextN | |
| intptr_t _cspan_slice(_istride oshape[], _istride ostride[], int* orank, | |
| const _istride shape[], const _istride stride[], | |
| const intptr_t args[][3], int rank); | |
| _istride* _cspan_shape2stride(cspan_layout layout, _istride shape[], int rank); | |
| bool _cspan_is_layout(cspan_layout layout, const _istride shape[], const _istride strides[], int rank); | |
| #endif // CSPAN_H_INCLUDED | |
| /* --------------------- IMPLEMENTATION --------------------- */ | |
| #ifdef CSPAN_IMPLEMENT | |
| bool _cspan_is_layout(cspan_layout layout, const _istride shape[], const _istride strides[], int rank) { | |
| _istride tmpshape[16]; // 16 = "max" rank | |
| size_t sz = (size_t)rank*sizeof(_istride); | |
| memcpy(tmpshape, shape, sz); | |
| return memcmp(strides, _cspan_shape2stride(layout, tmpshape, rank), sz) == 0; | |
| } | |
| void _cspan_print_assist(_istride pos[], const _istride shape[], const int rank, | |
| const char* brackets, char result[2][20]) { | |
| int n = 0, j = 0, r = rank - 1; | |
| memset(result, 0, 32); | |
| // left braces: | |
| while (n <= r && pos[r - n] == 0) | |
| ++n; | |
| if (n) for (; j < rank; ++j) | |
| result[0][j] = j < rank - n ? ' ' : brackets[0]; | |
| // right braces: | |
| for (j = 0; r >= 0 && pos[r] + 1 == shape[r]; --r, ++j) | |
| result[1][j] = brackets[1]; | |
| // comma and newlines: | |
| n = (j > 0) + ((j > 1) & (j < rank)); | |
| if (brackets[2] && j < rank) | |
| result[1][j++] = brackets[2]; // comma | |
| while (n--) | |
| result[1][j++] = '\n'; | |
| } | |
| bool _cspan_nextN(_istride pos[], const _istride shape[], const _istride stride[], | |
| int rank, intptr_t* off) { | |
| ++pos[--rank]; | |
| for (; rank && pos[rank] == shape[rank]; --rank) { | |
| pos[rank] = 0; ++pos[rank - 1]; | |
| *off += stride[rank - 1] - (intptr_t)shape[rank]*stride[rank]; | |
| } | |
| return pos[rank] == shape[rank]; | |
| } | |
| _istride* _cspan_shape2stride(cspan_layout layout, _istride shpstri[], int rank) { | |
| int i, inc; | |
| if (layout == c_COLMAJOR) i = 0, inc = 1; | |
| else i = rank - 1, inc = -1; | |
| _istride k = 1, s1 = shpstri[i], s2; | |
| shpstri[i] = 1; | |
| while (--rank) { | |
| i += inc; | |
| s2 = shpstri[i]; | |
| shpstri[i] = (k *= s1); | |
| s1 = s2; | |
| } | |
| return shpstri; | |
| } | |
| intptr_t _cspan_slice(_istride oshape[], _istride ostride[], int* orank, | |
| const _istride shape[], const _istride stride[], | |
| const intptr_t args[][3], int rank) { | |
| intptr_t end, off = 0; | |
| int i = 0, oi = 0; | |
| for (; i < rank; ++i) { | |
| off += args[i][0]*stride[i]; | |
| switch (args[i][1]) { | |
| case 0: assert(c_uless(args[i][0], shape[i])); continue; | |
| case c_END: end = shape[i]; break; | |
| default: end = args[i][1]; | |
| } | |
| oshape[oi] = (_istride)(end - args[i][0]); | |
| ostride[oi] = stride[i]; | |
| assert((oshape[oi] > 0) & !c_uless(shape[i], end)); | |
| if (args[i][2] > 0) { | |
| ostride[oi] *= (_istride)args[i][2]; | |
| oshape[oi] = (oshape[oi] - 1)/(_istride)args[i][2] + 1; | |
| } | |
| ++oi; | |
| } | |
| *orank = oi; | |
| return off; | |
| } | |
| #endif // CSPAN_IMPLEMENT |
Good question. I need It only to write the cspan_md() / cspan_md_layout() constructor macros in this generic way. It returns a cspan_tupleN type, where N depends on number of args passed to it. I could have avoided wrapping the stride by making one cspan_mdN() macro per dimension, which would have made access to the stride a bit more natural syntactically.
I am new to this C macro trickery. What is the limitation that prevents you from doing this for a general size?
Could you use another macro to dispatch to cspan_md1, cspan_md2, etc. based on the number of arguments?
BTW, this is a very cool thing that you have accomplished here.
As a reminder for myself, the cspan_submdN() functions does not work for the "general size" as they are only defined for dims 2, 3, 4. However, these are just convenience/optimized versions of the general cspan_slice() function.
Beyond 14 dimensions, you would need to extend the _c_ARG_N and _c_RSEQ_N.
Beyond 8 dimensions you would need to (e.g. for dim 9):
#define _cspan_next9 _cspan_nextN
use_cspan_tuple(9);
Is there a reason why you are wrapping strides array in 'cspan_tuple' structure?