ivanstepanovftw · March 6, 2026 18:33
diff --git a/float.zig b/float.zig
 // TODO: normalfloat https://arxiv.org/pdf/2305.14314
 // TODO: NVFP4
 const std = @import("std");
 const math = std.math;
 const pretty = @import("pretty.zig");
 const print = pretty.print;
 const p = pretty.p;
 const comptimePrint = std.fmt.comptimePrint;

 // Raises the compile-time branch quota to 100000.
 // NOTE(review): presumably this only affects evaluation of this comptime block itself,
 // not other comptime code in the file (main and the tests set their own quota) — confirm
 // whether this block has any effect.
 comptime {
    @setEvalBranchQuota(100000);
 }

 /// Generic binary floating point type with an IEEE 754-style layout: a sign field,
 /// a biased exponent field and a fraction field (implicit leading 1 for normal values).
 /// An all-ones exponent encodes infinity (zero fraction) or NaN (non-zero fraction).
 ///
 /// - `sign_bits`: width of the sign field.
 /// - `exponent_bits`: width of the biased exponent field.
 /// - `fraction_bits`: width of the stored fraction (without the implicit bit).
 /// - `subnormals`: when false, results too small for a normal encoding are flushed to
 ///   zero instead of being stored with a zero exponent and a non-zero fraction.
 ///
 /// Conversions and arithmetic drop excess low-order bits (truncation, no rounding).
 /// Intermediate significands are held in `u128`.
 /// https://en.wikipedia.org/wiki/IEEE_754
 /// https://github.com/ziglang/zig/blob/f29bdd6746691d0a547140e435056a000419480f/lib/std/math/float.zig#L13
 /// https://github.com/ziglang/zig/blob/f29bdd6746691d0a547140e435056a000419480f/lib/std/math.zig#L1725
 pub fn Float(
    comptime sign_bits: comptime_int,
    comptime exponent_bits: comptime_int,
    comptime fraction_bits: comptime_int,
    comptime subnormals: bool,
 ) type {
    return packed struct {
        const Self = @This();
        pub const Sign = std.meta.Int(.unsigned, sign_bits);
        pub const BiasedExponent = std.meta.Int(.unsigned, exponent_bits);
        pub const Fraction = std.meta.Int(.unsigned, fraction_bits);
        pub const exponent_bias = (1 << (exponent_bits - 1)) - 1; // https://en.wikipedia.org/wiki/Exponent_bias
        /// Whether this format stores subnormal values (mirrors the `subnormals` parameter).
        /// `cast` reads this from the target type to decide between subnormal and flush-to-zero.
        pub const supports_subnormals = subnormals;

        sign: Sign,
        biased_exponent: BiasedExponent,
        fraction: Fraction,

        /// Builds a value directly from its three raw bit fields.
        pub fn init(sign: Sign, biased_exponent: BiasedExponent, fraction: Fraction) Self {
            return Self{ .sign = sign, .biased_exponent = biased_exponent, .fraction = fraction };
        }

        /// Converts this value to `Other`, which is either a builtin float (f16, f32, f64,
        /// f128) or another struct produced by `Float`.
        ///
        /// Infinity, NaN and signed zero are mapped onto the corresponding target encodings.
        /// Finite values whose exponent is too large become infinity; values too small become
        /// a target subnormal or zero. Excess fraction bits are truncated.
        /// Any other target type is rejected at compile time.
        pub fn cast(self: Self, comptime Other: type) Other {
            switch (@typeInfo(Other)) {
                .float => {
                    const bits = @bitSizeOf(Other);
                    // Custom layout with the same field widths as the builtin float.
                    const OtherStruct = switch (bits) {
                        16 => Float(1, 5, 10, true),
                        32 => Float(1, 8, 23, true),
                        64 => Float(1, 11, 52, true),
                        // 80 => Float(1, 15, 64, true), // TODO: f80 is not supported yet because of implicit leading bit in the fraction
                        128 => Float(1, 15, 112, true),
                        else => @compileError("Float.cast: unsupported builtin float type " ++ @typeName(Other)),
                    };
                    const OtherBinary = std.meta.Int(.unsigned, bits);

                    // Convert to the matching custom layout, then assemble the raw bits
                    // sign | exponent | fraction from most to least significant.
                    const other_struct = self.cast(OtherStruct);
                    var other_binary: OtherBinary = 0;
                    other_binary |= @as(OtherBinary, other_struct.sign) << (bits - 1);
                    other_binary |= @as(OtherBinary, other_struct.biased_exponent) << (bits - 1 - @bitSizeOf(OtherStruct.BiasedExponent));
                    other_binary |= @as(OtherBinary, other_struct.fraction) << (bits - 1 - @bitSizeOf(OtherStruct.BiasedExponent) - @bitSizeOf(OtherStruct.Fraction));
                    return @bitCast(other_binary);
                },
                .@"struct" => { // Assume the struct is a custom float
                    const sign: Other.Sign = @as(Other.Sign, self.sign);

                    // All-ones exponent values of source and target (Inf/NaN marker).
                    const self_inf = (1 << @bitSizeOf(Self.BiasedExponent)) - 1;
                    const other_inf = (1 << @bitSizeOf(Other.BiasedExponent)) - 1;

                    // The target format decides whether subnormals may be produced. Structs
                    // that do not expose `supports_subnormals` fall back to this type's setting.
                    const target_subnormals = if (@hasDecl(Other, "supports_subnormals")) Other.supports_subnormals else subnormals;

                    // Infinity and NaN cases
                    if (self.biased_exponent == self_inf) {
                        if (self.fraction == 0) {
                            return Other.init(sign, @intCast(other_inf), 0); // Inf
                        } else {
                            // NaN: keep the most significant payload bits that fit.
                            const s_bits = @bitSizeOf(Self.Fraction);
                            const o_bits = @bitSizeOf(Other.Fraction);
                            var o_frac: Other.Fraction = 0;
                            if (s_bits > o_bits) {
                                o_frac = @truncate(self.fraction >> @intCast(s_bits - o_bits));
                            } else {
                                o_frac = @as(Other.Fraction, self.fraction) << @intCast(o_bits - s_bits);
                            }
                            // A zero fraction would encode infinity; force a non-zero payload
                            // so the result is still a NaN.
                            if (o_frac == 0) o_frac = 1;
                            return Other.init(sign, @intCast(other_inf), o_frac);
                        }
                    }

                    // Zero case
                    if (self.biased_exponent == 0 and self.fraction == 0) {
                        return Other.init(sign, 0, 0);
                    }

                    // ------------------------------------------------------------------
                    // 1. Extract true exponent and integer significand (value = M * 2^E)
                    // ------------------------------------------------------------------
                    var M: u128 = self.fraction;
                    var E: i32 = 0;
                    const S_bias = @as(i32, @intCast(Self.exponent_bias));
                    const S_Fs = @as(i32, @intCast(@bitSizeOf(Self.Fraction)));

                    if (self.biased_exponent == 0) {
                        // Source is Subnormal (no implicit 1)
                        E = 1 - S_bias - S_Fs;
                    } else {
                        // Source is Normal (add implicit 1)
                        M |= (@as(u128, 1) << @intCast(S_Fs));
                        E = @as(i32, @intCast(self.biased_exponent)) - S_bias - S_Fs;
                    }

                    const O_bias = @as(i32, @intCast(Other.exponent_bias));
                    const O_Fs = @as(i32, @intCast(@bitSizeOf(Other.Fraction)));

                    // ------------------------------------------------------------------
                    // 2. Normalize M so its MSB is exactly positioned at O_Fs
                    // ------------------------------------------------------------------
                    // M is non-zero here (zero was handled above), so @clz(M) <= 127.
                    const msb_idx = 127 - @as(i32, @intCast(@clz(M)));
                    var target_M: u128 = 0;
                    var target_E: i32 = E;

                    if (msb_idx < O_Fs) {
                        const shl = @as(u7, @intCast(O_Fs - msb_idx));
                        target_M = M << shl;
                        target_E -= @as(i32, @intCast(shl));
                    } else if (msb_idx > O_Fs) {
                        // Right shift drops low-order bits (truncation).
                        const shr = @as(u7, @intCast(msb_idx - O_Fs));
                        target_M = M >> shr;
                        target_E += @as(i32, @intCast(shr));
                    } else {
                        target_M = M;
                        target_E = E;
                    }

                    // ------------------------------------------------------------------
                    // 3. Pack into the Target format
                    // ------------------------------------------------------------------

                    // Calculate target biased exponent assuming it's a normal number
                    const final_O_exp = target_E + O_bias + O_Fs;

                    if (final_O_exp >= other_inf) {
                        // Overflow to Infinity
                        return Other.init(sign, @intCast(other_inf), 0);
                    } else if (final_O_exp > 0) {
                        // Target is Normal: store the bits below the implicit one.
                        const mask = (@as(u128, 1) << @intCast(O_Fs)) - 1;
                        const o_frac = @as(Other.Fraction, @truncate(target_M & mask));
                        return Other.init(sign, @intCast(final_O_exp), o_frac);
                    } else {
                        // Target is Subnormal or Underflow
                        if (target_subnormals) {
                            // If exponent <= 0, we right-shift the mantissa
                            const shift_right = 1 - final_O_exp;
                            if (shift_right >= 128) {
                                return Other.init(sign, 0, 0); // Underflow to absolute zero
                            } else {
                                const shr = @as(u7, @intCast(shift_right));
                                const o_frac = @as(Other.Fraction, @truncate(target_M >> shr));
                                return Other.init(sign, 0, o_frac);
                            }
                        } else {
                            // Subnormals are flushed to zero
                            return Other.init(sign, 0, 0);
                        }
                    }
                },
                else => @compileError("Float.cast: target must be a builtin float or a Float(...) struct, got " ++ @typeName(Other)),
            }
        }

        /// Internal representation for arithmetic.
        /// For finite values: value = mantissa * 2^(exp - fraction_bits).
        const Unpacked = struct {
            sign: Sign,
            exp: i32,
            mantissa: u128, // Includes implicit bit
            is_nan: bool = false,
            is_inf: bool = false,

            /// Position of the implicit leading one of a normal significand.
            const implicit_bit = @as(u128, 1) << fraction_bits;
        };

        /// Splits the encoded value into sign, unbiased exponent and integer significand.
        /// Inf/NaN are flagged and carry the raw fraction; zero has mantissa 0; subnormals
        /// use exponent `1 - exponent_bias` without the implicit bit.
        fn unpack(self: Self) Unpacked {
            const max_exp = (1 << exponent_bits) - 1;
            if (self.biased_exponent == max_exp) {
                return .{
                    .sign = self.sign,
                    .exp = 0,
                    .mantissa = self.fraction,
                    .is_nan = self.fraction != 0,
                    .is_inf = self.fraction == 0,
                };
            }
            if (self.biased_exponent == 0) {
                if (self.fraction == 0) return .{ .sign = self.sign, .exp = -exponent_bias, .mantissa = 0 };
                // Subnormal
                return .{
                    .sign = self.sign,
                    .exp = 1 - exponent_bias,
                    .mantissa = self.fraction,
                };
            }
            // Normal
            return .{
                .sign = self.sign,
                .exp = @as(i32, @intCast(self.biased_exponent)) - exponent_bias,
                .mantissa = Unpacked.implicit_bit | self.fraction,
            };
        }

        /// Encodes an unpacked value. NaN is encoded with fraction 1. Finite values are
        /// normalized, then stored as normal, subnormal (if enabled), zero on underflow or
        /// infinity on overflow. Bits shifted out are truncated.
        fn pack(unpacked: Unpacked) Self {
            const max_biased = (1 << exponent_bits) - 1;
            if (unpacked.is_nan) return Self.init(unpacked.sign, max_biased, 1);
            if (unpacked.is_inf) return Self.init(unpacked.sign, max_biased, 0);
            if (unpacked.mantissa == 0) return Self.init(unpacked.sign, 0, 0);

            var m = unpacked.mantissa;
            var e = unpacked.exp;

            // 1. Normalize: Ensure MSB is at the implicit bit position
            const msb = 127 - @clz(m);
            const target_bit = fraction_bits;

            if (msb > target_bit) {
                const shift = @as(u7, @intCast(msb - target_bit));
                m >>= shift;
                e += shift;
            } else if (msb < target_bit) {
                const shift = @as(u7, @intCast(target_bit - msb));
                m <<= shift;
                e -= shift;
            }

            // 2. Handle Exponent range
            const biased_e = e + exponent_bias;

            if (biased_e >= max_biased) {
                return Self.init(unpacked.sign, max_biased, 0); // Overflow to Inf
            }

            if (biased_e <= 0) {
                if (!subnormals) return Self.init(unpacked.sign, 0, 0);
                // Subnormal handling. Range-check in i32 first: for deep underflow the
                // shift can exceed what fits in u7, and everything would be shifted out anyway.
                const wide_shift = 1 - biased_e;
                if (wide_shift > fraction_bits + 1) return Self.init(unpacked.sign, 0, 0);
                const shift = @as(u7, @intCast(wide_shift));
                m >>= shift;
                return Self.init(unpacked.sign, 0, @truncate(m));
            }

            // Normal: drop the implicit bit before storing the fraction.
            return Self.init(unpacked.sign, @intCast(biased_e), @truncate(m ^ Unpacked.implicit_bit));
        }

        /// Returns `self + other`.
        /// NaN operands and `inf + (-inf)` give NaN; an infinite operand is returned as is.
        /// The smaller operand is aligned by a truncating right shift. An exact cancellation
        /// of opposite-signed operands yields +0 regardless of operand order.
        pub fn add(self: Self, other: Self) Self {
            const a = self.unpack();
            const b = other.unpack();

            // Handle Specials
            if (a.is_nan or b.is_nan) return pack(.{ .sign = 0, .exp = 0, .mantissa = 0, .is_nan = true });
            if (a.is_inf and b.is_inf and a.sign != b.sign) return pack(.{ .sign = 0, .exp = 0, .mantissa = 0, .is_nan = true });
            if (a.is_inf) return self;
            if (b.is_inf) return other;

            // Align exponents: shift the operand with the smaller exponent down to the larger one.
            var m_a = a.mantissa;
            var m_b = b.mantissa;
            const res_exp = @max(a.exp, b.exp);

            if (a.exp > b.exp) {
                const diff = @as(u7, @intCast(@min(127, a.exp - b.exp)));
                m_b >>= diff;
            } else if (b.exp > a.exp) {
                const diff = @as(u7, @intCast(@min(127, b.exp - a.exp)));
                m_a >>= diff;
            }

            // Add/Sub significands
            var res_mant: u128 = 0;
            var res_sign: Sign = a.sign;

            if (a.sign == b.sign) {
                res_mant = m_a + m_b;
            } else if (m_a > m_b) {
                res_mant = m_a - m_b;
                res_sign = a.sign;
            } else if (m_b > m_a) {
                res_mant = m_b - m_a;
                res_sign = b.sign;
            } else {
                // Opposite signs, equal magnitudes: the sum is exactly zero, which is +0.
                res_mant = 0;
                res_sign = 0;
            }

            return pack(.{ .sign = res_sign, .exp = res_exp, .mantissa = res_mant });
        }

        /// Returns `self * other`. The result sign is the XOR of the operand signs.
        /// NaN operands and `inf * 0` give NaN; otherwise an infinite operand gives infinity
        /// and a zero operand gives zero. The significand product is truncated.
        pub fn mul(self: Self, other: Self) Self {
            const a = self.unpack();
            const b = other.unpack();

            const res_sign = a.sign ^ b.sign;

            // Handle Specials
            if (a.is_nan or b.is_nan) return pack(.{ .sign = res_sign, .exp = 0, .mantissa = 0, .is_nan = true });
            // NaN is excluded above, so a zero mantissa on a non-infinite operand means zero.
            const a_is_zero = !a.is_inf and a.mantissa == 0;
            const b_is_zero = !b.is_inf and b.mantissa == 0;
            if ((a.is_inf and b_is_zero) or (b.is_inf and a_is_zero)) {
                return pack(.{ .sign = res_sign, .exp = 0, .mantissa = 0, .is_nan = true });
            }
            if (a.is_inf or b.is_inf) return pack(.{ .sign = res_sign, .exp = 0, .mantissa = 0, .is_inf = true });
            if (a_is_zero or b_is_zero) return Self.init(res_sign, 0, 0);

            // Multiply significands
            // Result is in range [1, 4) if both are normal
            // Each significand has at most fraction_bits + 1 bits; use a product type wide
            // enough for both, so wide formats do not overflow u128.
            const Product = if (2 * (fraction_bits + 1) <= 128) u128 else u256;
            // Cannot wrap: the product needs at most 2 * (fraction_bits + 1) bits.
            const product = @as(Product, a.mantissa) *% @as(Product, b.mantissa);
            const res_mant: u128 = @intCast(product >> fraction_bits);
            const res_exp = a.exp + b.exp;

            return pack(.{ .sign = res_sign, .exp = res_exp, .mantissa = res_mant });
        }

        // const Shift = std.meta.Int(.unsigned, @max(exponent_bits, fraction_bits));
        //
        // pub fn shiftRight(self: Self, shift: Shift) Self {
        //     if (shift == 0) return self;
        //     // return Self.init(self.sign, self.exponent -| shift, @shlExact(self.significand, shift));
        //     return Self.init(self.sign, @truncate(self.biased_exponent -| shift), self.fraction);
        // }

        /// Writes the three fields in binary, zero-padded to their widths and separated by
        /// single spaces: "<sign> <exponent> <fraction>".
        pub fn format(self: Self, writer: anytype) !void {
            const s_fmt = "{b:0>" ++ comptimePrint("{d}", .{sign_bits}) ++ "}";
            const e_fmt = "{b:0>" ++ comptimePrint("{d}", .{exponent_bits}) ++ "}";
            const m_fmt = "{b:0>" ++ comptimePrint("{d}", .{fraction_bits}) ++ "}";
            try writer.print(
                s_fmt ++ " " ++ e_fmt ++ " " ++ m_fmt,
                .{ self.sign, self.biased_exponent, self.fraction },
            );
        }
    };
 }

 // Concrete formats used below; arguments are (sign, exponent, fraction) bit widths and
 // whether subnormals are enabled.
 const F4 = Float(1, 2, 1, true); // 4-bit: 1 sign, 2 exponent, 1 fraction
 const F8 = Float(1, 4, 3, true); // 8-bit: 1 sign, 4 exponent, 3 fraction
 const F16 = Float(1, 5, 10, true); // same field widths cast() uses for f16
 const BF16 = Float(1, 8, 7, true); // 16-bit: 1 sign, 8 exponent, 7 fraction
 const F32 = Float(1, 8, 23, true); // same field widths cast() uses for f32
 const F64 = Float(1, 11, 52, true); // same field widths cast() uses for f64
 const F128 = Float(1, 15, 112, true); // same field widths cast() uses for f128

 /// Prints comparison tables of `cast` results for visual inspection: F4 to F4, F4 to F8,
 /// F4 to f16, F8 to f16 and F32 to F16. Each row shows the input bit pattern, the expected
 /// encoding and the value produced by `cast`.
 pub fn main() void {
    @setEvalBranchQuota(50000);
    // Scratch check: shift `a` left by (bit width of a + leading zero count of a).
    {
        const a: u3 = 1;
        const b: u10 = @as(u6969, @intCast(a)) << (@as(u6969, @bitSizeOf(@TypeOf(a))) + @clz(a));
        print(.{ "a: ", a, "\n" });
        print(.{ "b: ", b, "\n" });
    }
    // Identity cast: every row should print the same bit pattern it was built from.
    {
        print(.{"F4 to F4\n"});
        print(.{"expected         | casted\n"});
        print(.{"F4     | decimal | F4\n"});
        print(.{ "0 00 0 |     0.0 | ", p("f", F4.init(0, 0b00, 0).cast(F4)), "\n" });
        print(.{ "0 00 1 |     0.5 | ", p("f", F4.init(0, 0b00, 1).cast(F4)), "\n" });
        print(.{ "0 01 0 |     1.0 | ", p("f", F4.init(0, 0b01, 0).cast(F4)), "\n" });
        print(.{ "0 01 1 |     1.5 | ", p("f", F4.init(0, 0b01, 1).cast(F4)), "\n" });
        print(.{ "0 10 0 |     2.0 | ", p("f", F4.init(0, 0b10, 0).cast(F4)), "\n" });
        print(.{ "0 10 1 |     3.0 | ", p("f", F4.init(0, 0b10, 1).cast(F4)), "\n" });
    }
    // Widening cast between two custom formats.
    {
        print(.{"-" ** 80 ++ "\n"});
        print(.{"F4 to F8\n"});
        print(.{"given            | expected   | casted\n"});
        print(.{"F4     | decimal | F8         | F8\n"});
        print(.{ "0 00 0 |     0.0 | 0 0000 000 | ", p("f", F4.init(0, 0b00, 0).cast(F8)), "\n" });
        print(.{ "0 00 1 |     0.5 | 0 0110 000 | ", p("f", F4.init(0, 0b00, 1).cast(F8)), "\n" });
        print(.{ "0 01 0 |     1.0 | 0 0111 000 | ", p("f", F4.init(0, 0b01, 0).cast(F8)), "\n" });
        print(.{ "0 01 1 |     1.5 | 0 0111 100 | ", p("f", F4.init(0, 0b01, 1).cast(F8)), "\n" });
        print(.{ "0 10 0 |     2.0 | 0 1000 000 | ", p("f", F4.init(0, 0b10, 0).cast(F8)), "\n" });
        print(.{ "0 10 1 |     3.0 | 0 1000 100 | ", p("f", F4.init(0, 0b10, 1).cast(F8)), "\n" });
    }
    // F4 to builtin float: expected bits come from the builtin float literal, the casted
    // columns from cast(CustomFloat) and cast(MetaFloat); the last column is the check result.
    inline for (.{16}) |bits| {
        const CustomFloat = switch (bits) {
            16 => F16,
            32 => F32,
            64 => F64,
            128 => F128,
            else => unreachable,
        };
        const MetaFloat = std.meta.Float(bits);
        const MetaUnsigned = std.meta.Int(.unsigned, bits);
        print(.{"-" ** 80 ++ "\n"});
        print(.{ "F4 to f", bits, "\n" });
        print(.{"given            | expected         | casted\n"});
        print(.{ "F4     | decimal | f", bits, " binary       | F", bits, "                | f", bits, " decimal | ok\n" });
        print(.{"                   seeeeeffffffffff\n"});
        print(.{ "0 00 0 |     0.0 | ", p(comptimePrint("b:0>{d}", .{bits}), @as(MetaUnsigned, @bitCast(@as(MetaFloat, 0.0)))), " | ", p("f", F4.init(0, 0b00, 0).cast(CustomFloat)), " | ", p("d:>11.1", F4.init(0, 0b00, 0).cast(MetaFloat)), " | ", F4.init(0, 0b00, 0).cast(MetaFloat) == 0.0, "\n" });
        print(.{ "0 00 1 |     0.5 | ", p(comptimePrint("b:0>{d}", .{bits}), @as(MetaUnsigned, @bitCast(@as(MetaFloat, 0.5)))), " | ", p("f", F4.init(0, 0b00, 1).cast(CustomFloat)), " | ", p("d:>11.1", F4.init(0, 0b00, 1).cast(MetaFloat)), " | ", F4.init(0, 0b00, 1).cast(MetaFloat) == 0.5, "\n" });
        print(.{ "0 01 0 |     1.0 | ", p(comptimePrint("b:0>{d}", .{bits}), @as(MetaUnsigned, @bitCast(@as(MetaFloat, 1.0)))), " | ", p("f", F4.init(0, 0b01, 0).cast(CustomFloat)), " | ", p("d:>11.1", F4.init(0, 0b01, 0).cast(MetaFloat)), " | ", F4.init(0, 0b01, 0).cast(MetaFloat) == 1.0, "\n" });
        print(.{ "0 01 1 |     1.5 | ", p(comptimePrint("b:0>{d}", .{bits}), @as(MetaUnsigned, @bitCast(@as(MetaFloat, 1.5)))), " | ", p("f", F4.init(0, 0b01, 1).cast(CustomFloat)), " | ", p("d:>11.1", F4.init(0, 0b01, 1).cast(MetaFloat)), " | ", F4.init(0, 0b01, 1).cast(MetaFloat) == 1.5, "\n" });
        print(.{ "0 10 0 |     2.0 | ", p(comptimePrint("b:0>{d}", .{bits}), @as(MetaUnsigned, @bitCast(@as(MetaFloat, 2.0)))), " | ", p("f", F4.init(0, 0b10, 0).cast(CustomFloat)), " | ", p("d:>11.1", F4.init(0, 0b10, 0).cast(MetaFloat)), " | ", F4.init(0, 0b10, 0).cast(MetaFloat) == 2.0, "\n" });
        print(.{ "0 10 1 |     3.0 | ", p(comptimePrint("b:0>{d}", .{bits}), @as(MetaUnsigned, @bitCast(@as(MetaFloat, 3.0)))), " | ", p("f", F4.init(0, 0b10, 1).cast(CustomFloat)), " | ", p("d:>11.1", F4.init(0, 0b10, 1).cast(MetaFloat)), " | ", F4.init(0, 0b10, 1).cast(MetaFloat) == 3.0, "\n" });
        print(.{ "0 11 0 |     inf | ", p(comptimePrint("b:0>{d}", .{bits}), @as(MetaUnsigned, @bitCast(std.math.inf(MetaFloat)))), " | ", p("f", F4.init(0, 0b11, 0).cast(CustomFloat)), " | ", p("d:>11.1", F4.init(0, 0b11, 0).cast(MetaFloat)), " | ", std.math.isPositiveInf(F4.init(0, 0b11, 0).cast(MetaFloat)), "\n" });
        print(.{ "0 11 1 |     nan | ", p(comptimePrint("b:0>{d}", .{bits}), @as(MetaUnsigned, @bitCast(std.math.nan(MetaFloat)))), " | ", p("f", F4.init(0, 0b11, 1).cast(CustomFloat)), " | ", p("d:>11.1", F4.init(0, 0b11, 1).cast(MetaFloat)), " | ", std.math.isNan(F4.init(0, 0b11, 1).cast(MetaFloat)), "\n" });
        print(.{ "1 00 0 |    -0.0 | ", p(comptimePrint("b:0>{d}", .{bits}), @as(MetaUnsigned, @bitCast(@as(MetaFloat, -0.0)))), " | ", p("f", F4.init(1, 0b00, 0).cast(CustomFloat)), " | ", p("d:>11.1", F4.init(1, 0b00, 0).cast(MetaFloat)), " | ", F4.init(1, 0b00, 0).cast(MetaFloat) == -0.0, "\n" });
        print(.{ "1 00 1 |    -0.5 | ", p(comptimePrint("b:0>{d}", .{bits}), @as(MetaUnsigned, @bitCast(@as(MetaFloat, -0.5)))), " | ", p("f", F4.init(1, 0b00, 1).cast(CustomFloat)), " | ", p("d:>11.1", F4.init(1, 0b00, 1).cast(MetaFloat)), " | ", F4.init(1, 0b00, 1).cast(MetaFloat) == -0.5, "\n" });
        print(.{ "1 10 1 |    -3.0 | ", p(comptimePrint("b:0>{d}", .{bits}), @as(MetaUnsigned, @bitCast(@as(MetaFloat, -3.0)))), " | ", p("f", F4.init(1, 0b10, 1).cast(CustomFloat)), " | ", p("d:>11.1", F4.init(1, 0b10, 1).cast(MetaFloat)), " | ", F4.init(1, 0b10, 1).cast(MetaFloat) == -3.0, "\n" });
        print(.{ "1 11 0 |    -inf | ", p(comptimePrint("b:0>{d}", .{bits}), @as(MetaUnsigned, @bitCast(-std.math.inf(MetaFloat)))), " | ", p("f", F4.init(1, 0b11, 0).cast(CustomFloat)), " | ", p("d:>11.1", F4.init(1, 0b11, 0).cast(MetaFloat)), " | ", std.math.isNegativeInf(F4.init(1, 0b11, 0).cast(MetaFloat)), "\n" });
        print(.{ "1 11 1 |     nan | ", p(comptimePrint("b:0>{d}", .{bits}), @as(MetaUnsigned, @bitCast(-std.math.nan(MetaFloat)))), " | ", p("f", F4.init(1, 0b11, 1).cast(CustomFloat)), " | ", p("d:>11.1", F4.init(1, 0b11, 1).cast(MetaFloat)), " | ", std.math.isNan(F4.init(1, 0b11, 1).cast(MetaFloat)), "\n" });
    }
    // F8 to builtin float, same column layout as the F4 table above.
    // https://en.wikipedia.org/wiki/Minifloat#Table_of_values
    // inline for (.{ 16, 32, 64, 128 }) |bits| {
    inline for (.{16}) |bits| {
        const CustomFloat = switch (bits) {
            16 => F16,
            32 => F32,
            64 => F64,
            128 => F128,
            else => unreachable,
        };
        const MetaFloat = std.meta.Float(bits);
        const MetaUnsigned = std.meta.Int(.unsigned, bits);
        print(.{"-" ** 80 ++ "\n"});
        print(.{ "F8 to f", bits, "\n" });
        print(.{"given                    | expected         | casted\n"});
        print(.{ "F8         | decimal     | f", bits, " binary       | F", bits, "                | f", bits, " decimal | ok\n" });
        print(.{"                           seeeeeffffffffff\n"});
        print(.{ "0 0000 000 | 0.0         | ", p(comptimePrint("b:0>{d}", .{bits}), @as(MetaUnsigned, @bitCast(@as(MetaFloat, 0.000000000)))), " | ", p("f", F8.init(0, 0b0000, 0b000).cast(CustomFloat)), " | ", p("d:11.9", F8.init(0, 0b0000, 0b000).cast(MetaFloat)), " | ", F8.init(0, 0b0000, 0b000).cast(MetaFloat) == 0.0, "\n" });
        print(.{ "0 0000 001 | 0.001953125 | ", p(comptimePrint("b:0>{d}", .{bits}), @as(MetaUnsigned, @bitCast(@as(MetaFloat, 0.001953125)))), " | ", p("f", F8.init(0, 0b0000, 0b001).cast(CustomFloat)), " | ", p("d:11.9", F8.init(0, 0b0000, 0b001).cast(MetaFloat)), " | ", F8.init(0, 0b0000, 0b001).cast(MetaFloat) == 0.001953125, "\n" });
        print(.{ "0 0000 010 | 0.00390625  | ", p(comptimePrint("b:0>{d}", .{bits}), @as(MetaUnsigned, @bitCast(@as(MetaFloat, 0.003906250)))), " | ", p("f", F8.init(0, 0b0000, 0b010).cast(CustomFloat)), " | ", p("d:11.9", F8.init(0, 0b0000, 0b010).cast(MetaFloat)), " | ", F8.init(0, 0b0000, 0b010).cast(MetaFloat) == 0.00390625, "\n" });
        print(.{ "0 0000 011 | 0.005859375 | ", p(comptimePrint("b:0>{d}", .{bits}), @as(MetaUnsigned, @bitCast(@as(MetaFloat, 0.005859375)))), " | ", p("f", F8.init(0, 0b0000, 0b011).cast(CustomFloat)), " | ", p("d:11.9", F8.init(0, 0b0000, 0b011).cast(MetaFloat)), " | ", F8.init(0, 0b0000, 0b011).cast(MetaFloat) == 0.005859375, "\n" });
        print(.{ "0 0000 100 | 0.0078125   | ", p(comptimePrint("b:0>{d}", .{bits}), @as(MetaUnsigned, @bitCast(@as(MetaFloat, 0.007812500)))), " | ", p("f", F8.init(0, 0b0000, 0b100).cast(CustomFloat)), " | ", p("d:11.9", F8.init(0, 0b0000, 0b100).cast(MetaFloat)), " | ", F8.init(0, 0b0000, 0b100).cast(MetaFloat) == 0.0078125, "\n" });
        print(.{ "0 0000 111 | 0.013671875 | ", p(comptimePrint("b:0>{d}", .{bits}), @as(MetaUnsigned, @bitCast(@as(MetaFloat, 0.013671875)))), " | ", p("f", F8.init(0, 0b0000, 0b111).cast(CustomFloat)), " | ", p("d:11.9", F8.init(0, 0b0000, 0b111).cast(MetaFloat)), " | ", F8.init(0, 0b0000, 0b111).cast(MetaFloat) == 0.013671875, "\n" });
        print(.{ "0 0001 000 | 0.015625    | ", p(comptimePrint("b:0>{d}", .{bits}), @as(MetaUnsigned, @bitCast(@as(MetaFloat, 0.015625000)))), " | ", p("f", F8.init(0, 0b0001, 0b000).cast(CustomFloat)), " | ", p("d:11.9", F8.init(0, 0b0001, 0b000).cast(MetaFloat)), " | ", F8.init(0, 0b0001, 0b000).cast(MetaFloat) == 0.015625, "\n" });
        print(.{ "0 0001 001 | 0.017578125 | ", p(comptimePrint("b:0>{d}", .{bits}), @as(MetaUnsigned, @bitCast(@as(MetaFloat, 0.017578125)))), " | ", p("f", F8.init(0, 0b0001, 0b001).cast(CustomFloat)), " | ", p("d:11.9", F8.init(0, 0b0001, 0b001).cast(MetaFloat)), " | ", F8.init(0, 0b0001, 0b001).cast(MetaFloat) == 0.017578125, "\n" });
        print(.{ "0 0001 010 | 0.01953125  | ", p(comptimePrint("b:0>{d}", .{bits}), @as(MetaUnsigned, @bitCast(@as(MetaFloat, 0.019531250)))), " | ", p("f", F8.init(0, 0b0001, 0b010).cast(CustomFloat)), " | ", p("d:11.9", F8.init(0, 0b0001, 0b010).cast(MetaFloat)), " | ", F8.init(0, 0b0001, 0b010).cast(MetaFloat) == 0.01953125, "\n" });
        print(.{ "0 0001 100 | 0.0234375   | ", p(comptimePrint("b:0>{d}", .{bits}), @as(MetaUnsigned, @bitCast(@as(MetaFloat, 0.023437500)))), " | ", p("f", F8.init(0, 0b0001, 0b100).cast(CustomFloat)), " | ", p("d:11.9", F8.init(0, 0b0001, 0b100).cast(MetaFloat)), " | ", F8.init(0, 0b0001, 0b100).cast(MetaFloat) == 0.0234375, "\n" });
        print(.{ "0 0001 111 | 0.029296875 | ", p(comptimePrint("b:0>{d}", .{bits}), @as(MetaUnsigned, @bitCast(@as(MetaFloat, 0.029296875)))), " | ", p("f", F8.init(0, 0b0001, 0b111).cast(CustomFloat)), " | ", p("d:11.9", F8.init(0, 0b0001, 0b111).cast(MetaFloat)), " | ", F8.init(0, 0b0001, 0b111).cast(MetaFloat) == 0.029296875, "\n" });
        print(.{ "0 0010 000 | 0.03125     | ", p(comptimePrint("b:0>{d}", .{bits}), @as(MetaUnsigned, @bitCast(@as(MetaFloat, 0.031250000)))), " | ", p("f", F8.init(0, 0b0010, 0b000).cast(CustomFloat)), " | ", p("d:11.9", F8.init(0, 0b0010, 0b000).cast(MetaFloat)), " | ", F8.init(0, 0b0010, 0b000).cast(MetaFloat) == 0.03125, "\n" });
        print(.{ "0 0100 000 | 0.125       | ", p(comptimePrint("b:0>{d}", .{bits}), @as(MetaUnsigned, @bitCast(@as(MetaFloat, 0.125000000)))), " | ", p("f", F8.init(0, 0b0100, 0b000).cast(CustomFloat)), " | ", p("d:11.9", F8.init(0, 0b0100, 0b000).cast(MetaFloat)), " | ", F8.init(0, 0b0100, 0b000).cast(MetaFloat) == 0.125, "\n" });
        print(.{ "0 1000 000 | 2.0         | ", p(comptimePrint("b:0>{d}", .{bits}), @as(MetaUnsigned, @bitCast(@as(MetaFloat, 2.000000000)))), " | ", p("f", F8.init(0, 0b1000, 0b000).cast(CustomFloat)), " | ", p("d:11.9", F8.init(0, 0b1000, 0b000).cast(MetaFloat)), " | ", F8.init(0, 0b1000, 0b000).cast(MetaFloat) == 2.0, "\n" });
        print(.{ "0 1111 000 | inf         | ", p(comptimePrint("b:0>{d}", .{bits}), @as(MetaUnsigned, @bitCast(std.math.inf(MetaFloat)))), " | ", p("f", F8.init(0, 0b1111, 0b000).cast(CustomFloat)), " | ", p("d:11.9", F8.init(0, 0b1111, 0b000).cast(MetaFloat)), " | ", std.math.isPositiveInf(F8.init(0, 0b1111, 0b000).cast(MetaFloat)), "\n" });
    }
    // Narrowing cast: the expected column is the builtin @floatCast of the same f32 bit
    // pattern, the casted column is F32.cast(F16).
    // inline for (.{ 16, 32, 64, 128 }) |bits| {
    {
        print(.{"-" ** 80 ++ "\n"});
        print(.{"F32 to f16\n"});
        print(.{"given                            | expected         | casted\n"});
        print(.{"F32                              | f16 binary       | F16                | ok\n"});
        print(.{"seeeeeeeefffffffffffffffffffffff   seeeeeffffffffff   s eeeee ffffffffff\n"});
        //                                       seeeeeeeefffffffffffffffffffffff
        print(.{ p("b:0>32", @as(u32, 0b00000000000000000000000000000000)), " | ", p("b:0>16", @as(u16, @bitCast(@as(f16, @floatCast(@as(f32, @bitCast(@as(u32, 0b00000000000000000000000000000000)))))))), " | ", p("f", F32.init(0, 0b00000000, 0b00000000000000000000000).cast(F16)), "\n" });
        print(.{ p("b:0>32", @as(u32, 0b00000000000000000000000000000001)), " | ", p("b:0>16", @as(u16, @bitCast(@as(f16, @floatCast(@as(f32, @bitCast(@as(u32, 0b00000000000000000000000000000001)))))))), " | ", p("f", F32.init(0, 0b00000000, 0b00000000000000000000001).cast(F16)), "\n" });
        print(.{ p("b:0>32", @as(u32, 0b00000000000010000000000000000000)), " | ", p("b:0>16", @as(u16, @bitCast(@as(f16, @floatCast(@as(f32, @bitCast(@as(u32, 0b00000000000010000000000000000000)))))))), " | ", p("f", F32.init(0, 0b00000000, 0b00010000000000000000000).cast(F16)), "\n" });
        print(.{ p("b:0>32", @as(u32, 0b00000000000011100000000000000000)), " | ", p("b:0>16", @as(u16, @bitCast(@as(f16, @floatCast(@as(f32, @bitCast(@as(u32, 0b00000000000011100000000000000000)))))))), " | ", p("f", F32.init(0, 0b00000000, 0b00011100000000000000000).cast(F16)), "\n" });
        print(.{ p("b:0>32", @as(u32, 0b00000000000100000000000000000000)), " | ", p("b:0>16", @as(u16, @bitCast(@as(f16, @floatCast(@as(f32, @bitCast(@as(u32, 0b00000000000100000000000000000000)))))))), " | ", p("f", F32.init(0, 0b00000000, 0b00100000000000000000000).cast(F16)), "\n" });
        print(.{ p("b:0>32", @as(u32, 0b00000000001000000000000000000000)), " | ", p("b:0>16", @as(u16, @bitCast(@as(f16, @floatCast(@as(f32, @bitCast(@as(u32, 0b00000000001000000000000000000000)))))))), " | ", p("f", F32.init(0, 0b00000000, 0b01000000000000000000000).cast(F16)), "\n" });
        print(.{ p("b:0>32", @as(u32, 0b00000000010000000000000000000000)), " | ", p("b:0>16", @as(u16, @bitCast(@as(f16, @floatCast(@as(f32, @bitCast(@as(u32, 0b00000000010000000000000000000000)))))))), " | ", p("f", F32.init(0, 0b00000000, 0b10000000000000000000000).cast(F16)), "\n" });
        print(.{ p("b:0>32", @as(u32, 0b00000000100000000000000000000000)), " | ", p("b:0>16", @as(u16, @bitCast(@as(f16, @floatCast(@as(f32, @bitCast(@as(u32, 0b00000000100000000000000000000000)))))))), " | ", p("f", F32.init(0, 0b00000001, 0b00000000000000000000000).cast(F16)), "\n" });
        print(.{ p("b:0>32", @as(u32, 0b00000001000000000000000000000000)), " | ", p("b:0>16", @as(u16, @bitCast(@as(f16, @floatCast(@as(f32, @bitCast(@as(u32, 0b00000001000000000000000000000000)))))))), " | ", p("f", F32.init(0, 0b00000010, 0b00000000000000000000000).cast(F16)), "\n" });
        print(.{ p("b:0>32", @as(u32, 0b00000010000000000000000000000000)), " | ", p("b:0>16", @as(u16, @bitCast(@as(f16, @floatCast(@as(f32, @bitCast(@as(u32, 0b00000010000000000000000000000000)))))))), " | ", p("f", F32.init(0, 0b00000100, 0b00000000000000000000000).cast(F16)), "\n" });
        print(.{ p("b:0>32", @as(u32, 0b00000100000000000000000000000000)), " | ", p("b:0>16", @as(u16, @bitCast(@as(f16, @floatCast(@as(f32, @bitCast(@as(u32, 0b00000100000000000000000000000000)))))))), " | ", p("f", F32.init(0, 0b00001000, 0b00000000000000000000000).cast(F16)), "\n" });
        print(.{ p("b:0>32", @as(u32, 0b00001000000000000000000000000000)), " | ", p("b:0>16", @as(u16, @bitCast(@as(f16, @floatCast(@as(f32, @bitCast(@as(u32, 0b00001000000000000000000000000000)))))))), " | ", p("f", F32.init(0, 0b00010000, 0b00000000000000000000000).cast(F16)), "\n" });
        print(.{ p("b:0>32", @as(u32, 0b00111000000000000000000000000000)), " | ", p("b:0>16", @as(u16, @bitCast(@as(f16, @floatCast(@as(f32, @bitCast(@as(u32, 0b00111000000000000000000000000000)))))))), " | ", p("f", F32.init(0, 0b01110000, 0b00000000000000000000000).cast(F16)), "\n" });
    }
 }

 // Identity cast round-trip: every F4 bit pattern, for both signs, must format to the
 // same "<sign> <exponent> <fraction>" string after `cast(F4)`.
 test "F4 to F4" {
    @setEvalBranchQuota(100000);
    // Values should produce the same value, if subnormal values are enabled
    print(.{"F4 (or 1.2.1-float, or 4-bit float) to F4\n"});
    // Range ends are exclusive: `0..1 << 1` visits sign 0 and sign 1.
    inline for (0..1 << 1) |sign| {
        inline for (0..1 << 2) |exponent| {
            inline for (0..1 << 1) |mantissa| {
                try std.testing.expectFmt(comptimePrint("{b} {b:0>2} {b}", .{ sign, exponent, mantissa }), "{f}", .{F4.init(sign, exponent, mantissa).cast(F4)});
            }
        }
    }
 }

 // Round-trip check: casting an F8 to F8 must leave every bit pattern unchanged.
 // The expected string is built from the sign, exponent and mantissa in binary and
 // compared against the `{f}` rendering of the cast result.
 test "F8 to F8" {
    @setEvalBranchQuota(100000);
    // Values should produce the same value, if subnormal values are enabled
    print(.{"F8 (or 1.4.3-float, or 8-bit float, or Minifloat) to F8\n"});
    // Range upper bounds are exclusive: `0..2` visits both sign values (0 and 1).
    inline for (0..2) |sign| {
        inline for (0..1 << 4) |exponent| {
            inline for (0..1 << 3) |mantissa| {
                try std.testing.expectFmt(comptimePrint("{b} {b:0>4} {b:0>3}", .{ sign, exponent, mantissa }), "{f}", .{F8.init(sign, exponent, mantissa).cast(F8)});
            }
        }
    }
 }

 // Widening cast: every F4 bit pattern is cast to F8 and its `{f}` rendering is
 // compared with the expected "sign exponent fraction" string. The trailing comment
 // on each line gives the magnitude the pattern stands for.
 test "F4 to F8" {
    @setEvalBranchQuota(100000);
    // 1.2.1-float values: https://en.wikipedia.org/wiki/Minifloat#4_bits_and_fewer, accessed on 2024-09-07
    // 1.4.3-float values: https://en.wikipedia.org/wiki/Minifloat#Table_of_values, accessed on 2024-09-07
    // Range upper bounds are exclusive: `0..2` visits both sign values (0 and 1).
    inline for (0..2) |sign| {
        try std.testing.expectFmt(comptimePrint("{d}", .{sign}) ++ " 0000 000", "{f}", .{F4.init(sign, 0b00, 0).cast(F8)}); // 0.0
        try std.testing.expectFmt(comptimePrint("{d}", .{sign}) ++ " 0110 000", "{f}", .{F4.init(sign, 0b00, 1).cast(F8)}); // 0.5
        try std.testing.expectFmt(comptimePrint("{d}", .{sign}) ++ " 0111 000", "{f}", .{F4.init(sign, 0b01, 0).cast(F8)}); // 1.0
        try std.testing.expectFmt(comptimePrint("{d}", .{sign}) ++ " 0111 100", "{f}", .{F4.init(sign, 0b01, 1).cast(F8)}); // 1.5
        try std.testing.expectFmt(comptimePrint("{d}", .{sign}) ++ " 1000 000", "{f}", .{F4.init(sign, 0b10, 0).cast(F8)}); // 2.0
        try std.testing.expectFmt(comptimePrint("{d}", .{sign}) ++ " 1000 100", "{f}", .{F4.init(sign, 0b10, 1).cast(F8)}); // 3.0
        try std.testing.expectFmt(comptimePrint("{d}", .{sign}) ++ " 1111 000", "{f}", .{F4.init(sign, 0b11, 0).cast(F8)}); // inf
        try std.testing.expectFmt(comptimePrint("{d}", .{sign}) ++ " 1111 100", "{f}", .{F4.init(sign, 0b11, 1).cast(F8)}); // nan
    }
 }

 // Narrowing cast: F8 patterns whose values are exactly representable in F4 are
 // cast to F4 and their `{f}` rendering is compared with the expected
 // "sign exponent fraction" string. The trailing comment on each line gives the
 // magnitude the pattern stands for.
 test "F8 to F4" {
    @setEvalBranchQuota(100000);
    // 1.2.1-float values: https://en.wikipedia.org/wiki/Minifloat#4_bits_and_fewer, accessed on 2024-09-07
    // 1.4.3-float values: https://en.wikipedia.org/wiki/Minifloat#Table_of_values, accessed on 2024-09-07
    // Range upper bounds are exclusive: `0..2` visits both sign values (0 and 1).
    inline for (0..2) |sign| {
        try std.testing.expectFmt(comptimePrint("{d}", .{sign}) ++ " 00 0", "{f}", .{F8.init(sign, 0b0000, 0b000).cast(F4)}); // 0.0
        try std.testing.expectFmt(comptimePrint("{d}", .{sign}) ++ " 00 1", "{f}", .{F8.init(sign, 0b0110, 0b000).cast(F4)}); // 0.5 // FIXME: subnormal handling
        try std.testing.expectFmt(comptimePrint("{d}", .{sign}) ++ " 01 0", "{f}", .{F8.init(sign, 0b0111, 0b000).cast(F4)}); // 1.0
        try std.testing.expectFmt(comptimePrint("{d}", .{sign}) ++ " 01 1", "{f}", .{F8.init(sign, 0b0111, 0b100).cast(F4)}); // 1.5
        try std.testing.expectFmt(comptimePrint("{d}", .{sign}) ++ " 10 0", "{f}", .{F8.init(sign, 0b1000, 0b000).cast(F4)}); // 2.0
        try std.testing.expectFmt(comptimePrint("{d}", .{sign}) ++ " 10 1", "{f}", .{F8.init(sign, 0b1000, 0b100).cast(F4)}); // 3.0
        try std.testing.expectFmt(comptimePrint("{d}", .{sign}) ++ " 11 0", "{f}", .{F8.init(sign, 0b1111, 0b000).cast(F4)}); // inf
        try std.testing.expectFmt(comptimePrint("{d}", .{sign}) ++ " 11 1", "{f}", .{F8.init(sign, 0b1111, 0b100).cast(F4)}); // nan
    }
 }

 // Readability helper for the tests: forwards to `std.testing.expectEqual` with its
 // two arguments exchanged. The call sites in this file pass the computed value
 // first and the reference literal second, so the literal ends up in
 // `expectEqual`'s `expected` slot. Any mismatch error is propagated to the caller.
 inline fn expectEqualSwapped(expected: anytype, actual: anytype) !void {
    try std.testing.expectEqual(actual, expected);
 }

 // Casts every F4 bit pattern to each native float width and compares the result
 // with the value that pattern stands for. Zero, finite values and infinity are
 // compared with `expectEqualSwapped`; NaN is checked with `std.math.isNan`.
 test "F4 to meta float" {
    @setEvalBranchQuota(100000);
    // 1.2.1-float values: https://en.wikipedia.org/wiki/Minifloat#4_bits_and_fewer, accessed on 2024-09-07
    inline for (.{ 16, 32, 64, 128 }) |bits| {
        print(.{ "F4 (or 1.2.1-float, or 4-bit float) to f", bits, " (meta float)\n" });
        const TestFloat = std.meta.Float(bits);
        // Range upper bounds are exclusive: `0..2` visits both sign values, so the
        // negative branches of the `if (sign > 0)` expressions below are exercised.
        inline for (0..2) |sign| {
            try expectEqualSwapped(F4.init(sign, 0b00, 0).cast(TestFloat), (if (sign > 0) -0.0 else 0.0));
            try expectEqualSwapped(F4.init(sign, 0b00, 1).cast(TestFloat), (if (sign > 0) -0.5 else 0.5));
            try expectEqualSwapped(F4.init(sign, 0b01, 0).cast(TestFloat), (if (sign > 0) -1.0 else 1.0));
            try expectEqualSwapped(F4.init(sign, 0b01, 1).cast(TestFloat), (if (sign > 0) -1.5 else 1.5));
            try expectEqualSwapped(F4.init(sign, 0b10, 0).cast(TestFloat), (if (sign > 0) -2.0 else 2.0));
            try expectEqualSwapped(F4.init(sign, 0b10, 1).cast(TestFloat), (if (sign > 0) -3.0 else 3.0));
            try expectEqualSwapped(F4.init(sign, 0b11, 0).cast(TestFloat), (if (sign > 0) -std.math.inf(TestFloat) else std.math.inf(TestFloat)));
            try std.testing.expect(std.math.isNan(F4.init(sign, 0b11, 1).cast(TestFloat)));
        }
    }
 }

 // Casts a sample of positive F8 bit patterns to each native float width and
 // compares the result with the value listed for that pattern. Finite values go
 // through `expectEqualSwapped`; infinity and NaN use the `std.math` predicates.
 test "F8 to meta float" {
    @setEvalBranchQuota(100000);
    // 1.4.3-float values: https://en.wikipedia.org/wiki/Minifloat#Table_of_values, accessed on 2024-09-07

    // Each row: { biased exponent, fraction, expected value } with sign = 0.
    const finite_cases = .{
        .{ 0b0000, 0b000, 0.0 },
        .{ 0b0000, 0b001, 0.001953125 },
        .{ 0b0000, 0b010, 0.00390625 },
        .{ 0b0000, 0b100, 0.0078125 },
        .{ 0b0000, 0b111, 0.013671875 },
        .{ 0b0001, 0b000, 0.015625 },
        .{ 0b0001, 0b001, 0.017578125 },
        .{ 0b0001, 0b010, 0.01953125 },
        .{ 0b0001, 0b100, 0.0234375 },
        .{ 0b0001, 0b111, 0.029296875 },
        .{ 0b0010, 0b000, 0.03125 },
        .{ 0b0010, 0b001, 0.03515625 },
        .{ 0b0010, 0b010, 0.0390625 },
        .{ 0b0010, 0b100, 0.046875 },
        .{ 0b0010, 0b111, 0.05859375 },
        .{ 0b0100, 0b000, 0.125 },
        .{ 0b0100, 0b001, 0.140625 },
        .{ 0b0100, 0b010, 0.15625 },
        .{ 0b0100, 0b100, 0.1875 },
        .{ 0b0100, 0b111, 0.234375 },
        .{ 0b0111, 0b000, 1 },
        .{ 0b0111, 0b001, 1.125 },
        .{ 0b0111, 0b010, 1.25 },
        .{ 0b0111, 0b100, 1.5 },
        .{ 0b0111, 0b111, 1.875 },
        .{ 0b1000, 0b000, 2 },
        .{ 0b1000, 0b001, 2.25 },
        .{ 0b1000, 0b010, 2.5 },
        .{ 0b1000, 0b100, 3 },
        .{ 0b1000, 0b111, 3.75 },
        .{ 0b1110, 0b000, 128 },
        .{ 0b1110, 0b001, 144 },
        .{ 0b1110, 0b010, 160 },
        .{ 0b1110, 0b100, 192 },
        .{ 0b1110, 0b111, 240 },
    };
    // Fractions that are paired with the all-ones exponent and must cast to NaN.
    const nan_fractions = .{ 0b001, 0b010, 0b100, 0b111 };

    inline for (.{ 16, 32, 64, 128 }) |bits| {
        print(.{ "F8 (or 1.4.3-float, or 8-bit float, or Minifloat) to f", bits, " (meta float)\n" });
        const TestFloat = std.meta.Float(bits);

        inline for (finite_cases) |case| {
            try expectEqualSwapped(F8.init(0, case[0], case[1]).cast(TestFloat), case[2]);
        }

        // All-ones exponent with a zero fraction is +infinity.
        try std.testing.expect(std.math.isPositiveInf(F8.init(0, 0b1111, 0b000).cast(TestFloat)));

        inline for (nan_fractions) |fraction| {
            try std.testing.expect(std.math.isNan(F8.init(0, 0b1111, fraction).cast(TestFloat)));
        }
    }
 }

 // Exercises `add` on F4 operands and compares each sum with the expected F4
 // bit pattern.
 test "F4 Addition" {
    const testing = std.testing;

    // Operands, built as F4.init(sign, biased_exponent, fraction).
    const zero = F4.init(0, 0, 0);
    const half = F4.init(0, 0b00, 1); // 0.5 (subnormal in F4)
    const one = F4.init(0, 0b01, 0); // 1.0
    const neg_one = F4.init(1, 0b01, 0); // -1.0
    const one_five = F4.init(0, 0b01, 1); // 1.5
    const two = F4.init(0, 0b10, 0); // 2.0
    const three = F4.init(0, 0b10, 1); // 3.0

    try testing.expectEqual(two, one.add(one)); // 1.0 + 1.0 = 2.0
    try testing.expectEqual(one_five, one.add(half)); // 1.0 + 0.5 = 1.5
    try testing.expectEqual(three, one_five.add(one_five)); // 1.5 + 1.5 = 3.0
    try testing.expectEqual(one, one.add(zero)); // x + 0 = x
    try testing.expectEqual(one, two.add(neg_one)); // 2.0 + (-1.0) = 1.0
 }

 // Exercises `mul` on F4 operands and compares each product with the expected F4
 // bit pattern, including one product (2.25) that F4 cannot represent exactly.
 test "F4 Multiplication" {
    const testing = std.testing;

    // Operands, built as F4.init(sign, biased_exponent, fraction).
    const one = F4.init(0, 0b01, 0); // 1.0
    const one_five = F4.init(0, 0b01, 1); // 1.5
    const two = F4.init(0, 0b10, 0); // 2.0
    const three = F4.init(0, 0b10, 1); // 3.0
    const neg_one = F4.init(1, 0b01, 0); // -1.0
    const neg_two = F4.init(1, 0b10, 0); // -2.0

    try testing.expectEqual(two, one.mul(two)); // 1.0 * 2.0 = 2.0
    try testing.expectEqual(three, one_five.mul(two)); // 1.5 * 2.0 = 3.0
    try testing.expectEqual(three, two.mul(one_five)); // 2.0 * 1.5 = 3.0 (commutative)
    try testing.expectEqual(neg_two, neg_one.mul(two)); // -1.0 * 2.0 = -2.0
    // Truncation: 1.5 * 1.5 = 2.25 is expected to come back as 2.0 in F4.
    try testing.expectEqual(two, one_five.mul(one_five));
 }

 // Checks how `add` and `mul` treat the F4 special values. In this test infinity is
 // the all-ones exponent (0b11) with a zero fraction, and NaN is the all-ones
 // exponent with a non-zero fraction.
 test "Special Cases: Infinity and NaN" {
    const inf = F4.init(0, 0b11, 0);
    const neg_inf = F4.init(1, 0b11, 0);
    const nan = F4.init(0, 0b11, 1);
    const one = F4.init(0, 0b01, 0);
    const zero = F4.init(0, 0, 0);

    // Inf + x = Inf
    try std.testing.expectEqual(inf, inf.add(one));
    // Inf - Inf = NaN
    const res = inf.add(neg_inf);
    try std.testing.expect(res.biased_exponent == 0b11 and res.fraction != 0);

    // NaN propagation: the result must itself be NaN. Checking only the exponent
    // would also accept infinity, so the fraction is required to be non-zero too.
    const nan_sum = one.add(nan);
    try std.testing.expect(nan_sum.biased_exponent == 0b11 and nan_sum.fraction != 0);
    const nan_product = one.mul(nan);
    try std.testing.expect(nan_product.biased_exponent == 0b11 and nan_product.fraction != 0);

    // 0 * Inf = NaN
    const zero_inf = zero.mul(inf);
    try std.testing.expect(zero_inf.biased_exponent == 0b11 and zero_inf.fraction != 0);

    // Inf * Inf = Inf
    try std.testing.expectEqual(inf, inf.mul(inf));
    // Inf * -1 = -Inf
    const neg_one = F4.init(1, 0b01, 0);
    try std.testing.expectEqual(neg_inf, inf.mul(neg_one));
 }

 // Checks that F4 results beyond the largest finite value become infinity and that
 // a product below the smallest subnormal becomes zero.
 test "F4 Overflow and Underflow" {
    const testing = std.testing;

    // Operands, built as F4.init(sign, biased_exponent, fraction).
    const zero = F4.init(0, 0, 0);
    const half = F4.init(0, 0b00, 1); // Smallest subnormal in F4 is 0.5
    const one = F4.init(0, 0b01, 0);
    const two = F4.init(0, 0b10, 0);
    const three = F4.init(0, 0b10, 1); // Max finite value in F4 is 3.0
    const inf = F4.init(0, 0b11, 0);

    // Overflow: both results exceed the F4 range.
    try testing.expectEqual(inf, three.add(one)); // 3.0 + 1.0 = 4.0
    try testing.expectEqual(inf, three.mul(two)); // 3.0 * 2.0 = 6.0

    // Underflow to zero (if subnormals can't represent it): 0.5 * 0.5 = 0.25
    try testing.expectEqual(zero, half.mul(half));
 }

 // Smoke test for `add` and `mul` on F8 values, comparing each result with the
 // expected F8 bit pattern.
 test "F8 Arithmetic (Minifloat)" {
    // F8: 1 sign, 4 exponent (bias 7), 3 fraction
    const f8_one = F8.init(0, 7, 0); // 1.0
    const f8_two = F8.init(0, 8, 0); // 2.0

    // 1.0 + 2.0 = 3.0 (Exponent 8, mantissa 0.5 -> 1.5 * 2^(8-7) = 3)
    const expected_three = F8.init(0, 8, 0b100);
    try std.testing.expectEqual(expected_three, f8_one.add(f8_two));

    // 2.0 * 2.0 = 4.0 (Exponent 9, mantissa 0 -> 1.0 * 2^(9-7) = 4)
    const expected_four = F8.init(0, 9, 0);
    try std.testing.expectEqual(expected_four, f8_two.mul(f8_two));

    // Reported only once both assertions above have passed; newline-terminated so
    // it does not run into whatever is printed next.
    print(.{"Arithm ok\n"});
 }
No results found