Created
October 25, 2025 06:45
-
-
Save jandk/982dcbefcb71f3f6c485cb5a2225fb9d to your computer and use it in GitHub Desktop.
Manual zlib decompression in Java
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import java.io.Closeable; | |
| import java.io.EOFException; | |
| import java.io.IOException; | |
| import java.io.InputStream; | |
| import java.util.Objects; | |
| public final class BitInputStream implements Closeable { | |
| private final InputStream in; | |
| private long bitBuffer; | |
| private int bitCount; | |
| public BitInputStream(InputStream in) { | |
| this.in = Objects.requireNonNull(in); | |
| } | |
| public void alignWithByteBoundary() { | |
| consumeBits(bitCount & 7); | |
| } | |
| public int readBit() throws IOException { | |
| return readBits(1); | |
| } | |
| public boolean readBoolean() throws IOException { | |
| return readBits(1) != 0; | |
| } | |
| public int readBits(int count) throws IOException { | |
| assert count >= 0 && count <= 32; | |
| return (int) readBitsLong(count); | |
| } | |
| private long readBitsLong(int count) throws IOException { | |
| assert count >= 0 && count <= 57; | |
| refill(count); | |
| long result = peekBits(count); | |
| consumeBits(count); | |
| return result; | |
| } | |
| private void refill(int count) throws IOException { | |
| assert count >= 0 && count <= 57; | |
| while (bitCount < count) { | |
| long read = in.read(); | |
| if (read < 0) { | |
| throw new EOFException(); | |
| } | |
| bitBuffer |= read << bitCount; | |
| bitCount += 8; | |
| } | |
| } | |
| private long peekBits(int count) { | |
| assert bitCount >= count; | |
| return bitBuffer & (1L << count) - 1; | |
| } | |
| private void consumeBits(int count) { | |
| assert bitCount >= count; | |
| bitBuffer >>>= count; | |
| bitCount -= count; | |
| } | |
| @Override | |
| public void close() throws IOException { | |
| in.close(); | |
| } | |
| } |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import java.io.IOException; | |
| enum BlockType { | |
| NONE(0), | |
| FIXED(1), | |
| DYNAMIC(2), | |
| ; | |
| private final int value; | |
| BlockType(int value) { | |
| this.value = value; | |
| } | |
| public int getValue() { | |
| return value; | |
| } | |
| public static BlockType from(BitInputStream bits) throws IOException { | |
| int value = bits.readBits(2); | |
| return switch (value) { | |
| case 0 -> NONE; | |
| case 1 -> FIXED; | |
| case 2 -> DYNAMIC; | |
| default -> throw new IllegalArgumentException("Unknown block type: " + value); | |
| }; | |
| } | |
| } |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import java.io.IOException; | |
| import java.rmi.RemoteException; | |
| import java.util.Objects; | |
| import static be.twofold.common.Inflate.MAXBITS; | |
| public final class HuffmanTree { | |
| private final short[] counts; | |
| private final short[] symbols; | |
| private HuffmanTree(short[] counts, short[] symbols) { | |
| this.counts = counts; | |
| this.symbols = symbols; | |
| } | |
| public static HuffmanTree build(int numCounts, int numSymbols, int[] lengths) { | |
| return build(numCounts, numSymbols, lengths, 0, lengths.length); | |
| } | |
| public static HuffmanTree build(int numCounts, int numSymbols, int[] lengths, int offset, int length) { | |
| short[] counts = new short[numCounts]; | |
| short[] symbols = new short[numSymbols]; | |
| Objects.checkFromIndexSize(offset, length, lengths.length); | |
| for (int i = offset; i < offset + length; i++) { | |
| counts[lengths[i]]++; | |
| } | |
| if (counts[0] == length) { | |
| throw new RuntimeException("Huffman tree is empty"); | |
| } | |
| int left = 1; | |
| for (int len = 1; len < numCounts; len++) { | |
| left <<= 1; | |
| left -= counts[len]; | |
| if (left < 0) { | |
| throw new RuntimeException("Over subscribed"); | |
| } | |
| } | |
| short[] offsets = new short[numCounts + 1]; | |
| offsets[1] = 0; | |
| for (int len = 1; len < numCounts; len++) { | |
| offsets[len + 1] = (short) (offsets[len] + counts[len]); | |
| } | |
| for (int symbol = 0; symbol < lengths.length; symbol++) { | |
| if (lengths[symbol] != 0) { | |
| symbols[offsets[lengths[symbol]]++] = (short) symbol; | |
| } | |
| } | |
| return new HuffmanTree(counts, symbols); | |
| } | |
| public final int[] lengths = new int[MAXBITS + 1]; | |
| public int decode(BitInputStream in) throws IOException { | |
| int code = 0; | |
| int first = 0; | |
| int index = 0; | |
| for (int len = 1; len <= MAXBITS; len++) { | |
| code |= in.readBit(); | |
| int count = counts[len]; | |
| if (code - count < first) { | |
| lengths[len]++; | |
| return symbols[index + (code - first)]; | |
| } | |
| index += count; | |
| first += count; | |
| first <<= 1; | |
| code <<= 1; | |
| } | |
| throw new RemoteException("Ran out of codes"); | |
| } | |
| } |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import java.io.IOException; | |
| import java.util.Arrays; | |
| public final class Inflate { | |
| static final int MAXBITS = 15; /* maximum bits in a code */ | |
| private static final int MAXLCODES = 286; /* maximum number of literal/length codes */ | |
| private static final int MAXDCODES = 30; /* maximum number of distance codes */ | |
| private static final int MAXCODES = (MAXLCODES + MAXDCODES); /* maximum codes lengths to read */ | |
| private static final int FIXLCODES = 288; /* number of fixed literal/length codes */ | |
| private static final int[] LEN_ORDER = {16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15}; | |
| private static final short[] LEN_LENS = {3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31, 35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258}; | |
| private static final short[] LEN_BITS = {0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0}; | |
| private static final short[] DIST_LENS = {1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193, 257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 6145, 8193, 12289, 16385, 24577}; | |
| private static final short[] DIST_BITS = {0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13}; | |
| private static HuffmanTree readLengths(BitInputStream bits) throws IOException { | |
| var numLengths = bits.readBits(4) + 4; | |
| var lengths = new int[LEN_ORDER.length]; | |
| for (var i = 0; i < numLengths; i++) { | |
| lengths[LEN_ORDER[i]] = bits.readBits(3); | |
| } | |
| return HuffmanTree.build(7, 19, lengths); | |
| } | |
| public void decodeBlock(BitInputStream bits, byte[] out) throws IOException { | |
| var isFinal = bits.readBoolean(); | |
| var blockType = BlockType.from(bits); | |
| System.out.println("isFinal = " + isFinal); | |
| System.out.println("blockType = " + blockType); | |
| if (blockType != BlockType.DYNAMIC) { | |
| throw new IOException("Only dynamic huff supported"); | |
| } | |
| dynamicHuffman(bits, out); | |
| } | |
| private void dynamicHuffman(BitInputStream in, byte[] out) throws IOException { | |
| var nLen = in.readBits(5) + 257; | |
| var nDist = in.readBits(5) + 1; | |
| if (nLen > MAXLCODES || nDist > MAXDCODES) { | |
| throw new IOException("Bad counts"); | |
| } | |
| // We heard you like huffman, so we put huffman in your huffman | |
| var huff = readLengths(in); | |
| var lengths = new int[MAXCODES]; | |
| for (var index = 0; index < nLen + nDist; ) { | |
| int symbol = huff.decode(in); | |
| if (symbol < 0) { | |
| throw new IOException("Bad symbol"); | |
| } | |
| if (symbol < 16) { | |
| lengths[index++] = symbol; | |
| } else { | |
| var len = 0; | |
| if (symbol == 16) { | |
| if (index == 0) { | |
| throw new IOException("No last length"); | |
| } | |
| len = lengths[index - 1]; | |
| symbol = 3 + in.readBits(2); | |
| } else if (symbol == 17) { | |
| symbol = 3 + in.readBits(3); | |
| } else { | |
| symbol = 11 + in.readBits(7); | |
| } | |
| if (index + symbol > nLen + nDist) { | |
| throw new IOException("Too many lengths!"); | |
| } | |
| while (symbol-- != 0) { | |
| lengths[index++] = len; | |
| } | |
| } | |
| } | |
| if (lengths[256] == 0) { | |
| throw new IOException("No end of block length"); | |
| } | |
| var lenHuff = HuffmanTree.build(MAXBITS + 1, MAXLCODES, Arrays.copyOfRange(lengths, 0, nLen)); | |
| var distHuff = HuffmanTree.build(MAXBITS + 1, MAXDCODES, Arrays.copyOfRange(lengths, nLen, nLen + nDist)); | |
| var outOff = 0; | |
| while (true) { | |
| var symbol = lenHuff.decode(in); | |
| if (symbol < 0) { | |
| throw new IOException("Bad symbol"); | |
| } else if (symbol < 256) { | |
| out[outOff++] = (byte) symbol; | |
| } else if (symbol > 256) { | |
| symbol -= 257; | |
| if (symbol >= 29) { | |
| throw new IOException("Invalid fixed code"); | |
| } | |
| var len = LEN_LENS[symbol] + in.readBits(LEN_BITS[symbol]); | |
| symbol = distHuff.decode(in); | |
| if (symbol < 0) { | |
| throw new IOException("Bad symbol"); | |
| } | |
| var dist = DIST_LENS[symbol] + in.readBits(DIST_BITS[symbol]); | |
| copyReference(out, outOff, out.length, dist, len); | |
| outOff += len; | |
| } else { | |
| break; | |
| } | |
| } | |
| System.out.println(Arrays.toString(huff.lengths)); | |
| System.out.println(Arrays.toString(lenHuff.lengths)); | |
| System.out.println(Arrays.toString(distHuff.lengths)); | |
| System.out.println(new String(out)); | |
| } | |
| void copyReference(byte[] a, int aOff, int aLen, int offset, int length) throws IOException { | |
| if (offset <= 0 || aOff - offset < 0 || length > aLen) { | |
| throw new IOException("Invalid match"); | |
| } | |
| if (offset == 1) { | |
| var b = a[aOff - 1]; | |
| for (var i = 0; i < length; i++) { | |
| a[aOff++] = b; | |
| } | |
| } else if (offset >= length) { | |
| System.arraycopy(a, aOff - offset, a, aOff, length); | |
| } else { | |
| for (int i = 0, pos = aOff - offset; i < length; i++) { | |
| a[aOff++] = a[pos + i]; | |
| } | |
| } | |
| } | |
| } |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import java.io.ByteArrayInputStream; | |
| import java.io.IOException; | |
| import java.util.HexFormat; | |
| public final class Main { | |
| public static void main(String[] args) throws IOException { | |
| var hex = """ | |
| 5556ddaee3360fbc3f4fc10730f20ec5b6053ee0eb628ba2bd57642621204b3e | |
| fa09f6f1774849b6cf551247e2cfcc70e8ffa7cc1bc95eda466b0a2953914a6e | |
| e3ba904fb1b0af5c5b26b7ca2ec54b7c1207a937fa5f248995f38a8baef98a9b | |
| 5b0a5caaf08dfe6e523e1bd35dee1cf504e2b54251eeaf85de1ca8859ac50b17 | |
| e2281be5578a1e073e71ef46bfa7c89e628af4105cc1733d74a31fd971e15811 | |
| a704e29cca42c1a101ae16540ba386c20b6f3b673c8b1c5d9542ab38bdff7285 | |
| 4340bc770aadee0e8d7ac4c167ab14d04443f6ffe4ed369c7141d042b5bb56cc | |
| 5bde9cb3a307b7a73840f047d5bf24cefeb52203664248590a228d0b14e4d982 | |
| 3b93af6eb7ee64b5c6c901cfdbc7c73fbc12eeb61c1d01cf134d2d565b748874 | |
| 47b299f7ada8df5b68e8f12fd79094aa442f6b03562025e0815e5c346c714fa9 | |
| 8a092ed19eb98a12882cbf012d1729ad920cef852a604478547a0f2eae9a79a1 | |
| 1708cd9cf1a3a0ce6a78dee87b0be80c9ca21294ba0cc2916fc70970c6d60114 | |
| f5765a0f88eb60ec2de35c9703d4603fadce292630acca18dc12ffbcd1b7e00a | |
| d0daab6aa13a2f55a8242f0939205484062aa82665e4c411d50232b7bb381059 | |
| 6a461bfa0c18f15e53a1976c100a77f62a3fedbf43941d3a9564512c91aead4a | |
| 7af64df1668fbba75c72ab79ce12b42ed5313d409431ad0370e18601fb8f9c10 | |
| 0c9d4d2e1ec85720831f57e0628b48cf20a5780e80bf0ba2926f592526112d01 | |
| 2913734faa489dc10de62e1b0c98a9cdfbb61530ae8de899396c6f7e89073717 | |
| 3040818a68734f68f2e8e6b399fc8709402ad01540c7081d82a4fde5d017d838 | |
| f8dc409eeb827f66f7961589ac207b346702f3aca8abc6d0af3ca0ba81f89f0d | |
| 101c69418c49aff7fc456ca36a331c88c74c63c0a5df179da707d48333480cba | |
| 608436743d56cfa6c318380db942b6e98e3e757c1af1e3616ab87ae6653ca609 | |
| 98f97c7cc0175ec8f87277842f05c50657d9e16ff8cea673fe6f3dd1048e3ada | |
| aa3ae374733f455de92467fad4e972a8041eadaae84495b43a803e858c11578d | |
| 2a676a8faba81b74a7341346473e6d5b5aa190efaab7632aa7ddcc0c73329472 | |
| 54323ca7c3d62d4febee9a4b4034a3593d7a546f7e8adcd3eb19d423f1200175 | |
| a5ec456776d5c9043c0816de8248a71bf6f0362d308ca911158c318d2fcbd9c1 | |
| a13e0d7cd924536e737abbd968107b62c4759b073ae7e0baf66c58735d8a9779 | |
| 0e3cca1dd0cf0934d44da8dd5535c794c7588dec6de122ee514c41711ccf31dc | |
| 9c19f7db65c18d6e34e7bfc7fc8d8886e5a246f6652a6cda27de0c857ff11944 | |
| baac089d8663311c6aea044320ba7df14600579d63c51a6338a0f34346736f5e | |
| 5f2702bea8d12bd2879a94d00b376373f5a3275e7b2a8da1a8630df7dd33f832 | |
| 650dbd0e7b399cee8b31a94fce456c441fd331aec1504d61d8ce0fce83d7d376 | |
| ba8d99b37c590b1d18a8e51c3283bc8f240e183bf3e5415f5b469f86ebf4665c | |
| 1a2230f39a736276f8f3d4d932c7ab57839ac7ef2ef36f2dabdb00ef47c69b9b | |
| 284e5d1c079aa6e42b35cbe5a54df96e0e08da381e3abffd02 | |
| """; | |
| var bytes = HexFormat.of().parseHex(hex.replaceAll("\\R", "")); | |
| System.out.println(bytes.length); | |
| byte[] out = new byte[2613]; | |
| try (var in = new BitInputStream(new ByteArrayInputStream(bytes))) { | |
| Inflate inflate = new Inflate(); | |
| inflate.decodeBlock(in, out); | |
| } | |
| } | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment