Skip to content

Instantly share code, notes, and snippets.

@fp64
Created November 23, 2024 21:21
Show Gist options
  • Select an option

  • Save fp64/caf40c13eb74ed159c9e11a8f9686fd3 to your computer and use it in GitHub Desktop.

Select an option

Save fp64/caf40c13eb74ed159c9e11a8f9686fd3 to your computer and use it in GitHub Desktop.
A proof-of-concept lossless random-access compressor for GLSL (and C).
// Public Domain under http://unlicense.org, see link for details.
// A lossless random-access compressor for GLSL (and C).
// Version 1.0.
// WARNING: proof-of-concept, may be buggy.
// See https://www.shadertoy.com/view/4cdyWN for usage example.
//
// Compilation:
// g++ -O3 glhf.cpp -o glhf
// Example of usage:
// cat file.txt | glhf --glsl > decompressor.glsl
// See 'glhf --help' for more options.
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <vector>
#include <map>
#include <algorithm>
#include <functional>
typedef unsigned char u8;
typedef uint32_t u32;
typedef uint64_t u64;
using std::vector;
using std::map;
using std::sort;
using std::greater;
static void usage()
{
printf("Usage: glhf [OPTION]...\n");
printf("Loselessly compress stdin, and output the code for \"self-extracting\"\n");
printf("C/GLSL decompressor to stdout. See https://www.shadertoy.com/view/4cdyWN\n");
printf("for usage example.\n");
printf("\n");
printf("Only long options are supported.\n");
printf(" --help print this message and exit\n");
printf(" --c output C decompressor\n");
printf(" --glsl output GLSL decompressor\n");
printf(" --main include main function (C only)\n");
printf(" --nomain do not include main function (C only)\n");
printf(" --prefix=STRING apply namespacing prefix (useful for encoding\n");
printf(" several files in the same translation unit)\n");
printf(" --input=FILENAME read from FILENAME, not stdin\n");
printf(" --otput=FILENAME write to FILENAME, not stdout\n");
printf(" --codelen=N max. Huffman code length (default: 9)\n");
printf(" --follow=N consider N most likely following chars (default: 4)\n");
printf(" --dict=N dictionary size (default: 64)\n");
}
// Optimal length-limited Huffman codes, using a straightforward
// O(L*n^3) dynamic programming algorithm from
// Garey, M. R. (1974). Optimal Binary Search Trees with Restricted Maximal Depth. SIAM Journal on Computing, 3(2), 101–110. doi:10.1137/0203008
// foregoing even O(L*n^2) improvement from the same paper, because
// we do not care about speed in our usecase. If you do, you may consider
// package-merge or some other O(L*n) algorithm.
static u64 length_limited_huffman(int n,int L,const u64 *weights,int *lengths,u32 *bits)
{
u64 INF=1ull<<53;
u64 *sum=new u64[n+1];
u64 *K=new u64[(L+1)*n*n];
int *I=new int[(L+1)*n*n];
#define IDX(x,i,j,k) ((x)[(i)+n*((j)+n*(k))])
sum[0]=0;
for(int i=0;i<n;++i) sum[i+1]=sum[i]+weights[i];
for(int k=0;k<=L;++k)
{
for(int i=0;i<n;++i)
{
IDX(K,i,i,k)=0;
IDX(I,i,i,k)=1;
for(int j=i+1;j<n;++j)
if((1<<k)<j-i+1)
{
IDX(K,i,j,k)=INF;
IDX(I,i,j,k)=-1;
}
else
{
u64 B=INF;
int b=-1;
for(int m=i;m<j;++m)
{
u64 C=IDX(K,i,m,k-1)+IDX(K,m+1,j,k-1);
if(C<B) {B=C; b=m;}
}
IDX(K,i,j,k)=(sum[j+1]-sum[i])+B;
IDX(I,i,j,k)=b;
}
}
}
u64 ret=IDX(K,0,n-1,L);
for(int i=0;i<n;++i)
{
int l=0,h=n-1,k=L,c=0,b=0;
if(ret==INF) {c=-1;b=-1;}
else while(l<h)
{
int m=IDX(I,l,h,k);
// NOTE: 'root' bit is lsb.
if(i>m) {l=m+1;b+=1<<c;}
else {h=m;}
++c;
--k;
}
lengths[i]=c;
bits[i]=u32(b);
}
#undef IDX
delete[] sum;
delete[] K;
delete[] I;
return ret;
}
static void print_table(int mode,const char *prefix,const char *name,const char *size,int n,const u32 *data)
{
char buf[256]={0};
if(size) snprintf(buf,sizeof(buf),"%s%s",prefix,size);
else snprintf(buf,sizeof(buf),"%d",n>>2);
if(mode==0) printf("uint32_t %s%s[%s][4]={\n",prefix,name,buf);
else printf("uvec4 %s%s[%s]=uvec4[%s](\n",prefix,name,buf,buf);
for(int i=0;i<(n>>2);++i)
{
printf(" %s",(mode?"uvec4(":"{"));
for(int j=0;j<4;++j)
printf("%10uu%s",data[4*i+j],(j==3?"":","));
printf("%s",(mode?")":"}"));
if(i<(n>>2)-1) printf(",");
printf("\n");
}
printf("%s;\n\n",(mode?")":"}"));
}
static u32 load32le(const unsigned char *v) {return v[0]+256u*(v[1]+256u*(v[2]+256u*v[3]));}
int main(int argc,char **argv)
{
const char *filename=0;
const char *output_filename=0;
const char *prefix="";
int mode=0;
int use_main=1;
int max_codelen=9;
int following=4;
int dict_size=64;
for(int i=1;i<argc;++i)
{
const char *s=argv[i];
int t;
if(strcmp(s,"--help")==0) {usage(); return 0;}
if(strncmp(s,"--input=",8)==0) filename=s+8;
if(strncmp(s,"--output=",9)==0) output_filename=s+9;
if(strcmp(s,"--c")==0) {mode=0;}
if(strcmp(s,"--glsl")==0) {mode=1;}
if(strcmp(s,"--main")==0) {use_main=1;}
if(strcmp(s,"--nomain")==0) {use_main=0;}
if(strncmp(s,"--prefix=",9)==0) prefix=s+9;
if(sscanf(s,"--codelen=%d",&t)==1) {max_codelen=t;}
if(sscanf(s,"--follow=%d",&t)==1) {following=t;}
if(sscanf(s,"--dict=%d",&t)==1) {dict_size=t;}
}
if(max_codelen<1) {fprintf(stderr,"ERROR: max. codelen too small.\n"); return 1;}
if(max_codelen>12) {fprintf(stderr,"ERROR: max. codelen too large.\n"); return 1;}
if(following<0||following>256) {fprintf(stderr,"ERROR: invalid --follow value.\n"); return 1;}
if(following<0||following>256) {fprintf(stderr,"ERROR: invalid --follow value.\n"); return 1;}
if(dict_size<0||dict_size>4096) {fprintf(stderr,"ERROR: invalid --dict_size value.\n"); return 1;}
if(256+following+dict_size>4096) {fprintf(stderr,"ERROR: --dict is too large.\n"); return 1;}
FILE *file=(filename?fopen(filename,"rb"):freopen(0,"rb",stdin));
if(!file) {fprintf(stderr,"ERROR: input file inaccessible.\n"); return 1;}
if(output_filename&&!freopen(output_filename,"w",stdout)) {fprintf(stderr,"ERROR: output file inaccessible.\n"); return 1;}
int c;
vector<u8> file_bytes;
while((c=fgetc(file))!=EOF) file_bytes.push_back(u8(c));
fclose(file);
u64 orig_len=u64(file_bytes.size()),len=orig_len;
while((len=file_bytes.size())&15) file_bytes.push_back(0);
u8* input=file_bytes.data();
if(len==0) {fprintf(stderr,"ERROR: input file is empty.\n"); return 1;}
// For our encoding of bookmarks we need compressed length
// to be no more than 2^24 bits. Consider spliting larger
// files into chunks.
if(len>1<<20) {fprintf(stderr,"ERROR: input file is too large.\n"); return 1;}
int ok=0;
for(u64 i=0;i<len;++i) ok|=(input[i]!=input[0]);
if(!ok)
{
printf("/* This file is machine-generated. */\n");
printf("/* WARNING: assumes int is at least 32-bit. */\n\n");
printf("#define %sUNCOMPRESSED_BYTES %llu\n",prefix,(unsigned long long)len);
if(mode==1) printf("uvec4 %sload16(int idx) {uvec4 ret=uvec4(0u); idx=(idx>>4)<<4; for(i=0;i<16;++i) ret[i>>2]+=(idx>=0&&idx+i<%sUNCOMPRESSED_BYTES?%du:0u)<<(8*(i&3)); return ret;}\n",prefix,prefix,int(input[0]));
else printf("void %sload16(int idx,unsigned char *output) {int i; idx=(idx>>4)<<4; for(i=0;i<16;++i) output[i]=(idx>=0&&idx+i<%sUNCOMPRESSED_BYTES?%d:0);}\n",prefix,prefix,int(input[0]));
if(mode==0&&use_main)
{
printf("\n#include <stdio.h>\n\n");
printf("int main() {\n");
printf(" unsigned char buf[16];\n");
printf(" freopen(0,\"wb\",stdout);\n");
printf(" for(int i=0;i<%sUNCOMPRESSED_BYTES;i+=16) {\n",prefix);
printf(" %sload16(i,buf);\n",prefix);
printf(" for(int j=0;j<16;++j)\n");
printf(" if(i+j<%sUNCOMPRESSED_BYTES) fputc(buf[j],stdout);\n",prefix);
printf(" }\n");
printf(" return 0;\n");
printf("}\n");
}
return 0;
}
static u64 F[256][256];
for(int i=0;i<256;++i)
for(int j=0;j<256;++j)
F[i][j]=u64(j);
for(u64 i=1;i<len;++i)
F[input[i-1]][input[i]]+=256;
for(int i=0;i<256;++i)
std::sort(&(F[i][0]),&(F[i][255]),std::greater<u64>());
map<u32,u32> M;
for(u64 i=0;i+3<len;++i)
M[load32le(&(input[0])+i)]++;
vector<u64> dict;
for(map<u32,u32>::iterator it=M.begin();it!=M.end();++it)
dict.push_back((u64(it->second)<<32)+(it->first));
sort(dict.begin(),dict.end(),greater<u64>());
// Just store most frequent dictionary enires. This
// is dumb. E.g. the 2 most common 4-character sequences
// in English text often are 'the ' and ' the', and we
// don't generally need both.
vector<u32> dict2;
for(u32 i=0;i<u32(((dict_size+3)>>2)<<2);++i)
dict2.push_back(i<dict.size()?u32(dict[i]):0u);
vector<int> codes;
for(u64 i=0;i<len;++i)
{
int code=int(input[i]);
if(i&15) for(int j=0;j<following;++j)
if((F[input[i-1]][j]&255)==input[i])
code=256+j;
if((i&15)<16-3&&i<len-3)
{
for(int j=0;j<dict_size;++j)
if(load32le(&(input[0])+i)==dict2[u32(j)])
{
code=256+following+j;
i+=3;
break;
}
}
codes.push_back(code);
}
u32 len2=codes.size();
static u64 freq[1024]={0};
int alphabet=256+following+dict_size;
for(int i=0;i<alphabet;++i) freq[i]=u64(i);
for(u64 i=0;i<len2;++i) freq[codes[size_t(i)]]+=1024;
sort(freq+0,freq+alphabet,std::greater<u64>());
while(alphabet>0&&(freq[alphabet-1]>>10)==0) --alphabet;
static int idx2code[1024],code2idx[1024];
for(int i=0;i<alphabet;++i) {idx2code[i]=int(freq[i]&1023); code2idx[idx2code[i]]=i; freq[i]>>=10;}
static int lengths[1024];
static u32 bits[1024];
u64 encoded_bits=length_limited_huffman(alphabet,max_codelen,freq,lengths,bits);
if(encoded_bits>1ull<<32) {fprintf(stderr,"ERROR: max. codelen is too small.\n"); return 0;}
max_codelen=0;
for(int i=0;i<alphabet;++i) if(lengths[i]>max_codelen) max_codelen=lengths[i];
vector<u32> follow;
for(int i=0;i<following;++i)
{
u32 b=0;
for(int j=0;j<256;++j)
{
b+=u32(F[j][i]&255)<<(8*(j&3));
if((j&3)==3) {follow.push_back(b); b=0;}
}
}
vector<u32> huffman_table(1<<max_codelen);
for(int i=0;i<alphabet;++i)
{
int b=max_codelen-lengths[i];
for(int j=0;j<(1<<b);++j)
huffman_table[bits[i]+u32(j<<lengths[i])]=u32(idx2code[i]+(lengths[i]<<12));
}
vector<u32> bookmark,bookmark2;
vector<u32> encoded;
u64 B=0;
int b=0;
// Write bitstream in LSB-first little-endian fashion,
// with uint32 granularity, see
// https://fgiesen.wordpress.com/2018/02/19/reading-bits-in-far-too-many-ways-part-1/
// This is the reason we use Huffman codes with
// 'root' bit at lsb.
for(u32 i=0,j=0;i<len2;++i)
{
if((j&15)==0) bookmark.push_back(encoded.size()*32+u32(b));
j+=(codes[i]>=256+following?4u:1u);
int idx=code2idx[codes[i]];
B+=u64(bits[idx])<<b;
b+=lengths[idx];
if(b>=32)
{
encoded.push_back(u32(B));
B>>=32;
b-=32;
}
}
if(b>0) encoded.push_back(u32(B));
while(encoded.size()&3) encoded.push_back(0u);
while(bookmark.size()&7) bookmark.push_back(bookmark[bookmark.size()-1]);
for(u32 i=0;i<u32(bookmark.size());i+=2)
bookmark2.push_back((bookmark[i]<<8)+(bookmark[i+1]-bookmark[i]));
vector<u32> huffman2(huffman_table.size()/2);
for(u32 i=0;i<u32(huffman2.size());++i) huffman2[i]=huffman_table[2*i+0]+(huffman_table[2*i+1]<<16);
printf("/* This file is machine-generated. */\n");
printf("/* WARNING: assumes int is at least 32-bit. */\n\n");
printf("#define %sUNCOMPRESSED_BYTES %lluu\n",prefix,(unsigned long long)orig_len);
printf("/* Alphabet size: %d */\n",alphabet);
printf("#define %sCOMPRESSED_BITS %lluu\n",prefix,(unsigned long long)encoded_bits);
printf("#define %sCOMPRESSED_BYTES ((%sCOMPRESSED_BITS+7u)>>3)\n",prefix,prefix);
printf("#define %sCOMPRESSED_BLOCKS ((%sCOMPRESSED_BITS+127u)>>7)\n",prefix,prefix);
u64 compressed_length=4*(
bookmark2.size()+
follow.size()+
dict2.size()+
huffman2.size()+
encoded.size());
printf("/* Size of encoded data (incl. LUTs): %llu byte(s) (%7.3f%%) */\n",(unsigned long long)compressed_length,100.0*double(compressed_length)/double(len));
printf("#define %sBOOKMARK_LUT_SIZE ((%sUNCOMPRESSED_BYTES+127u)>>7)\n",prefix,prefix);
printf("#define %sFOLLOWING %du\n",prefix,following);
printf("#define %sFOLLOWING_LUT_SIZE (%sFOLLOWING<<4)\n",prefix,prefix);
printf("#define %sDICT_SIZE %du\n",prefix,dict_size);
printf("#define %sDICT_LUT_SIZE ((DICT_SIZE+3u)>>2)\n",prefix);
printf("#define %sMAX_CODELEN %du\n",prefix,max_codelen);
printf("#define %sHUFFMAN_LUT_SIZE (1u<<(%sMAX_CODELEN-3u))\n",prefix,prefix);
printf("\n");
if(mode==0) printf("#include <stdint.h>\n\n");
print_table(mode,prefix,"bookmark","BOOKMARK_LUT_SIZE",int(bookmark2.size()),bookmark2.data());
if(following>0)
print_table(mode,prefix,"follow","FOLLOWING_LUT_SIZE",int(follow.size()),follow.data());
if(dict_size>0)
print_table(mode,prefix,"dict","DICT_LUT_SIZE",int(dict2.size()),dict2.data());
print_table(mode,prefix,"huffman","HUFFMAN_LUT_SIZE",int(huffman2.size()),huffman2.data());
print_table(mode,prefix,"compressed","COMPRESSED_BLOCKS",int(encoded.size()),encoded.data());
if(mode==1) printf("uvec4 %sload16(int idx) {\n",prefix);
else printf("void %sload16(int idx,unsigned char *output) {\n",prefix);
printf(" %s i,j,a,b,c,code,sym,len,bit_idx,prev=0u;\n",(mode?"uint":"uint32_t"));
if(mode==0) printf(" uint32_t A[4],B[4];\n");
else printf(" uvec4 A,B,C;\n");
printf(" idx>>=4;\n");
printf(" if(idx<0||16*idx>=%s(%sUNCOMPRESSED_BYTES)) {\n",(mode==1?"int":"(int)"),prefix);
if(mode==0)
{
printf(" for(i=0;i<16;++i) output[i]=0;\n");
printf(" return;\n");
}
else printf(" return uvec4(0u);\n");
printf(" }\n");
printf(" bit_idx=%sbookmark[idx>>3][(idx>>1)&3];\n",prefix);
printf(" bit_idx=(bit_idx>>8)+%s(idx&1)*(bit_idx&255u);\n",(mode==1?"uint":"(uint32_t)"));
if(mode==0)
{
printf(" for(i=0u;i<4u;++i) {\n");
printf(" a=(bit_idx>>5)+i;\n");
printf(" b=a+4u;\n");
printf(" if(a>=4u*%sCOMPRESSED_BLOCKS) a=4*%sCOMPRESSED_BLOCKS-1u;\n",prefix,prefix);
printf(" if(b>=4u*%sCOMPRESSED_BLOCKS) b=4*%sCOMPRESSED_BLOCKS-1u;\n",prefix,prefix);
printf(" A[i]=%scompressed[a>>2][a&3u];\n",prefix);
printf(" B[i]=%scompressed[b>>2][b&3u];\n",prefix);
printf(" }\n");
}
else
{
printf(" A=%scompressed[min((bit_idx>>7)+0u,%sCOMPRESSED_BLOCKS-1u)];\n",prefix,prefix);
printf(" B=%scompressed[min((bit_idx>>7)+1u,%sCOMPRESSED_BLOCKS-1u)];\n",prefix,prefix);
printf(" C=%scompressed[min((bit_idx>>7)+2u,%sCOMPRESSED_BLOCKS-1u)];\n",prefix,prefix);
printf(" len=bit_idx&127u;\n");
printf(" if(len>=64u) {\n");
printf(" A=uvec4(A.zw,B.xy);\n");
printf(" B=uvec4(B.zw,C.xy);\n");
printf(" C=uvec4(C.zw,0u,0u);\n");
printf(" len-=64u;\n");
printf(" }\n");
printf(" if(len>=32u) {\n");
printf(" A=uvec4(A.yzw,B.x);\n");
printf(" B=uvec4(B.yzw,C.x);\n");
printf(" C=uvec4(C.yzw,0u);\n");
printf(" len-=32u;\n");
printf(" }\n");
}
printf(" if((len=(bit_idx&31u))>0u) {\n");
if(mode==0)
{
printf(" for(j=0u;j<4u;++j) A[j]=(A[j]>>len)|((j==3u?B[0]:A[j+1u])<<(32u-len));\n");
printf(" for(j=0u;j<4u;++j) B[j]=(B[j]>>len)|((j==3u?0u :B[j+1u])<<(32u-len));\n");
}
else
{
printf(" A=(A>>len)|(uvec4(A.yzw,B.x)<<(32u-len));\n");
printf(" B=(B>>len)|(uvec4(B.yzw,C.x)<<(32u-len));\n");
}
printf(" }\n");
if(mode==1) printf(" C=uvec4(0u);\n");
printf(" for(i=0u;i<16u;++i) {\n");
printf(" b=A[0]&((1u<<%sMAX_CODELEN)-1u);\n",prefix);
printf(" code=(%shuffman[b>>3][(b>>1)&3u]>>(16u*(b&1u)))&65535u;\n",prefix);
printf(" sym=code&4095u;\n");
printf(" len=code>>12;\n");
printf(" if(sym<256u) prev=sym;\n");
if(following>0)
{
printf(" if(sym>=256u&&sym<256u+%sFOLLOWING) {\n",prefix);
printf(" c=256u*(sym-256u)+prev;\n");
printf(" prev=(%sfollow[c>>4][(c>>2)&3u]>>(8u*(c&3u)))&255u;\n",prefix);
printf(" }\n");
}
if(dict_size>0)
{
printf(" if(sym>=256u+%sFOLLOWING&&sym<256u+%sFOLLOWING+%sDICT_SIZE) {\n",prefix,prefix,prefix);
printf(" c=(sym-256u-%sFOLLOWING);\n",prefix);
printf(" c=%sdict[c>>2][c&3u];\n",prefix);
printf(" for(j=0u;j<3u;++j) {\n");
printf(" a=(c>>(8u*j))&255u;\n");
if(mode==0) printf(" output[(i+j)]=(unsigned char)a;\n");
else printf(" C[(i+j)>>2]+=a<<(8u*((i+j)&3u));\n");
printf(" }\n");
printf(" prev=c>>24;\n");
printf(" i+=3u;\n");
printf(" }\n");
}
if(mode==0) printf(" output[i]=(unsigned char)prev;\n");
else printf(" C[i>>2]+=prev<<(8u*(i&3u));\n");
if(mode==0)
{
printf(" for(j=0u;j<4u;++j) A[j]=(A[j]>>len)|((j==3u?B[0]:A[j+1])<<(32u-len));\n");
printf(" for(j=0u;j<4u;++j) B[j]=(B[j]>>len)|((j==3u?0u :B[j+1])<<(32u-len));\n");
}
else
{
printf(" A=(A>>len)|(uvec4(A.yzw,B.x)<<(32u-len));\n");
printf(" B=(B>>len)|(uvec4(B.yzw,0u )<<(32u-len));\n");
}
printf(" }\n");
if(mode==1) printf(" return C;\n");
printf("}\n");
if(mode==0&&use_main)
{
printf("\n#include <stdio.h>\n\n");
printf("int main() {\n");
printf(" unsigned char buf[16];\n");
printf(" int i,j;\n");
printf(" freopen(0,\"wb\",stdout);\n");
printf(" for(i=0;i<(int)%sUNCOMPRESSED_BYTES;i+=16) {\n",prefix);
printf(" %sload16(i,buf);\n",prefix);
printf(" for(j=0;j<16;++j)\n");
printf(" if(i+j<(int)%sUNCOMPRESSED_BYTES) fputc(buf[j],stdout);\n",prefix);
printf(" }\n");
printf(" return 0;\n");
printf("}\n");
}
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment