-
-
Save cpq/4714740 to your computer and use it in GitHub Desktop.
| // Copyright (c) Sergey Lyubka, 2013. | |
| // All rights reserved. | |
| // Released under the MIT license. | |
| // This program is used to embed arbitrary data into a C binary. It takes | |
| // a list of files as an input, and produces a .c data file that contains | |
| // contents of all these files as collection of char arrays. | |
| // Usage: | |
| // 1. Compile this file: | |
| // cc -o embed embed.c | |
| // | |
| // 2. Convert list of files into single .c: | |
| // ./embed file1.data file2.data > embedded_data.c | |
| // | |
| // 3. In your application code, you can access files using this function: | |
| // | |
| // const char *find_embedded_file(const char *file_name, size_t *size); | |
| // size_t size; | |
| // const char *data = find_embedded_file("file1.data", &size); | |
| // | |
| // 4. Build your app with embedded_data.c: | |
| // cc -o my_app my_app.c embedded_data.c | |
| #include <stdio.h> | |
| #include <stdlib.h> | |
// Text of the lookup function that is appended verbatim to the generated
// file. The emitted function walks embedded_files[] until it reaches the
// entry whose name is NULL, compares names with strcmp(), stores the size
// through `size` when that pointer is non-NULL, and returns the data pointer
// (or NULL when no entry matches).
static const char *code =
  // Signature and the table cursor.
  "const char *find_embedded_file(const char *name, size_t *size) {\n"
  " const struct embedded_file *p;\n"

  // Scan of the table, with the match branch.
  " for (p = embedded_files; p->name != NULL; p++) {\n"
  " if (!strcmp(p->name, name)) {\n"
  " if (size != NULL) { *size = p->size; }\n return (const char *) p->data;\n"
  " }\n }\n"

  // Fall-through result for an unknown name.
  " return NULL;\n}\n";
| int main(int argc, char *argv[]) { | |
| FILE *fp; | |
| int i, j, ch; | |
| for (i = 1; i < argc; i++) { | |
| if ((fp = fopen(argv[i], "rb")) == NULL) { | |
| exit(EXIT_FAILURE); | |
| } else { | |
| printf("static const unsigned char v%d[] = {", i); | |
| for (j = 0; (ch = fgetc(fp)) != EOF; j++) { | |
| if ((j % 12) == 0) { | |
| printf("%s", "\n "); | |
| } | |
| printf(" %#04x,", ch); | |
| } | |
| // Append zero byte at the end, to make text files appear in memory | |
| // as nul-terminated strings. | |
| printf("%s", " 0x00\n};\n"); | |
| fclose(fp); | |
| } | |
| } | |
| printf("%s", "\nconst struct embedded_file {\n"); | |
| printf("%s", " const char *name;\n"); | |
| printf("%s", " const unsigned char *data;\n"); | |
| printf("%s", " size_t size;\n"); | |
| printf("%s", "} embedded_files[] = {\n"); | |
| for (i = 1; i < argc; i++) { | |
| printf(" {\"%s\", v%d, sizeof(v%d) - 1},\n", argv[i], i, i); | |
| } | |
| printf("%s", " {NULL, NULL, 0}\n"); | |
| printf("%s", "};\n\n"); | |
| printf("%s", code); | |
| return EXIT_SUCCESS; | |
| } |
| # Copyright (c) Sergey Lyubka, 2013. | |
| # All rights reserved. | |
| # Released under the MIT license. | |
| # | |
| # Same functionality as C code above, but in Perl. | |
| # | |
| # This program is used to embed arbitrary data into a C binary. It takes | |
| # a list of files as an input, and produces a .c data file that contains | |
| # contents of all these files as collection of char arrays. | |
| # | |
| # Usage: perl <this_file> <file1> [file2, ...] > embedded_data.c | |
use strict;
use warnings;

# Emit one byte array per input file. Arrays are named v0, v1, ... following
# the order of the command-line arguments, and each gets an extra trailing
# zero byte so that text files look like nul-terminated strings in memory.
foreach my $i (0 .. $#ARGV) {
  # Lexical filehandle in raw mode: bytes are read without any translation.
  open(my $fh, '<:raw', $ARGV[$i]) or die "Cannot open $ARGV[$i]: $!\n";
  printf("static const unsigned char v%d[] = {", $i);
  my ($byte, $got);
  my $j = 0;
  while ($got = read($fh, $byte, 1)) {
    # Start a new output line every 12 bytes.
    if (($j % 12) == 0) {
      print "\n";
    }
    # An explicit "0x" prefix keeps zero bytes formatted like all others
    # ('%#04x' would print them as "0000").
    printf ' 0x%02x,', ord($byte);
    $j++;
  }
  # read() returns 0 at end of file and undef on error; do not embed a
  # partially read file silently.
  defined $got or die "Cannot read $ARGV[$i]: $!\n";
  print " 0x00\n};\n";
  close $fh;
}

# Headers and the opening of the name -> data table. The heredoc is
# non-interpolating because this is literal C text.
print <<'EOS';
#include <stddef.h>
#include <string.h>
static const struct embedded_file {
const char *name;
const unsigned char *data;
size_t size;
} embedded_files[] = {
EOS

# One table row per input file; sizeof(vN) - 1 excludes the appended zero.
foreach my $i (0 .. $#ARGV) {
  # Escape characters that would terminate or corrupt the C string literal.
  (my $name = $ARGV[$i]) =~ s/(["\\])/\\$1/g;
  print " {\"$name\", v$i, sizeof(v$i) - 1},\n";
}

# Table terminator and the lookup function.
print <<'EOS';
{NULL, NULL, 0}
};
const char *find_embedded_file(const char *name, size_t *size) {
const struct embedded_file *p;
for (p = embedded_files; p->name != NULL; p++) {
if (!strcmp(p->name, name)) {
if (size != NULL) { *size = p->size; }
return (const char *) p->data;
}
}
return NULL;
}
EOS
Actually - scratch that. I wrote my own embedder in Python, which doesn't trigger compiler warnings about over-long strings. :-)
Thanks for the inspiration!
Oh hey Bjarni!
Haven't heard from you for ages. Cool that you find it useful, I've added the "MIT" license snippet just in case.
So I may seem like a noob...but let's say I'm using this to pack a zip into the binary, how would I recreate the zip file using the const char *data = find_embedded_file("test.zip", &size); function?
I'm basically needing to pack a lot of files as "resources" into a single C binary, then unpack them into files at runtime...I've done this successfully in C#, however I'm not sure how to convert the hex values created in the C file generated from the embed program back into tangible files.
@anonneo If I'm not too late, all you need to do is loop over the embedded data in your C program and write each byte to a FILE pointer.
FILE *fp = fopen("some_file.ext", "wb");
for(int i = 0; i < size; i++) {
fputc(some_file[i], fp);
}
Sorry about the formatting, markdown code styling is new to me.
@cpq sorry to bother you but I was just wondering how much bigger the data.c output file would be compared to the original binary?
I'm running an ~480 MB zipped archive through and getting a c file that is greater than 2.6 GB. Is this a problem, or is that actually what is supposed to happen?
@pete-the-dev that's about right. Each byte gets transformed into a ` 0xYY,` string, which is 6 characters, so expect a ~6x size increase.
That's a .c file though, remember that when it gets compiled, file data will occupy the original space in the binary, not 6x.
Thanks!
Since C23 you can embed files directly in C with the `#embed` preprocessor directive. For C++, you can compile the embedding source as C and link the resulting object file.
Hi @cpq - this came up when I was googling for a solution to exactly this problem, and I really like how you've done this. It's very clean and elegant!
However, I notice you've marked the sources as "all rights reserved", with no other license details. May I have your permission to include this code in
libpagekite, a software library which I release under the Apache and AGPL licenses?
Thanks!