Skip to content

Instantly share code, notes, and snippets.

@cw2k
Last active February 9, 2026 07:37
Show Gist options
  • Select an option

  • Save cw2k/2b2163c422183b884b7405bc0e09dfb2 to your computer and use it in GitHub Desktop.

Select an option

Save cw2k/2b2163c422183b884b7405bc0e09dfb2 to your computer and use it in GitHub Desktop.
Advanced Installer Extractor / Unpacker
#!/usr/bin/env python
# =============================================================================
# advancedInstallerExtractor.py
#
# Extracts embedded files from Advanced Installer self‑extracting executables.
#
# The format consists of two tightly connected structures placed at the end
# of the EXE: a footer block and a file‑info table. The footer describes where
# the file‑info entries are located and where the actual file data begins.
#
# ---------------------------------------------------------------------------
# Footer discovery
# ---------------------------------------------------------------------------
# Advanced Installer places the ASCII marker "ADVINSTSFX" at the very end of
# the executable. The real footer begins exactly 0x40 bytes *before* this
# marker. The extractor scans backwards from EOF to locate the marker, then
# subtracts 0x40 to find the structured footer header.
#
# This footer contains absolute and relative offsets that link directly to
# the file‑info table and the file‑data region. Without the footer, the
# archive cannot be parsed.
#
# ---------------------------------------------------------------------------
# Footer structure (0x40 bytes before "ADVINSTSFX")
# ---------------------------------------------------------------------------
# (Offsets below are relative to the position 0x40 bytes before the marker,
#  which is where the parser starts reading.)
# +00 dummy0 Leading dword read by the parser (meaning unknown)
# +04 offset Absolute offset of the footer within the EXE
# +08 nfiles Number of embedded files
# +0C version Version / compatibility field
# +10 offset1 Internal Advanced Installer offset
# +14 fileInfo Relative offset to the file‑info table
# +18 fileData Relative offset to the file‑data region
# +1C hexhash[32] 32‑byte signature / hash block
# +3C dummy2 Reserved
# +40 name[10] The "ADVINSTSFX" marker itself
#
# ---------------------------------------------------------------------------
# File‑info table relationship
# ---------------------------------------------------------------------------
# The file‑info table is located at:
#
# fileInfoTablePos = offset + fileInfo
#
# Each entry describes one embedded file. The footer tells how many entries
# exist (nfiles) and where the table begins. The offsets inside each entry
# point into the file‑data region, whose base address is:
#
# fileDataPos = offset + fileData
#
# Thus, footer → file‑info table → file‑data region form a single linked
# structure that fully describes the archive.
#
# ---------------------------------------------------------------------------
# File‑info entry (24 bytes + UTF‑16LE filename)
# ---------------------------------------------------------------------------
# +00 dummy1
# +04 dummy2
# +08 xor_flag 2 = first 0x200 bytes of the file are XOR‑encoded
# +0C size File size
# +10 offset Relative offset to file data
# +14 namesize Number of UTF‑16 code units in the filename
# +18 name[...] UTF‑16LE filename (namesize * 2 bytes)
#
# ---------------------------------------------------------------------------
# XOR encoding
# ---------------------------------------------------------------------------
# If xor_flag == 2, the first 0x200 bytes of the file are XOR‑decoded using
# a simple 0xFF bytewise XOR. The remainder of the file is stored verbatim.
#
# ---------------------------------------------------------------------------
# Notes
# ---------------------------------------------------------------------------
# Found on https://gist.github.com/KasparNagu/9ee02cb62d81d9e4c7a833518a710d6e
# • Inspired by the public domain script from aluigi:
# https://aluigi.altervista.org/bms/advanced_installer.bms
# • Additional reverse engineering was performed to understand footer
# placement, structural variations, and XOR behavior.
# • License: Public Domain
# =============================================================================
import sys
import struct
import os
class AI_Reader:
    """Locate and parse the archive index of an Advanced Installer SFX file.

    The reader finds the ``ADVINSTSFX`` marker near the end of the file,
    parses the fixed-size footer that ends with that marker, and then walks
    the file-info table the footer points to.  Embedded files are exposed as
    ``AI_FileInfo`` records (``infolist``) and can be opened as
    ``AI_FileReader`` streams (``open``).

    NOTE(review): ``info_off`` and every entry's ``offset`` are used as
    absolute file positions by this class - confirm against real samples.
    """

    MAGIC = b"ADVINSTSFX"
    search_back = 0x4000  # search window from EOF
    # If the exe is signed we need a bigger window to seek over the signature;
    # we might seek there via PE.optionalhdr.offsetsecurity ([$3c]+14).

    def __init__(self, filename, debug=None):
        """Open *filename* for reading.

        Args:
            filename: Path of the Advanced Installer executable.
            debug: Optional object with a ``write(str)`` method that receives
                diagnostic text; ``None`` disables diagnostics.
        """
        self.filename = filename
        self.filehandle = open(filename, "rb")
        self.debug = debug
        # 0x4A = 8 * 4 + 32 + 10  <=  "I I I I I I I 32s I 10s"
        self.footer_Struct_Size = 0x4A
        # 0x18 = 6 * 4            <=  "I I I I I I"
        self.FileInfo_Struct_Size = 0x18
        self.xorSize = 0x200
        self.footer_position = None
        self.threadsafeReopen = True
        self.files = []
        try:
            # Determine EOF once
            self.filehandle.seek(0, os.SEEK_END)
            self.eof = self.filehandle.tell()
        except Exception:
            # Do not leak the handle when the file turns out to be unusable.
            self.filehandle.close()
            raise

    def debug_write(self, s):
        """Write *s* to the debug sink, if one was supplied."""
        if self.debug:
            self.debug.write(s)

    # ------------------------------------------------------------
    # Footer search (robust)
    # ------------------------------------------------------------
    def search_footer(self):
        """Find the last ``MAGIC`` marker within ``search_back`` bytes of EOF.

        On success ``footer_position`` is set to the distance from EOF to the
        start of the footer structure (the structure ends with the marker).

        Raises:
            Exception: If the marker does not occur in the search window.
        """
        magic_len = len(self.MAGIC)
        # The marker may start anywhere in the last `search_back` positions,
        # so the window must hold `search_back - 1` extra bytes plus the marker.
        window = min(self.eof, self.search_back + magic_len - 1)
        self.filehandle.seek(self.eof - window)
        tail = self.filehandle.read(window)
        # Reverse Engineering Note:
        # AI also checks for (Version == 100) but that is not implemented here.
        found = tail.rfind(self.MAGIC)
        if found < 0:
            raise Exception(
                "Footer magic not found in last %d bytes" % self.search_back)
        # Number of bytes between the end of the marker and EOF.
        magic_start = len(tail) - magic_len - found
        self.footer_position = magic_start + self.footer_Struct_Size

    # ------------------------------------------------------------
    # Footer fallback parser (separate method)
    # ------------------------------------------------------------
    def _parse_footer_fallback(self, footer):
        """Parse a footer buffer that lacks the leading dword.

        Used when the buffer does not match the primary 0x4A-byte layout.
        The buffer is expected to start at the ``offset`` field.

        NOTE(review): these layouts are a best-effort reading of the format;
        the previous implementation could never succeed because its struct
        formats did not match the slices it unpacked.

        Returns:
            Tuple ``(offset, nfiles, version, offset1, info_off, file_off,
            hexhash, dummy2, name)``.

        Raises:
            Exception: If the buffer is too short for any known layout.
        """
        # Six dwords, 32-byte hash block, one reserved dword, then the name.
        full = struct.Struct("<6I32sI")
        if len(footer) >= full.size:
            return full.unpack_from(footer) + (footer[full.size:],)

        # Minimal layout: only the six dwords are guaranteed to be present.
        minimal = struct.Struct("<6I")
        if len(footer) >= minimal.size:
            hexhash = footer[minimal.size:minimal.size + 32]
            return minimal.unpack_from(footer) + (hexhash, 0, b"")

        raise Exception("Unable to parse footer (fallback failed)")

    # ------------------------------------------------------------
    # Footer parsing (main)
    # ------------------------------------------------------------
    def read_footer(self):
        """Read the footer and store ``nfiles`` and ``info_off`` on the reader.

        Raises:
            Exception: If the marker is missing, the file is too short to
                hold the footer, or the footer cannot be parsed.
        """
        if self.footer_position is None:
            self.search_footer()
        footer_start = self.eof - self.footer_position
        if footer_start < 0:
            raise Exception(
                "Footer marker found, but the file is too short to hold the footer")
        self.filehandle.seek(footer_start)
        footer = self.filehandle.read(self.footer_Struct_Size)

        try:
            # Primary format: leading dword + fields + 10-byte marker.
            (dummy0, offset, nfiles, version, offset1, info_off, file_off,
             hexhash, dummy2, name) = struct.unpack(
                 "<I I I I I I I 32s I 10s", footer)
        except struct.error:
            # The fallback layouts have no leading dword.
            dummy0 = 0
            (offset, nfiles, version, offset1, info_off, file_off,
             hexhash, dummy2, name) = self._parse_footer_fallback(footer)

        self.nfiles = nfiles
        self.info_off = info_off
        # Reverse Engineering Note:
        # hexhash[12..15] = pos - offset1
        # ^- Some 'specialty' found in the code
        self.debug_write(
            f"\n ============ AI_Header ============ \n\t"
            f"+00_dummy0 = @{dummy0:08X}\n\t"
            f"+04_offset = @{offset:08X}\n\t"
            f"+08_nfiles = {self.nfiles:08X}\n\t"
            f"+0c_Version = {version:8d}\n\t"
            f"+10_offset1 = @{offset1:08X}\n\t"
            f"+14_fileInfo = @{self.info_off:08X}\n\t"
            f"+18_fileData = @{file_off:08X}\n\t"
            f"+1C_hexhash = {hexhash}\n\t"
            f"+3C_dummy2 = {dummy2:08X}\n\t"
            f"+40_name = {name}\n"
        )

    # ------------------------------------------------------------
    # File index parsing
    # ------------------------------------------------------------
    def read_info(self):
        """Parse the file-info table into ``self.files``.

        Each entry is a 24-byte header followed by a UTF-16LE name.  Parsing
        stops at the first truncated or implausible entry, because entries
        are variable-length and the following ones can no longer be located.
        """
        self.read_footer()
        self.files = []
        self.filehandle.seek(self.info_off, os.SEEK_SET)
        self.debug_write("\n ============ AI_info ============\n")
        for i in range(self.nfiles):
            entry = self.filehandle.read(self.FileInfo_Struct_Size)
            if len(entry) < self.FileInfo_Struct_Size:
                self.debug_write(
                    f"Warning: incomplete info block for file #{i}\n")
                break
            dummy1, dummy2, xoredSize, size, offset, namesize = \
                struct.unpack("<I I I I I I", entry)
            self.debug_write(
                f"\n\t"
                f"+10_@{offset:08X}\t"
                f"+00_v1: {dummy1:03X}\t"
                f"+04_v2: {dummy2:03X}\t"
                f"+08_xoredSize: {xoredSize:03X}\t"
                f"+0C_FileSize = {size:10d}\t"
                f"\n"
            )
            # namesize counts UTF-16 code units; convert to bytes.
            namesize <<= 1
            if 0 < namesize < 0xFFFF:
                FileNameRaw = self.filehandle.read(namesize)
                if len(FileNameRaw) != namesize:
                    self.debug_write(
                        f"Warning: incomplete name for file #{i}\n")
                    # Hit EOF inside the table: nothing more to read.
                    break
                try:
                    name = FileNameRaw.decode("UTF-16LE").rstrip("\x00")
                except UnicodeDecodeError:
                    name = f"file_{i}.bin"
                    self.debug_write(" name= %s (unnamed)\n" % name)
                else:
                    self.debug_write(" name= %s\n" % name)
            elif namesize == 0:
                name = f"unnamed_file_{i}.bin"
            else:
                self.debug_write(
                    f"Warning: invalid namesize {namesize} for file #{i}\n")
                # The name was not consumed, so the cursor is no longer
                # aligned with the next entry; continuing would parse garbage.
                break
            self.files.append(
                AI_FileInfo(
                    name,
                    size,
                    offset,
                    self.xorSize if xoredSize == 2 else 0,
                )
            )

    # ------------------------------------------------------------
    # Thread-safe file opening
    # ------------------------------------------------------------
    def open(self, infoFile):
        """Open one embedded file for reading.

        Args:
            infoFile: An ``AI_FileInfo`` record, or a file name to look up.

        Returns:
            An ``AI_FileReader`` positioned at the start of the file data, or
            ``None`` when a name was given and no entry matches it.
        """
        # Lookup by name
        if not isinstance(infoFile, AI_FileInfo):
            if not self.files:
                self.read_info()
            for candidate in self.files:
                if candidate.name == infoFile:
                    infoFile = candidate
                    break
            else:
                return None

        # Thread-safe: always open a fresh handle if enabled.  A fresh handle
        # belongs to the returned reader and must be closed by it; the shared
        # handle belongs to this object and must survive the reader.
        if self.threadsafeReopen:
            fh = open(self.filename, "rb")
            keep_open = False
        else:
            fh = self.filehandle
            keep_open = True
        try:
            fh.seek(infoFile.offset, 0)
        except Exception:
            if not keep_open:
                fh.close()
            raise
        return AI_FileReader(
            fh,
            infoFile.size,
            keep_open,
            infoFile.xorSize,
        )

    def close(self):
        """Close the reader's own file handle."""
        self.filehandle.close()

    # ------------------------------------------------------------
    def infolist(self):
        """Return the list of ``AI_FileInfo`` records, parsing it on demand."""
        if not self.files:
            self.read_info()
        return self.files

    def __enter__(self):
        return self

    def __exit__(self, *_):
        self.close()

    def __repr__(self):
        return f"[path={self.filename} footer={self.footer_position} Files={len(self.files)}]"
class AI_FileInfo:
    """Plain record describing one file stored in the installer.

    Attributes:
        name: File name as stored in the file-info table.
        size: File size in bytes.
        offset: Position of the file data.
        xorSize: Number of leading bytes that are XOR-encoded (0 for none).
    """

    def __init__(self, name, size, offset, xorSize):
        self.name, self.size = name, size
        self.offset, self.xorSize = offset, xorSize

    def __repr__(self):
        # Size is shown in whole mebibytes, right-aligned to four columns.
        megabytes = self.size >> 20
        return "{:4d} MB {}".format(megabytes, self.name)
#f"@{self.xorSize:08X} "
#f"@{self.offset:08X} "
class AI_FileReader:
    """Read-only, file-like view of one embedded file.

    The underlying handle must already be positioned at the first byte of
    the file data.  The first ``xorLength`` bytes of the file are decoded by
    XOR-ing every byte with 0xFF; the rest is returned verbatim.

    Args:
        filehandle: Binary file object positioned at the start of the data.
        size: Length of the embedded file in bytes.
        keepOpen: When true, ``close()`` leaves *filehandle* open (it is
            owned by someone else); when false, ``close()`` closes it.
        xorLength: Number of leading bytes that are XOR-encoded.
    """

    # Translation table mapping every byte value to its complement (b ^ 0xFF).
    _XOR_TABLE = bytes(value ^ 0xFF for value in range(256))

    def __init__(self, filehandle, size, keepOpen, xorLength):
        self.filehandle = filehandle
        self.size = size
        self.xorLength = xorLength
        self.pos = 0
        self.keepOpen = keepOpen

    def xorFF(self, block):
        """Return *block* with every byte XOR-ed with 0xFF.

        A ``str`` argument is encoded as latin-1 first.
        """
        if isinstance(block, str):
            block = block.encode("latin1")
        return bytes(block).translate(self._XOR_TABLE)

    def read(self, size=None):
        """Read up to *size* bytes of decoded file data.

        Args:
            size: Maximum number of bytes; ``None`` or a negative value reads
                everything up to the end of the embedded file.

        Returns:
            The decoded bytes, or ``b""`` at the end of the embedded file.
        """
        remaining = self.size - self.pos
        if size is None or size < 0:
            size = remaining
        size = min(size, remaining)
        if size <= 0:
            return b""

        # Split the request into the XOR-encoded part and the plain part.
        xor_end = min(self.xorLength, self.pos + size)
        xor_len = max(0, xor_end - self.pos)
        normal_len = size - xor_len

        out = bytearray()

        # Read the XOR-encoded part
        if xor_len > 0:
            raw = self.filehandle.read(xor_len)
            if not raw:
                return b""
            out.extend(self.xorFF(raw))
            self.pos += len(raw)
            if len(raw) < xor_len:
                # Short read: the handle is still inside the encoded region,
                # so the plain read below would return undecoded bytes.
                return bytes(out)

        # Read the plain part
        if normal_len > 0:
            raw = self.filehandle.read(normal_len)
            if raw:
                out.extend(raw)
                self.pos += len(raw)

        return bytes(out)

    def close(self):
        """Close the underlying handle unless it was marked ``keepOpen``."""
        # Only handles owned by this reader are closed
        if self.keepOpen:
            return
        try:
            self.filehandle.close()
        except OSError:
            # Best effort: a failing close on a read-only handle is harmless.
            pass

    def __enter__(self):
        return self

    def __exit__(self, *_):
        self.close()
def parse_args():
    """Define and parse the command-line arguments."""
    import argparse
    parser = argparse.ArgumentParser(description="Advanced Installer Extractor")
    parser.add_argument("file", type=str, help="Advanced Installer executable to open")
    parser.add_argument("files", type=str, nargs="*", help="Optional list of files to extract")
    parser.add_argument("-x", "--extract", action="store_true", help="Extract selected files")
    parser.add_argument("-l", "--list", action="store_true", help="List files inside the installer")
    parser.add_argument("-v", "--verbose", action="store_true", help="Enable debug output")
    return parser.parse_args()


def filter_files(infolist, consider):
    """Return the entries of *infolist* whose ``name`` is in *consider*.

    An empty *consider* means "no filter": *infolist* is returned unchanged.
    """
    if not consider:
        return infolist
    return [f for f in infolist if f.name in consider]


def list_files(files):
    """Print a formatted list of the files inside the installer."""
    print("\n ============ AI_Files ============")
    for f in files:
        print(f"\t{f}")


class Spinner:
    """Console progress indicator used as a context manager.

    Prints ``[``, then one ``=`` for the first and every 256th step with a
    rotating character in between, and finally ``]`` (or ``X`` when the
    block was interrupted from the keyboard).
    """

    def __init__(self):
        self.counter = 0

    def __enter__(self):
        print(" [ ", end="", flush=True)
        return self

    def progress(self):
        """Advance the indicator by one step."""
        ch = "/-\\|"[self.counter & 3] if self.counter else "="
        # The rotating character is followed by a backspace so that the next
        # step overwrites it; "=" is left in place as a bar segment.
        print(ch,
              end="\b" if self.counter else "",
              flush=True)
        self.counter = (self.counter + 1) & 0xFF

    def __exit__(self, exc_type, exc, tb):
        if exc_type is KeyboardInterrupt:
            print("X", flush=True)
        else:
            print("]", flush=True)
        # returning False lets the KeyboardInterrupt propagate
        return False


def _safe_member_path(name):
    """Convert an archive member name into a safe relative ``/`` path.

    Backslashes become ``/``; a leading drive prefix (``C:``), empty
    components, ``.`` and ``..`` are dropped so the result can never point
    outside the directory the files are extracted into.  Returns ``""`` when
    nothing usable is left.
    """
    normalized = name.replace("\\", "/")
    if len(normalized) >= 2 and normalized[1] == ":" and normalized[0].isalpha():
        normalized = normalized[2:]
    parts = [part for part in normalized.split("/") if part not in ("", ".", "..")]
    return "/".join(parts)


def extract_files(reader, files, buffer_size=0x20000):
    """Extract *files* into the current directory with a block-based copy.

    Args:
        reader: Object whose ``open(info)`` returns a context manager with a
            ``size`` attribute and a ``read(n)`` method.
        files: Iterable of records with a ``name`` attribute.
        buffer_size: Number of bytes copied per block.

    Member names come from the (untrusted) installer, so they are sanitised
    before being used as output paths.
    """
    import os
    print("\n ============ AI_Extract ============")
    for f in files:
        # Normalize separators and strip anything that could leave the
        # extraction directory (absolute paths, drive letters, "..").
        path = _safe_member_path(f.name)
        if not path:
            print(f"\t{f.name!r} \t skipped (no usable file name)")
            continue
        dirname = os.path.dirname(path)
        # Create target directory if needed
        if dirname:
            os.makedirs(dirname, exist_ok=True)
        print(f"\t{path} \t ", end="")
        # Copy file contents in chunks
        with reader.open(f) as inf, open(path, "wb") as out:
            print(f"{inf.size >>10} kB", end="")
            with Spinner() as sp:
                while (block := inf.read(buffer_size)):
                    out.write(block)
                    sp.progress()


def main():
    """Parse the arguments, open the installer, then list and/or extract."""
    import sys
    args = parse_args()
    consider = set(args.files)
    with AI_Reader(
        args.file,
        sys.stdout if args.verbose else None
    ) as ar:
        # Read file index from the installer
        infolist = ar.infolist()
        # Apply filename filters
        selected = filter_files(infolist, consider)
        # Optional: list files
        if args.list:
            list_files(selected)
        # Optional: extract files
        if args.extract:
            extract_files(ar, selected)
        # Optional: print reader summary in verbose mode
        if args.verbose:
            print(ar)


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment