Skip to content

Instantly share code, notes, and snippets.

@ykai55
Created December 3, 2024 03:11
Show Gist options
  • Select an option

  • Save ykai55/5d805eab22ec16e668d3d72134cbfec4 to your computer and use it in GitHub Desktop.

Select an option

Save ykai55/5d805eab22ec16e668d3d72134cbfec4 to your computer and use it in GitHub Desktop.
PB reader
/**
 * Value kinds a field descriptor can declare; stored as the third element of
 * a `Field` tuple and switched on when a field value is decoded.
 */
export enum PBType {
bool = 0,
bytes = 1,
double = 2,
float = 3,
int32 = 4,
int64 = 5,
string = 6,
uint32 = 7,
uint64 = 8,
byte = 9,
// Nested message: the field tuple carries a `ModelConfig` listing the nested fields.
fq = 20,
// Map field: the field tuple carries one `Field` descriptor for the key and one for the value.
map = 21,
}
/** Every `PBType` member except the nested-message (`fq`) and `map` kinds. */
export type Scalar = Exclude<PBType, PBType.fq | PBType.map>;
/**
 * Description of a message type: `$f` lists the descriptors of its fields.
 * The type parameter `T` is not referenced by any member of the interface.
 */
export interface ModelConfig<T> {
$f: Field[];
}
/**
 * Descriptor of one message field, laid out as `[name, tag, type, ...extra]`.
 * The meaning of the trailing elements depends on `type`; see each variant.
 */
export type Field = // tuples (arrays) are used to keep the amount of code small
| [string, number, Scalar] // scalar
| [string, number, Scalar, 0 | 1] // repeated scalar
| [string, number, PBType.int64 | PBType.uint64, 0 | 1, 'number' | 'string' | 'bigint'] // (repeated) int64 with config: the last element picks the returned representation
| [string, number, PBType.fq, 0 | 1, ModelConfig<any>] // (repeated) model
| [string, number, PBType.map | 21, Field, Field] // map: key descriptor, then value descriptor
/** Raw inputs accepted by `decodeByFields`; each is handed to `new Uint8Array(...)` before reading. */
export type BufferLike = Uint8Array | Int8Array | ArrayBuffer | number[];
/** Wire types this reader understands (the low three bits of a field key). */
enum Wire {
  VARINT = 0,
  I64 = 1,
  LEN = 2,
  I32 = 5,
}

/**
 * Cursor-based reader over a protobuf-encoded byte array.
 *
 * `offset` is always an absolute index into `bytes`; every read method
 * advances it past the bytes it consumed.
 */
class Reader {
  private readonly dataView: DataView;

  /**
   * @param bytes  Encoded data.
   * @param offset Absolute index in `bytes` where reading starts.
   * @param length Stored on the instance for callers; the read methods bound
   *               themselves by `bytes.byteLength`, not by this value.
   */
  constructor(
    public readonly bytes: Uint8Array,
    public offset: number = 0,
    public readonly length: number = bytes.byteLength,
  ) {
    // The view spans the whole `bytes` window because readFloat/readDouble
    // index it with the absolute `this.offset`. Starting the view at the
    // initial offset would apply that offset twice.
    this.dataView = new DataView(bytes.buffer, bytes.byteOffset, bytes.byteLength);
  }

  /**
   * Reads a field key and splits it into field number and wire type.
   *
   * @returns `[fieldNumber, wireType]`.
   * @throws Error when the wire type is not a member of `Wire`.
   */
  readTagAndWire(): [number, Wire] {
    const key = this.readInt();
    const wire = key & 0b111;
    // Unsigned shift: a key at or above 2^31 would turn negative with `>>`.
    const tag = key >>> 3;
    if (!(wire in Wire)) {
      throw Error(`unknown wire type ${wire}`);
    }
    return [tag, wire];
  }

  /** Reads a varint and reports whether it is non-zero. */
  readBool(): boolean {
    return this.readInt() !== 0;
  }

  /**
   * Reads a varint as a JavaScript number.
   *
   * A 10-byte varint whose last byte has its low bit set is interpreted as a
   * negative two's-complement 64-bit value. Results beyond 2^53 lose
   * precision; use `readBigint` for exact 64-bit values.
   *
   * @throws RangeError when the data ends inside the varint.
   */
  readInt(): number {
    const MSB = 0x80, REST = 0x7f;
    const end = this.bytes.byteLength;
    const groups: number[] = [];
    let res = 0, shift = 0, offset = this.offset, byte = 0, neg = false;
    do {
      if (offset >= end) {
        throw new RangeError('could not decode varint: out of range');
      }
      byte = this.bytes[offset++];
      if (offset - this.offset >= 10) {
        // The 10th byte only carries bit 63 (the sign); a varint never has more bytes.
        neg = (byte & 1) === 1;
        break;
      }
      const v = byte & REST;
      groups.push(v);
      if (shift < 28) {
        res |= v << shift;
      } else {
        // Bitwise operators are limited to 32 bits, so higher groups are added arithmetically.
        res += v * Math.pow(2, shift);
      }
      shift += 7;
    } while (byte >= MSB);
    this.offset = offset;
    if (!neg) {
      return res;
    }
    // value = -(~low63 + 1). Multiplication is used instead of `<<` because a
    // shift count of 32 or more wraps around and would corrupt the result.
    let inverted = 0;
    for (let i = 0; i < groups.length; i++) {
      inverted += (~groups[i] & REST) * Math.pow(2, i * 7);
    }
    return -(inverted + 1);
  }

  /**
   * Reads a varint as an exact signed 64-bit bigint (two's complement).
   *
   * @throws RangeError when the data ends inside the varint.
   */
  readBigint(): bigint {
    const MSB = 0x80, REST = 0x7f;
    const end = this.bytes.byteLength;
    let res = BigInt(0), shift = 0, offset = this.offset, byte = 0;
    do {
      if (offset >= end) {
        throw new RangeError('could not decode varint: out of range');
      }
      byte = this.bytes[offset++];
      if (offset - this.offset >= 10) {
        // Only the lowest bit of the 10th byte is meaningful: it is bit 63.
        if ((byte & 1) === 1) {
          res |= BigInt(1) << BigInt(63);
        }
        break;
      }
      res |= BigInt(byte & REST) << BigInt(shift);
      shift += 7;
    } while (byte >= MSB);
    this.offset = offset;
    return BigInt.asIntN(64, res);
  }

  /** Reads a little-endian 32-bit float. */
  readFloat(): number {
    const f = this.dataView.getFloat32(this.offset, true);
    this.offset += 4;
    return f;
  }

  /** Reads a little-endian 64-bit float. */
  readDouble(): number {
    const f = this.dataView.getFloat64(this.offset, true);
    this.offset += 8;
    return f;
  }

  /**
   * Reads `len` bytes and decodes them with a default `TextDecoder`.
   *
   * @throws RangeError when fewer than `len` bytes remain.
   */
  readString(len: number): string {
    const bs = this.readBytes(len);
    return new TextDecoder().decode(bs);
  }

  /**
   * Returns a view (not a copy) of the next `len` bytes.
   *
   * @throws RangeError when `len` is negative or fewer than `len` bytes remain.
   */
  readBytes(len: number): Uint8Array {
    this.ensureAvailable(len);
    const bs = new Uint8Array(this.bytes.buffer, this.bytes.byteOffset + this.offset, len);
    this.offset += len;
    return bs;
  }

  /**
   * Advances past the value of a field with the given wire type.
   *
   * @throws RangeError when the value extends past the end of the data.
   * @throws Error for a wire type that is not handled.
   */
  skip(wire: Wire) {
    if (wire === Wire.LEN) {
      const len = this.readInt();
      // A negative length would move the cursor backwards and re-read data.
      this.ensureAvailable(len);
      this.offset += len;
    } else if (wire === Wire.I64) {
      this.ensureAvailable(8);
      this.offset += 8;
    } else if (wire === Wire.I32) {
      this.ensureAvailable(4);
      this.offset += 4;
    } else if (wire === Wire.VARINT) {
      this.readInt();
    } else {
      throw Error(`unknown wire type ${wire}`);
    }
  }

  /** True once the cursor has reached the end of `bytes`. */
  eos(): boolean {
    return this.offset >= this.bytes.byteLength;
  }

  /** Throws unless `len` is non-negative and that many bytes remain after the cursor. */
  private ensureAvailable(len: number): void {
    if (!(len >= 0) || this.offset + len > this.bytes.byteLength) {
      throw new RangeError(`could not read ${len} bytes: out of range`);
    }
  }
}
/** Mutable state shared between `decodeByFields` and `decodeTill` for one message. */
interface DecodeState {
// Cursor over the encoded message.
reader: Reader;
// Lookup from field number to its descriptor.
tagToField: Map<number, Field>;
// Initialised to 0 by decodeByFields and compared against the requested tag in decodeTill.
decodedTag: number;
// Decoded values keyed by field name.
store: Record<string, any>;
}
/**
 * Decodes a protobuf message according to a list of field descriptors.
 *
 * @param bytesOrReader Encoded message, or a `Reader` already positioned on it.
 * @param fields        Descriptors of the fields to extract; unknown tags are skipped.
 * @param lazy          When false, the whole message is decoded immediately and a
 *                      plain object is returned. When true, an object with one
 *                      accessor per field is returned and decoding advances only
 *                      as far as needed when a property is read.
 * @returns An object keyed by field name.
 */
export function decodeByFields(bytesOrReader: BufferLike | Reader, fields: Field[], lazy: boolean): any {
  const store: Record<string, any> = {};
  const reader = bytesOrReader instanceof Reader ? bytesOrReader : new Reader(new Uint8Array(bytesOrReader));
  const tagToField = new Map<number, Field>();
  for (const field of fields) {
    tagToField.set(field[1], field);
  }
  const state: DecodeState = {
    reader,
    tagToField,
    decodedTag: 0,
    store,
  };

  if (!lazy) {
    let maxTag = 0;
    for (const field of fields) {
      if (field[1] > maxTag) {
        maxTag = field[1];
      }
    }
    decodeTill(state, maxTag, lazy);
    return store;
  }

  const proxy: Record<string, any> = {};
  // Values assigned by the caller live apart from `store`. Decoding writes
  // into `store`, so keeping them there would let a later decode of the same
  // field silently replace what the caller assigned.
  const overrides = new Map<string, any>();
  for (const [name, tag] of fields) {
    Object.defineProperty(proxy, name, {
      enumerable: true,
      get(): any {
        if (overrides.has(name)) {
          return overrides.get(name);
        }
        decodeTill(state, tag, lazy);
        return store[name];
      },
      set(v: any) {
        overrides.set(name, v);
      },
    });
  }
  return proxy;
}
/**
 * Advances decoding of the message in `state` until a known field with a tag
 * greater than `tillTag` has been decoded, or the data is exhausted.
 *
 * Fields whose tag has no descriptor are skipped. Decoded values are written
 * into `state.store`.
 *
 * NOTE(review): the early exit compares `state.decodedTag`, but nothing in
 * this function assigns it — confirm whether it was meant to be updated here.
 */
function decodeTill(state: DecodeState, tillTag: number, lazy: boolean) {
  if (state.decodedTag >= tillTag) {
    return;
  }
  const cursor = state.reader;
  const descriptors = state.tagToField;
  const target = state.store;
  for (;;) {
    if (cursor.eos()) {
      return;
    }
    const [fieldNumber, wireType] = cursor.readTagAndWire();
    const descriptor = descriptors.get(fieldNumber);
    if (descriptor === undefined) {
      // No descriptor for this tag: step over its value.
      cursor.skip(wireType);
      continue;
    }
    decodeField(cursor, wireType, descriptor, target, lazy);
    if (fieldNumber > tillTag) {
      return;
    }
  }
}
/**
 * Decodes one field occurrence from `reader` and merges it into `obj`.
 *
 * - Length-delimited data for a type that is not itself length-delimited is
 *   treated as a packed repeated scalar; its elements are appended to any
 *   array already stored for the field, so several packed chunks (or a mix of
 *   packed and unpacked elements) accumulate instead of replacing each other.
 * - Map entries are merged into an object keyed by the entry key; entries
 *   whose key or value decodes to `undefined` are ignored.
 * - Other fields are appended (when flagged repeated) or assigned.
 *
 * @param reader Reader positioned right after the field key.
 * @param wire   Wire type taken from the field key.
 * @param field  Descriptor of the field being decoded.
 * @param obj    Target object receiving the value under the field name.
 * @param lazy   Forwarded to nested message decoding.
 * @throws Error when a map field's slot already holds a non-object value.
 */
function decodeField(reader: Reader, wire: Wire, field: Field, obj: Record<string, any>, lazy: boolean) {
  const [name, , type] = field;
  const lengthDelimitedType =
    type === PBType.bytes || type === PBType.string || type === PBType.fq || type === PBType.map;

  if (wire === Wire.LEN && !lengthDelimitedType) {
    // packed repeated scalar
    const len = reader.readInt();
    const end = reader.offset + len;
    const existed = obj[name];
    const arr: any[] = Array.isArray(existed) ? existed : [];
    while (reader.offset < end) {
      arr.push(decodeFieldBasic(reader, field, lazy));
    }
    obj[name] = arr;
    return;
  }

  if (type === PBType.map) {
    // map<K,V>
    const currMap = obj[name];
    const [key, value] = decodeFieldBasic(reader, field, lazy);
    if (key === undefined || value === undefined) {
      return;
    }
    if (currMap instanceof Object) {
      currMap[key] = value;
    } else if (currMap === undefined) {
      obj[name] = { [key]: value };
    } else {
      throw Error('current decoding message is not map');
    }
    return;
  }

  // scalar | fq
  const data = decodeFieldBasic(reader, field, lazy);
  const repeated = field[3];
  if (!repeated) {
    obj[name] = data;
    return;
  }
  const existed = obj[name];
  if (existed) {
    existed.push(data);
  } else {
    obj[name] = [data];
  }
}
/**
 * Decodes a single value of the type declared by `field` from `reader`.
 *
 * @param reader Reader positioned at the start of the value.
 * @param field  Descriptor whose third element selects the decoding.
 * @param lazy   Forwarded to `decodeByFields` for nested messages and map entries.
 * @returns The decoded value. For 64-bit integers the representation
 *          ('number' by default, 'string' or 'bigint') is taken from the
 *          descriptor's fifth element. For maps a `[key, value]` pair is returned.
 * @throws Error for a type that is not handled.
 */
function decodeFieldBasic(reader: Reader, field: Field, lazy: boolean): any {
  switch (field[2]) {
    case PBType.bool:
      return reader.readBool();
    case PBType.byte:
      return reader.readInt();
    case PBType.int32:
    case PBType.uint32:
      return reader.readInt();
    case PBType.uint64:
    case PBType.int64: {
      const raw = reader.readBigint();
      // readBigint always yields the signed interpretation; an unsigned field
      // needs the same 64 bits reinterpreted, or values >= 2^63 come out negative.
      const bi = field[2] === PBType.uint64 ? BigInt.asUintN(64, raw) : raw;
      const type = field[4] ?? 'number';
      if (type === 'string') {
        return bi.toString();
      } else if (type === 'bigint') {
        return bi;
      } else {
        return Number(bi);
      }
    }
    case PBType.double:
      return reader.readDouble();
    case PBType.float:
      return reader.readFloat();
    case PBType.string:
      return reader.readString(reader.readInt());
    case PBType.bytes:
      return reader.readBytes(reader.readInt());
    case PBType.fq:
      return decodeByFields(reader.readBytes(reader.readInt()), field[4].$f, lazy);
    case PBType.map: {
      // Each map entry is decoded as a small message with fields renamed to
      // 'key' and 'value'. The descriptors are copied so the caller's
      // tuples are not mutated.
      const keyField: Field = [...field[3]];
      const valueField: Field = [...field[4]];
      keyField[0] = 'key';
      valueField[0] = 'value';
      const { key, value } = decodeByFields(reader.readBytes(reader.readInt()), [keyField, valueField], lazy);
      return [key, value];
    }
    default:
      throw Error(`unknown protobuf type: ${field[2]}`);
  }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment