From a4c60603f61dd1a9f0ce420be9067965586dd694 Mon Sep 17 00:00:00 2001 From: Vladimir Azarov Date: Mon, 4 Aug 2025 22:13:17 +0200 Subject: Object assembly --- ccross.mlb | 2 + ccross.sml | 4 +- driver.fun | 21 ++++---- dynarray.sig | 1 + dynarray.sml | 20 +++++++- emit.fun | 146 +++++++++++++++++++++++++++++++++++++++++++++++++++++ emit.sig | 6 +++ il.fun | 36 ++++++++++++++ il.sig | 15 ++++++ parser.fun | 138 ++++++++++++++++++++++++++++++++++++++++----------- parser.sig | 160 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++- tree.sig | 2 + tree.sml | 20 ++++++++ 13 files changed, 529 insertions(+), 42 deletions(-) create mode 100644 emit.fun create mode 100644 emit.sig create mode 100644 il.fun create mode 100644 il.sig diff --git a/ccross.mlb b/ccross.mlb index 3a4231c..d732b5e 100644 --- a/ccross.mlb +++ b/ccross.mlb @@ -18,6 +18,8 @@ in tokenizer.sig tokenizer.fun ppc.sig ppc.fun parser.sig parser.fun + il.sig il.fun + emit.sig emit.fun driver.sig driver.fun diff --git a/ccross.sml b/ccross.sml index d3dcf3c..46851e9 100644 --- a/ccross.sml +++ b/ccross.sml @@ -6,7 +6,9 @@ structure ccross:> CCROSS = struct structure Parser:> PARSER = Parser(structure Tree = Tree; structure P = ppc; structure D = Dynarray) - structure D:> DRIVER = Driver(Parser) + structure IL:> IL = IL(Parser) + structure Emit:> EMIT = Emit(IL) + structure D:> DRIVER = Driver(Emit) structure ExnHandler:> EXN_HANDLER = ExnHandler end diff --git a/driver.fun b/driver.fun index ac9f950..0f80f41 100644 --- a/driver.fun +++ b/driver.fun @@ -1,5 +1,6 @@ -functor Driver(P: PARSER): DRIVER = struct - structure P = P +functor Driver(E: EMIT): DRIVER = struct + structure I = E.I + structure P = E.I.P datatype execMode = Normal | DebugE | DebugT @@ -56,17 +57,19 @@ functor Driver(P: PARSER): DRIVER = struct fun collect ctx = let - val result = P.parseDef ctx + val (continue, ctx) = P.parseDef ctx in - case result of - NONE => () - | SOME (def, ctx) => ( - P.printDef def; + if continue then collect ctx - ) + else + P.finalize ctx end + + val parseCtx = collect parseCtx + val progInfo = P.explode parseCtx + val ilCtx = I.createCtx progInfo in - collect parseCtx + E.emit "/tmp/prog.s" ilCtx end | DebugT => P.P.T.debugPrint file | DebugE => P.P.debugPrint file (#includeDirs config) diff --git a/dynarray.sig b/dynarray.sig index 0aa8f0b..8e2da7c 100644 --- a/dynarray.sig +++ b/dynarray.sig @@ -12,4 +12,5 @@ signature DYNARRAY = sig val reset: 'a t -> unit val toVec: 'a t -> 'a vector + val appi: (int * 'a -> unit) -> 'a t -> unit end diff --git a/dynarray.sml b/dynarray.sml index b052989..371362e 100644 --- a/dynarray.sml +++ b/dynarray.sml @@ -17,7 +17,8 @@ structure Dynarray: DYNARRAY = struct let val (len, arr) = !dynarr in - if len = Array.length arr then + case Int.compare (len, Array.length arr) of + EQUAL => let val arr2 = Array.array (len * 2, NONE) in @@ -25,9 +26,11 @@ structure Dynarray: DYNARRAY = struct dynarr := (len, arr2); push dynarr v end - else + | LESS => ( Array.update (arr, len, SOME v); dynarr := (len + 1, arr) + ) + | GREATER => raise Unreachable end fun get dynarr n = @@ -70,4 +73,17 @@ structure Dynarray: DYNARRAY = struct in Vector.fromList l end + + fun appi f dynarr = + let + val (len, arr) = !dynarr + + fun loop idx = + if idx = len then + () + else + f (idx, valOf $ Array.sub (arr, idx)) + in + loop 0 + end end diff --git a/emit.fun b/emit.fun new file mode 100644 index 0000000..cb77061 --- /dev/null +++ b/emit.fun @@ -0,0 +1,146 @@ +functor Emit(I: IL) = struct + structure I = I + + structure P = I.P + structure D = P.D + structure PP = P.P + + val file = ref NONE + + local + fun output s = + let + val outstream = valOf $ !file + in + TextIO.output (outstream, s) + end + + val ctx = ((false, makePrintfBase output), + fn (_: bool * ((string -> unit) * (unit -> unit))) => ()) + in + fun fprint g = Fold.fold ctx g + end + + fun fprintt g = fprint `"\t" g + + fun fprinttn g = + fprintt (fn (a, _) => g (a, fn (_, out) => (Printf out `"\n" %))) + + fun handleBSS objsZI = + let + val () = fprint `"section .bss\n" % + + fun handleObj (id, _, t, _, _) = + let + val align = P.alignOfType t + val size = P.sizeOfType t + in + fprinttn `"align\t" W align %; + fprint PP.? id `":\tresb " W size `"\n" % + end + in + List.app handleObj objsZI + end + + fun dd size w = + let + val cmd = + case size of + 0w1 => "db" + | 0w2 => "dw" + | 0w4 => "dd" + | 0w8 => "dq" + | _ => raise Unreachable + in + fprint `cmd `" " W w % + end + + fun emitScalarIni size w = ( + fprint `"\t" %; dd size w; fprint `"\n" % + ) + + fun emitAggrLayout id = + let + val (_, size, layout) = D.get P.iniLayouts id + + val () = fprint `"\n" % + + fun getPadding offset t [] = size - (offset + P.sizeOfType t) + | getPadding offset t ({ offset = offset', ... } :: _) = + offset' - (offset + P.sizeOfType t) + + fun emitScalars ({ offset, t, value } :: tail) = + let + val () = fprint `"\t" % + val () = dd (P.sizeOfType t) value + val padding = getPadding offset t tail + in + if padding > 0w0 then + fprint `"\n\tresb " W padding `"\n" % + else + fprint `"\n" %; + emitScalars tail + end + | emitScalars [] = () + in + emitScalars layout + end + + fun handleData objs = + let + val () = fprint `"section .data\n" % + + fun emitLayout (id, _, t, ini, _) = + let + val align = P.alignOfType t + val size = P.sizeOfType t + val () = fprinttn `"align\t" W align % + val () = fprint PP.? id `":" % + in + case ini of + P.CiniConst w => emitScalarIni size w + | P.CiniLayout id => emitAggrLayout id + | P.CiniExpr _ => raise Unreachable + end + in + List.app emitLayout objs + end + + fun handleStrlits strlits = + let + fun f id = fprint `".S" I id `":\tdb " `(PP.?? id) `", 0\n" % + in + fprint `"\n" %; + List.app f strlits + end + + fun handleLocalIniLayouts () = + let + fun f (_, (true, _, _)) = () + | f (n, (false, _, _)) = ( + fprint `"\talign 16\n" %; + fprint `".I" I n `":" %; + emitAggrLayout n + ) + in + D.appi f P.iniLayouts + end + + fun openFile fname = file := SOME (TextIO.openOut fname) + + fun emit fname + (I.Ctx { globSyms, extSyms, objsZI, objs, strlits, ... }) = + let + val () = openFile fname + + val () = List.app (fn gs => fprint `"global " PP.? gs `"\n" %) globSyms + val () = List.app (fn es => fprint `"extern " PP.? es `"\n" %) extSyms + + val () = handleBSS objsZI + val () = handleData objs + val () = handleStrlits strlits + val () = handleLocalIniLayouts () + in + () + end +end diff --git a/emit.sig b/emit.sig new file mode 100644 index 0000000..9b0d88d --- /dev/null +++ b/emit.sig @@ -0,0 +1,6 @@ +signature EMIT = sig + + structure I: IL + + val emit: string -> I.ctx -> unit +end diff --git a/il.fun b/il.fun new file mode 100644 index 0000000..bcf4f29 --- /dev/null +++ b/il.fun @@ -0,0 +1,36 @@ +functor IL(P: PARSER) = struct + + structure P = P + structure PP = P.P + + datatype ctx = Ctx of { + objs: P.objDef list, + objsZI: P.objDef list, + extSyms: P.nid list, + globSyms: P.nid list, + funcs: P.funcInfo list, + strlits: int list + } + + fun createCtx ({ ext, glob, objsZI, objs, funcs, strlits }) = + Ctx { objs, objsZI, extSyms = ext, globSyms = glob, funcs, strlits } + + fun updateCtx (Ctx ctx) = fn z => + let + fun from objs objsZI extSyms globSyms funcs strlits = + { objs, objsZI, extSyms, globSyms, funcs, strlits } + fun to f { objs, objsZI, extSyms, globSyms, funcs, strlits } = + f objs objsZI extSyms globSyms funcs strlits + in + FRU.makeUpdate6 (from, from, to) ctx (fn (a, f) => z (a, Ctx o f)) + end + + fun register ctx (P.Objects revObjs) = + updateCtx ctx u#objs (fn objs => List.revAppend (revObjs, objs)) % + | register ctx (P.Definition _) = ctx + + (* + type objDef = int * P.tkPos * ctype * cini * linkage + type decl = P.tkPos * declClass * ctype * linkage + *) +end diff --git a/il.sig b/il.sig new file mode 100644 index 0000000..767c9b5 --- /dev/null +++ b/il.sig @@ -0,0 +1,15 @@ +signature IL = sig + + structure P: PARSER + + datatype ctx = Ctx of { + objs: P.objDef list, + objsZI: P.objDef list, + extSyms: P.nid list, + globSyms: P.nid list, + funcs: P.funcInfo list, + strlits: int list + } + + val createCtx: P.progInfo -> ctx +end diff --git a/parser.fun b/parser.fun index c0c9eaf..6081449 100644 --- a/parser.fun +++ b/parser.fun @@ -3,6 +3,7 @@ functor Parser(structure Tree: TREE; structure P: PPC; structure P = P structure T = P.T + structure D = D type nid = int @@ -143,7 +144,7 @@ functor Parser(structure Tree: TREE; structure P: PPC; datatype ini = IniExpr of exprAug | IniCompound of ini list datatype cini = CiniExpr of exprAug | CiniConst of word | - CiniLayout of { offset: word, t: ctype, value: word } list + CiniLayout of int datatype storageSpec = SpecTypedef | @@ -238,14 +239,20 @@ functor Parser(structure Tree: TREE; structure P: PPC; datatype taggedBody = EnumBody of (nid * P.tkPos * int) list | AggrBody of (nid * ctype) list + type decl = P.tkPos * declClass * ctype * linkage + datatype globalSym = - GsDecl of P.tkPos * declClass * ctype * linkage | + GsDecl of decl | GsEnumConst of int | GsTypedef of int val localVars: { name: nid, pos: P.tkPos, onStack: bool, t: ctype } D.t = D.create0 () + val iniLayouts: + (bool * word * { offset: word, t: ctype, value: word } list) D.t = + D.create0 () + datatype ctx = Ctx of { aggrTypeNames: scope, @@ -256,7 +263,10 @@ functor Parser(structure Tree: TREE; structure P: PPC; tokenBuf: P.t * (token * P.tkPos) list list, - loopLevel: int + loopLevel: int, + + defs: def list, + strlits: int list } val intCompare = fn a => fn b => Int.compare (a, b) @@ -266,17 +276,18 @@ functor Parser(structure Tree: TREE; structure P: PPC; fun updateCtx (Ctx ctx) = fn z => let fun from aggrTypeNames localScopes funcRetType globalSyms - tokenBuf loopLevel + tokenBuf loopLevel defs strlits = - { aggrTypeNames, localScopes, - funcRetType, globalSyms, tokenBuf, loopLevel } + { aggrTypeNames, localScopes, funcRetType, globalSyms, + tokenBuf, loopLevel, defs, strlits } - fun to f { aggrTypeNames, localScopes, - funcRetType, globalSyms, tokenBuf, loopLevel } + fun to f { aggrTypeNames, localScopes, funcRetType, globalSyms, + tokenBuf, loopLevel, defs, strlits } = - f aggrTypeNames localScopes funcRetType globalSyms tokenBuf loopLevel + f aggrTypeNames localScopes funcRetType globalSyms tokenBuf + loopLevel defs strlits in - FRU.makeUpdate6 (from, from, to) ctx (fn (a, f) => z (a, Ctx o f)) + FRU.makeUpdate8 (from, from, to) ctx (fn (a, f) => z (a, Ctx o f)) end datatype declParts = @@ -703,12 +714,13 @@ functor Parser(structure Tree: TREE; structure P: PPC; fun createCtx fname incDirs = Ctx { aggrTypeNames = Tree.empty, - localScopes = [], funcRetType = NONE, globalSyms = Tree.empty, tokenBuf = (P.create { fname, incDirs, debugMode = false }, []), - loopLevel = 0 + loopLevel = 0, + defs = [], + strlits = [] } fun loopWrapper ctx f = @@ -1220,8 +1232,12 @@ functor Parser(structure Tree: TREE; structure P: PPC; case tk of Tk (T.Id id) => wrap $ Eid (id, NONE) | Tk (T.Strlit (id, size)) => - (EA (Estrlit id, pos, false, - array_t (Word64.fromInt size, char_t)), ctx) + let + val ctx = updateCtx ctx u#strlits (fn l => id :: l) % + in + (EA (Estrlit id, pos, false, + array_t (Word64.fromInt size, char_t)), ctx) + end | Tk (T.CharConst (id, v)) => wrapNum id (int_t, Ninteger v) | Tk (T.Num id) => wrapNum id $ parseNumber pos $ P.?? id | TkParens list => @@ -1860,7 +1876,7 @@ functor Parser(structure Tree: TREE; structure P: PPC; | Eunop (_, _) => checkUnop check under E | EmemberByV _ => checkMemberAccessByV (check UNone) E | EmemberByP _ => checkMemberAccessByP (check UNone) E - | Econst _ | Estrlit _ => E + | Econst _ => E | Estrlit _ => E end and tryGetTypedefName (Ctx ctx) id = @@ -2813,8 +2829,9 @@ functor Parser(structure Tree: TREE; structure P: PPC; fun printIni _ (CiniExpr ea) out = Printf out A1 pea ea % | printIni _ (CiniConst w) out = Printf out W w % - | printIni off (CiniLayout layout) out = + | printIni off (CiniLayout id) out = let + val (_, _, layout) = D.get iniLayouts id fun pentry ({ offset, t, value }) out = Printf out R off `"\t" W offset `": " Pctype t `": " W value `"\n" % @@ -3168,6 +3185,7 @@ functor Parser(structure Tree: TREE; structure P: PPC; fun canonExprIni toplev t ea = if toplev then let + val () = printf `"Here\n" % val w = eval ea t in CiniConst w @@ -3193,8 +3211,11 @@ functor Parser(structure Tree: TREE; structure P: PPC; let val layout = calcOffsets 0w0 $ LcAux (0w0, computeTLayout t) val seq = flattenIni pos layout ini + + val id = D.length iniLayouts + val () = D.push iniLayouts (toplev, sizeOfType t, seq) in - CiniLayout seq + CiniLayout id end end @@ -3356,15 +3377,11 @@ functor Parser(structure Tree: TREE; structure P: PPC; val (prefix, ctx) = parseDeclPrefix ctx fun finishNormal acc = - let - val acc = rev acc - in if toplev then ObjDefs $ map (fn ToplevId v => v | _ => raise Unreachable) acc else LocalVarInits $ map (fn LocalId v => v | _ => raise Unreachable) - acc - end + (rev acc) fun collectDeclarators acc ctx = let @@ -3812,21 +3829,86 @@ functor Parser(structure Tree: TREE; structure P: PPC; Vector.appi (fn (i, var) => printf A2 pLocalVar i var %) localVars end + type decl = P.tkPos * declClass * ctype * linkage + + fun ctxAddDef ctx def = updateCtx ctx u#defs (fn l => def :: l) % + + type objDef = int * P.tkPos * ctype * cini * linkage + + fun finalize (C as Ctx { globalSyms, ... }) = + let + fun f id (GsDecl (pos, DeclTentative, t, linkage)) acc = + (id, pos, t, CiniLayout (~1), linkage) :: acc + | f _ _ acc = acc + + fun ch (GsDecl (pos, DeclTentative, t, linkage)) = + (GsDecl (pos, DeclDefined, t, linkage)) + | ch v = v + + val promoted = Tree.traverse globalSyms f [] + val globalSyms = Tree.changeV globalSyms ch + in + updateCtx C u#defs (fn l => Objects promoted :: rev l) + s#globalSyms globalSyms % + end + + type progInfo = { + ext: nid list, + glob: nid list, + objsZI: objDef list, + objs: objDef list, + funcs: funcInfo list, + strlits: int list + } + + fun explode (Ctx { globalSyms, defs, strlits, ... }) = + let + fun findExtAndGlob id (GsDecl (_, declType, _, LinkExternal)) + (ext, glob) + = ( + case declType of + DeclRegular => (id :: ext, glob) + | DeclDefined => (ext, id :: glob) + | DeclTentative => raise Unreachable + ) + | findExtAndGlob _ _ acc = acc + + val (ext, glob) = Tree.traverse globalSyms findExtAndGlob ([], []) + val objsZI = + case hd defs of + Objects objs => objs + | _ => raise Unreachable + + fun partition (objs, funcDefs) (Objects obj :: tail) = + partition (List.revAppend (obj, objs), funcDefs) tail + | partition (objs, funcDefs) (Definition fi :: tail) = + partition (objs, fi :: funcDefs) tail + | partition (objs, funcDefs) [] = (rev objs, rev funcDefs) + + val (objs, funcs) = partition ([], []) (tl defs) + in + { ext, glob, objsZI, objs, funcs, strlits } + end + fun parseDef ctx = let val (tk, _, _) = getTokenCtx ctx in case tk of - Tk T.EOS => NONE + Tk T.EOS => (false, ctx) | _ => let - val (toplev, ctx) = parseDeclaration ctx + val (toplev: toplev, ctx) = parseDeclaration ctx in - SOME (case toplev of - ObjDefs objDefList => (Objects objDefList, ctx) + case toplev of + ObjDefs objDefList => (true, ctxAddDef ctx (Objects objDefList)) | FuncDef (id, body) => - ctxWithLayer ctx body (parseFuncDefinition id) - | LocalVarInits _ => raise Unreachable) + let + val (def, ctx) = ctxWithLayer ctx body (parseFuncDefinition id) + in + (true, ctxAddDef ctx def) + end + | LocalVarInits _ => raise Unreachable end end end diff --git a/parser.sig b/parser.sig index 72def5f..3b93825 100644 --- a/parser.sig +++ b/parser.sig @@ -1,11 +1,167 @@ signature PARSER = sig structure P: PPC + structure D: DYNARRAY type ctx - type def + + type nid = int + + datatype ctype = + unknown_t | + void_t | + char_t | + uchar_t | + short_t | + ushort_t | + int_t | + uint_t | + long_t | + ulong_t | + longlong_t | + ulonglong_t | + + pointer_t of int * ctype | + function_t of ctype * ctype list | + array_t of Word64.word * ctype | + struct_t of + { name: nid, size: word, alignment: word, + fields: (nid * word * ctype) list } | + union_t of + { name: nid, size: word, alignment: word, + fields: (nid * word * ctype) list } | + + enum_t of nid * bool | (* is complete? *) + remote_t of int + + datatype unop = + UnopPreInc | + UnopPreDec | + UnopAddr | + UnopDeref | + UnopPos | + UnopNeg | + UnopComp | + UnopLogNeg | + UnopSizeof | + UnopCast | + + UnopPostInc | + UnopPostDec + + and binopReg = + BrSubscript | + + BrMul | + BrDiv | + BrMod | + BrSum | + BrSub | + BrShiftLeft | + BrShiftRight | + BrGreater | + BrLess | + BrLessEqual | + BrGreaterEqual | + BrEqual | + BrNotEqual | + BrBitAnd | + BrBitXor | + BrBitOr | + BrLogAnd | + BrLogOr | + + BrAssign | + BrMulAssign | + BrDivAssign | + BrModAssign | + BrSumAssign | + BrSubAssign | + BrLeftShiftAssign | + BrRightShiftAssign | + BrBitAndAssign | + BrBitXorAssign | + BrBitOrAssign | + + BrComma + + and cnum = + Ninteger of Word64.word + | Nfloat of Real32.real + | Ndouble of Real64.real + + and id = Lid of int | Gid of int + + and expr = + Eid of int * id option | + Econst of int * cnum | + Estrlit of int | + EmemberByV of exprAug * int | + EmemberByP of exprAug * int | + EfuncCall of exprAug * exprAug list | + Eternary of exprAug * exprAug * exprAug | + EsizeofType of ctype | + Eunop of unop * exprAug | + Ebinop of binop * exprAug * exprAug + + and exprAug = EA of expr * P.tkPos * bool * ctype + + and binop = BR of binopReg | BinopTernaryIncomplete of exprAug + + datatype linkage = LinkInternal | LinkExternal + + val iniLayouts: + (bool * word * { offset: word, t: ctype, value: word } list) D.t + + datatype cini = CiniExpr of exprAug | CiniConst of word | + CiniLayout of int + + type objDef = int * P.tkPos * ctype * cini * linkage + + datatype stmt = + StmtExpr of exprAug | + StmtCompound of (int * cini option) list * stmt list | + StmtIf of exprAug * stmt * stmt option | + StmtFor of exprAug option * exprAug option * exprAug option * stmt | + StmtWhile of exprAug * stmt | + StmtDoWhile of stmt * exprAug | + StmtReturn of exprAug option | + StmtBreak | + StmtContinue + + type funcInfo = { + name: int, + pos: P.tkPos, + t: ctype, + paramNum: int, + localVars: { name: nid, pos: P.tkPos, onStack: bool, t: ctype } vector, + stmt: stmt + } + + datatype declClass = DeclRegular | DeclTentative | DeclDefined + + type decl = P.tkPos * declClass * ctype * linkage + + (* Objects are in reverse order *) + datatype def = Objects of objDef list | Definition of funcInfo val createCtx: string -> string list -> ctx - val parseDef: ctx -> (def * ctx) option + val parseDef: ctx -> bool * ctx val printDef: def -> unit + + val alignOfType: ctype -> word + val sizeOfType: ctype -> word + + val finalize: ctx -> ctx + + type progInfo = { + ext: nid list, + glob: nid list, + objsZI: objDef list, + objs: objDef list, + funcs: funcInfo list, + strlits: int list + } + + val explode: ctx -> progInfo end diff --git a/tree.sig b/tree.sig index 8967f83..30cf52a 100644 --- a/tree.sig +++ b/tree.sig @@ -14,5 +14,7 @@ signature TREE = sig ('v option -> 'a * 'v option) -> 'a * ('k, 'v) t val print: ('k, 'v) t -> ('k -> string) -> ('v -> string) -> unit + val traverse: ('k, 'v) t -> ('k -> 'v -> 'a -> 'a) -> 'a -> 'a + val changeV: ('k, 'v) t -> ('v -> 'v) -> ('k, 'v) t val size: ('k, 'v) t -> int end diff --git a/tree.sml b/tree.sml index ee82485..7209690 100644 --- a/tree.sml +++ b/tree.sml @@ -126,6 +126,26 @@ structure Tree: TREE = struct print' 0 t end + fun traverse Empty _ acc = acc + | traverse (Node (k, v, left, right)) f acc = + let + val acc = traverse left f acc + val acc = f k v acc + val acc = traverse right f acc + in + acc + end + + fun changeV Empty _ = Empty + | changeV (Node (k, v, left, right)) f = + let + val left = changeV left f + val right = changeV right f + val v = f v + in + Node (k, v, left, right) + end + fun size Empty = 0 | size (Node(_, _, l, r)) = 1 + size l + size r end -- cgit v1.2.3