diff options
-rw-r--r-- | parser.fun | 331 | ||||
-rw-r--r-- | tokenizer.fun | 14 | ||||
-rw-r--r-- | tokenizer.sig | 2 |
3 files changed, 264 insertions, 83 deletions
@@ -59,8 +59,10 @@ functor Parser(structure Tree: TREE; structure P: PPC): PARSER = struct | Nfloat of Real32.real | Ndouble of Real64.real + and id = Lid of int | Gid of int + and expr = - Eid of int | + Eid of int * id option | Econst of int * cnum | Estrlit of int | EmemberByV of int * exprAug | @@ -71,7 +73,7 @@ functor Parser(structure Tree: TREE; structure P: PPC): PARSER = struct Eunop of unop * exprAug | Ebinop of binop * exprAug * exprAug - and exprAug = EA of expr * P.tkPos * ctype + and exprAug = EA of expr * P.tkPos * bool * ctype and binop = BR of binopReg | BinopTernaryIncomplete of exprAug @@ -93,7 +95,7 @@ functor Parser(structure Tree: TREE; structure P: PPC): PARSER = struct pointer_t of int * ctype | function_t of ctype * ctype list | - array_t of ctype + array_t of Word64.word * ctype val (ternaryOpPrio, ternaryOpLeftAssoc) = (2, false) @@ -133,13 +135,6 @@ functor Parser(structure Tree: TREE; structure P: PPC): PARSER = struct FRU.makeUpdate6 (from, from, to) end z - type declaredId = { - id: int, - pos: P.tkPos, - ctype: ctype, - ini: ini option - } - datatype stmt = StmtExpr of exprAug | StmtCompound of (int * ini option) list * stmt list | @@ -159,7 +154,6 @@ functor Parser(structure Tree: TREE; structure P: PPC): PARSER = struct datatype linkage = LinkInternal | LinkExternal datatype declClass = DeclRegular | DeclTentative | DeclDefined - (* datatype id = Lid of int | Gid of int *) type objDef = int * P.tkPos * ctype * ini * linkage @@ -206,7 +200,7 @@ functor Parser(structure Tree: TREE; structure P: PPC): PARSER = struct Id of int * P.tkPos | AbstructRoot of P.tkPos | FuncApp of (int option * P.tkPos * ctype) list | - ArrayApplication + ArrayApplication of Word64.word datatype abstructPolicy = APpermitted | APenforced | APprohibited @@ -273,10 +267,11 @@ functor Parser(structure Tree: TREE; structure P: PPC): PARSER = struct Printf out I plevel A2 pctype true t % else Printf out `"{" I plevel `"} " A2 pctype false t % - | function_t (ret, params) => Printf out - Plist (pctype short) params (if short then "" else ", ", true) - `(if short then "" else " -> ") A2 pctype short ret % - | array_t el => Printf out `"[] " A2 pctype short el % + | function_t (ret, params) => Printf out `"{" + Plist (pctype short) params (if short then "" else ", ", false) + `"}" `(if short then "" else " -> ") A2 pctype short ret % + | array_t (n, el) => + Printf out `"[" `(Word64.toString n) `"]" A2 pctype short el % end val Pctype = fn z => bind A1 (pctype false) z @@ -521,6 +516,29 @@ functor Parser(structure Tree: TREE; structure P: PPC): PARSER = struct Printf out Plist pToken l (",", false) % end + val isIntegral = fn + char_t | uchar_t | short_t | ushort_t | int_t | uint_t + | long_t | ulong_t | longlong_t | ulonglong_t => true + | _ => false + + fun isArith t = + case t of + float_t | double_t => true + | t => isIntegral t + + fun isScalar t = + case t of + pointer_t _ => true + | t => isArith t + + val isFunc = fn + function_t _ => true + | _ => false + + val isPointer = fn + pointer_t _ => true + | _ => false + fun createCtx fname incDirs = Ctx { localScopes = [], localVars = [], @@ -630,12 +648,15 @@ functor Parser(structure Tree: TREE; structure P: PPC): PARSER = struct SOME (_, tk, _, _) => Printf out P.Ptk tk % | NONE => raise Unreachable + and pid (Lid id) out = Printf out `"l" I id % + | pid (Gid _) out = Printf out `"gl" % + and pexpr e out = let fun mem (id, ea) s = Printf out A1 pea ea `s P.? id % in case e of - Eid id => Printf out P.? id % + Eid (nid, id) => Printf out P.? nid `"{" A3 poptN "none" pid id `"}" % | Econst (id, n) => ( case n of Ninteger _ => Printf out P.? id % @@ -662,7 +683,7 @@ functor Parser(structure Tree: TREE; structure P: PPC): PARSER = struct | Eunop (unop, ea) => Printf out A1 Punop unop `" " A1 pea ea % end - and pea (EA (e, _, t)) out = + and pea (EA (e, _, _, t)) out = let fun pType out = Printf out A2 pctype true t % fun exprPrinter e out = @@ -676,7 +697,7 @@ functor Parser(structure Tree: TREE; structure P: PPC): PARSER = struct Printf out A1 exprPrinter e % end - and isTypeInParens tk ctx = + and parseTypeInParens tk ctx = case tk of TkParens list => if isTypeNameStart (#1 $ hd list) handle Empty => false then @@ -712,7 +733,7 @@ functor Parser(structure Tree: TREE; structure P: PPC): PARSER = struct | _ => (NormalPrefix acc, ctx) ) | _ => ( - case isTypeInParens tk ctx' of + case parseTypeInParens tk ctx' of SOME (ctype, ctx) => if #1 (hd acc) = UnopSizeof handle Empty => false then (SizeofType (tl acc, ctype, #2 $ hd acc, ulong_t), ctx) @@ -763,6 +784,8 @@ functor Parser(structure Tree: TREE; structure P: PPC): PARSER = struct | _ => P.clerror pos [P.Cbinop] end + and makeEA e pos = EA (e, pos, false, unknown_t) + and parseFuncCall funcEa pos ctx = let fun collectArgs acc ctx = @@ -776,22 +799,20 @@ functor Parser(structure Tree: TREE; structure P: PPC): PARSER = struct end val (args, ctx) = collectArgs [] ctx in - (SOME $ EA (EfuncCall (funcEa, args), pos, unknown_t), ctx) + (SOME $ makeEA (EfuncCall (funcEa, args)) pos, ctx) end and parseExprSuffix1 eAug ctx = let val (tk, pos1, ctx1) = getTokenCtx ctx - fun formUnop1 unop = - (SOME $ EA (Eunop (unop, eAug), pos1, unknown_t), ctx1) + fun formUnop1 unop = (SOME $ makeEA (Eunop (unop, eAug)) pos1, ctx1) fun formMemberOp unop = let val (tk, pos2, ctx2) = getTokenCtx ctx1 in case tk of - Tk (T.Id id) => - (SOME $ EA (unop (id, eAug), pos1, unknown_t), ctx2) + Tk (T.Id id) => (SOME $ makeEA (unop (id, eAug)) pos1, ctx2) | _ => P.clerror pos2 [P.Cid] end in @@ -804,7 +825,7 @@ functor Parser(structure Tree: TREE; structure P: PPC): PARSER = struct let val ((_, ea), ctx) = ctxWithLayer ctx1 list (parseExpr []) - val ea = EA (Ebinop (BR BrSubscript, eAug, ea), pos1, unknown_t) + val ea = makeEA (Ebinop (BR BrSubscript, eAug, ea)) pos1 in (SOME ea, ctx) end @@ -1004,12 +1025,14 @@ functor Parser(structure Tree: TREE; structure P: PPC): PARSER = struct and parsePrimaryExpr ctx = let val (tk, pos, ctx) = getTokenCtx ctx - fun wrap e = (EA (e, pos, unknown_t), ctx) - fun wrapNum id (t, v) = (EA (Econst (id, v), pos, t), ctx) + fun wrap e = (makeEA e pos, ctx) + fun wrapNum id (t, v) = (EA (Econst (id, v), pos, false, t), ctx) in case tk of - Tk (T.Id id) => wrap $ Eid id - | Tk (T.Strlit id) => wrap $ Estrlit id + Tk (T.Id id) => wrap $ Eid (id, NONE) + | Tk (T.Strlit (id, size)) => + (EA (Estrlit id, pos, false, + array_t (Word64.fromInt size, char_t)), ctx) | Tk (T.CharConst (id, v)) => wrapNum id (int_t, Ninteger v) | Tk (T.Num id) => wrapNum id $ parseNumber pos $ P.?? id | TkParens list => @@ -1026,7 +1049,7 @@ functor Parser(structure Tree: TREE; structure P: PPC): PARSER = struct val (prefix, ctx) = parseUnaryPrefix ctx [] fun applyPrefix prefix ea = List.foldl (fn ((unop, pos, t), e) => - EA (Eunop (unop, e), pos, t)) ea prefix + EA (Eunop (unop, e), pos, false, t)) ea prefix in case prefix of NormalPrefix unopList => @@ -1037,7 +1060,8 @@ functor Parser(structure Tree: TREE; structure P: PPC): PARSER = struct (applyPrefix unopList ea, ctx) end | SizeofType (unopList, ctype, pos, resType) => - (applyPrefix unopList (EA (EsizeofType ctype, pos, resType)), ctx) + (applyPrefix unopList + (EA (EsizeofType ctype, pos, false, resType)), ctx) end and constructExpr parts = @@ -1062,7 +1086,7 @@ functor Parser(structure Tree: TREE; structure P: PPC): PARSER = struct | BinopTernaryIncomplete trueBody => Eternary(left, trueBody, right) in - (EA (head, pos, unknown_t) :: vstack, tl opstack) + (makeEA head pos :: vstack, tl opstack) end fun insert (Q as (binop, pos, p, _)) (vstack, opstack) = @@ -1100,10 +1124,173 @@ functor Parser(structure Tree: TREE; structure P: PPC): PARSER = struct val (eof, parts, ctx) = collect ctx true [] val expr = constructExpr parts + val expr = checkExpr ctx false expr in ((eof, expr), ctx) end + and convAggr sizeofOrAddr t = + if sizeofOrAddr then + t + else + case t of + function_t _ => pointer_t (1, t) + | array_t (_, el_t) => pointer_t (1, el_t) + | _ => t + + and findId (Ctx ctx) pos sizeofOrAddr id = + let + fun findLocal [] = NONE + | findLocal (scope :: scopes) = + let + val res = lookup scope id + in + case res of + SOME lid => + let + val locals = rev o #localVars $ ctx + val t = #3 $ List.nth (locals, lid) + in + SOME (Lid lid, true, t) + end + | NONE => findLocal scopes + end + in + case findLocal $ #localScopes ctx of + SOME p => p + | NONE => + let + val res = lookup (#globalDecls ctx) id + in + case res of + SOME (_, _, t, _) => (Gid id, false, convAggr sizeofOrAddr t) + | NONE => P.error pos `"unknown identifier" % + end + end + + and intRank t = + case t of + char_t | uchar_t => 0 + | short_t | ushort_t => 1 + | int_t | uint_t => 2 + | long_t | ulong_t => 3 + | longlong_t | ulonglong_t => 4 + | _ => raise Unreachable + + and convEA t (E as EA (_, pos, _, _)) = + EA (Eunop (UnopCast, E), pos, false, t) + + and promoteToInt (E as EA (_, _, _, t)) = + if intRank t < 2 then + convEA int_t E + else + E + + and isLvalue (EA (_, _, lvalue, _)) = lvalue + + and getT ea = case ea of EA (_, _, _, t) => t + + and checkUnop check sizeofOrAddr (EA (Eunop (unop, oper), pos, _, t)) = + let + val oper = check (unop = UnopSizeof orelse unop = UnopAddr) oper + fun finish lvalue t = EA (Eunop (unop, oper), pos, lvalue, t) + + val ot = getT oper + + fun toInt () = + let + val oper = promoteToInt oper + in + EA (Eunop (unop, oper), pos, false, getT oper) + end + in + case unop of + UnopPostInc | UnopPostDec | UnopPreInc | UnopPreDec => + raise Unimplemented + | UnopPos | UnopNeg => + if isArith ot then + toInt () + else + P.error pos `"operand of not arithmetic type" % + | UnopComp => + if isIntegral ot then + toInt () + else + P.error pos `"operand of not integral type" % + | UnopLogNeg => + if isScalar ot then + finish false int_t + else + P.error pos `"operand of not scalar type" % + | UnopSizeof => + if isFunc ot then + P.error pos `"sizeof argument has function type" % + else + finish false ulong_t + | UnopAddr => + if isFunc ot orelse isLvalue oper then + EA (Eunop (unop, oper), pos, false, pointer_t (1, getT oper)) + else + P.error pos `"expected function designator or lvalue operand" % + | UnopDeref => ( + case ot of + pointer_t (1, T as function_t _) => + finish false (if sizeofOrAddr then T else ot) + | pointer_t (1, t) => finish true t + | pointer_t (n, t) => finish true (pointer_t (n-1, t)) + | _ => P.error pos `"operand of not pointer type" % + ) + | UnopCast => + if t <> void_t andalso not (isScalar t) then + P.error pos `": cast to not scalar type or void" % + else if not (isScalar ot) then + P.error pos `"operand of not scalar type" % + else + finish false t + end + | checkUnop _ _ _ = raise Unreachable + + and checkSizeofType (EA (E as EsizeofType t, pos, _, _)) = + if isFunc t then + P.error pos `"operand of function type" % + else + EA (E, pos, false, ulong_t) + | checkSizeofType _ = raise Unreachable + + and checkBinop check (EA (Ebinop (binop, left, right), pos, _, t)) = + EA (Ebinop (binop, check left, check right), pos, false, t) + | checkBinop _ _ = raise Unreachable + + and checkFuncCall check (EA (EfuncCall (func, args), pos, _, _)) = + let + (* TODO: check arguments *) + val func = check func + in + case getT func of + pointer_t (1, function_t (rt, _)) => + EA (EfuncCall (func, args), pos, false, rt) + | _ => P.error pos `"expected pointer to function" % + end + | checkFuncCall _ _ = raise Unreachable + + and checkExpr ctx sizeofOrAddr (E as EA (e, pos, _, _)) = + let + val check = checkExpr ctx + in + case e of + Eid (id', _) => + let + val (id, lvalue, t) = findId ctx pos sizeofOrAddr id' + in + EA (Eid (id', SOME id), pos, lvalue, t) + end + | EsizeofType _ => checkSizeofType E + | EfuncCall _ => checkFuncCall (check false) E + | Ebinop (_, _, _) => checkBinop (check false) E + | Eunop (_, _) => checkUnop check sizeofOrAddr E + | _ => E + end + and tryGetSpec ctx = let val (tk, pos, ctx') = getTokenCtx ctx @@ -1164,7 +1351,7 @@ functor Parser(structure Tree: TREE; structure P: PPC): PARSER = struct | Id _ => Printf out `"id" % | AbstructRoot _ => Printf out `":root" % | FuncApp _ => Printf out `"()" % - | ArrayApplication => Printf out `"[]" % + | ArrayApplication _ => Printf out `"[]" % and isTypeNameStart tk = isSome $ List.find @@ -1233,7 +1420,7 @@ functor Parser(structure Tree: TREE; structure P: PPC): PARSER = struct case tk of TkParens list => % ctx' list parseFuncParams parts | TkBrackets _ => - collectDDeclaratorTail (ArrayApplication :: parts) untilEnd ctx' + collectDDeclaratorTail (ArrayApplication 0w0 :: parts) untilEnd ctx' | Tk T.EOS => (parts, ctx) | _ => if untilEnd then @@ -1246,26 +1433,22 @@ functor Parser(structure Tree: TREE; structure P: PPC): PARSER = struct let val (tk, pos, ctx') = getTokenCtx ctx - fun isEOS tk = case tk of Tk T.EOS => true | _ => false + val isEOS = fn Tk T.EOS => true | _ => false val (parts, ctx) = - case tk of - Tk (T.Id id) => - if absPolicy = APenforced then - P.error pos `"unexpected identifier in abstruct declarator" % - else - (Id (id, pos) :: parts, ctx') - | TkParens list => ctxWithLayer ctx' list + case (tk, absPolicy) of + (Tk (T.Id _), APenforced) => + P.error pos `"unexpected identifier in abstract declarator" % + | (Tk (T.Id id), _) => (Id (id, pos) :: parts, ctx') + | (TkParens list, _) => ctxWithLayer ctx' list (parseDeclarator (true, absPolicy) parts) - | _ => ( - case absPolicy of - APprohibited => P.clerror pos [P.Cid, P.Ctk T.LParen] - | _ => - if untilEnd andalso not (isEOS tk) then - P.error pos `"expected abstruct declarator end" % - else - (AbstructRoot pos :: parts, ctx) - ) + | (_, APprohibited) => + P.clerror pos [P.Cid, P.Ctk T.LParen] + | (_, _) => + if untilEnd andalso not (isEOS tk) then + P.error pos `"expected abstruct declarator end" % + else + (AbstructRoot pos :: parts, ctx) in collectDDeclaratorTail parts untilEnd ctx end @@ -1312,7 +1495,13 @@ functor Parser(structure Tree: TREE; structure P: PPC): PARSER = struct | _ => raise Unreachable fun complete (Pointer plevel :: tail) = - pointer_t (plevel, complete tail) + let + val t = complete tail + in + case t of + pointer_t (plevel', t) => pointer_t (plevel' + plevel, t) + | _ => pointer_t (plevel, t) + end | complete (FuncApp params :: tail) = let val () = checkParamUniqueness [] params @@ -1320,7 +1509,7 @@ functor Parser(structure Tree: TREE; structure P: PPC): PARSER = struct in function_t (complete tail, params) end - | complete (ArrayApplication :: tail) = array_t (complete tail) + | complete (ArrayApplication n :: tail) = array_t (n, complete tail) | complete [] = ctype | complete _ = raise Unreachable @@ -1338,12 +1527,6 @@ functor Parser(structure Tree: TREE; structure P: PPC): PARSER = struct | printIni (IniCompound inis) out = Printf out `"{" Plist (printIni) inis (", ", false) `"}" % - fun pDeclId off ({ id, ctype, ini, pos = _}: declaredId) out = - Printf out R off - P.?id `": " Pctype ctype - Popt (fn ini => fn out => - Printf out `" = " A1 printIni ini %) ini `"\n" % - fun dieExpTerms pos terms = P.clerror pos $ map P.Ctk terms fun parseCompoundInitializer ctx = @@ -1399,14 +1582,6 @@ functor Parser(structure Tree: TREE; structure P: PPC): PARSER = struct (status, updateRD rawId s#ini (SOME ini) %, ctx) end - val isFunc = fn - function_t _ => true - | _ => false - - val isScalar = fn - function_t _ | array_t _ => false - | _ => true - fun getLinkage ctx (D as { spec = NONE, t, ... }) = if isFunc t then getLinkage ctx (updateRD D s#spec (SOME SpecExtern) %) @@ -1815,12 +1990,12 @@ functor Parser(structure Tree: TREE; structure P: PPC): PARSER = struct val ctx = updateCtx ctx u#localScopes tl % in - (StmtCompound (rev inits, stmts), ctx) + (StmtCompound (inits, stmts), ctx) end fun pinit off (id, ini) out = Printf out R off - `"%" I id `" = " A3 poptN "alloc" printIni ini `"\n" % + `"%" I id `" <- " A3 poptN "alloc" printIni ini `"\n" % fun pstmt' off (StmtCompound (inits, stmts)) out = Printf out `"{\n" @@ -1877,14 +2052,20 @@ functor Parser(structure Tree: TREE; structure P: PPC): PARSER = struct val paramTypes = case t of function_t (_, ts) => ts | _ => raise Unreachable - fun createLocalVars acc [] [] = acc - | createLocalVars acc (t :: ts) ((id, pos) :: params) = - createLocalVars ((valOf id, pos, t) :: acc) ts params + fun createLocalVars (acc, scope) [] [] = (acc, scope) + | createLocalVars (acc, scope) (t :: ts) ((SOME id, pos) :: params) = + let + val localVar = (id, pos, t) + val (_, scope) = Tree.insert intCompare scope id $ length acc + in + createLocalVars (localVar :: acc, scope) ts params + end | createLocalVars _ _ _ = raise Unreachable - val localVars = createLocalVars [] paramTypes params + val (localVars, scope) = + createLocalVars ([], Tree.empty) paramTypes params in - updateCtx ctx s#localVars localVars s#localScopes [Tree.empty] % + updateCtx ctx s#localVars localVars s#localScopes [scope] % end fun finishLocalVars (Ctx ctx) = Vector.fromList o rev o #localVars $ ctx diff --git a/tokenizer.fun b/tokenizer.fun index 07bd08d..14f4289 100644 --- a/tokenizer.fun +++ b/tokenizer.fun @@ -14,7 +14,7 @@ struct Id of int | CharConst of int * Word64.word | Num of int | - Strlit of int | + Strlit of int * int | kwBreak | kwCase | @@ -280,7 +280,7 @@ struct | PpcInclude (dir, arg) => Printf out `"#include(" `dir `", " `arg `")" % | CharConst (repr, _) => Printf out ?repr % - | Strlit id => Printf out ?id % + | Strlit (id, _) => Printf out ?id % | v => case List.find (fn (x, _) => x = v) tokenRepr of SOME (_, repr) => @@ -606,21 +606,21 @@ struct val startOff = S.getOffset stream - 1 val (pos, stream) = S.getPosRaw startOff stream - fun collect stream = + fun collect size stream = let val (c, stream) = getMaybeBackslashed stream in case c of Reg #"\000" => error pos "unfinished string literal" - | Reg #"\"" => (S.getOffset stream, stream) - | _ => collect stream + | Reg #"\"" => (S.getOffset stream, size, stream) + | _ => collect (size + 1) stream end - val (endOff, stream) = collect stream + val (endOff, size, stream) = collect 0 stream val s = S.getSubstr startOff endOff stream val id = ST.getId symtab s in - (Strlit id, pos, stream) + (Strlit (id, size), pos, stream) end fun parseId symtab _ stream = diff --git a/tokenizer.sig b/tokenizer.sig index 5243642..958619a 100644 --- a/tokenizer.sig +++ b/tokenizer.sig @@ -11,7 +11,7 @@ signature TOKENIZER = sig Id of int | CharConst of int * Word64.word | Num of int | - Strlit of int | + Strlit of int * int | kwBreak | kwCase | |