diff options
author | Vladimir Azarov <avm@intermediate-node.net> | 2025-05-12 01:51:27 +0200 |
---|---|---|
committer | Vladimir Azarov <avm@intermediate-node.net> | 2025-05-12 01:51:27 +0200 |
commit | 52a6f8656e8a600a2c59fa2802fb46fafb30de45 (patch) | |
tree | 72511efdccc742709f40e52ca73b708a0c74c1c6 | |
parent | e99a8dc48ede26696be2ba75a8cb0d5122d94598 (diff) |
Object-like macros
-rw-r--r-- | ccross.mlb | 2 | ||||
-rw-r--r-- | ccross.sml | 6 | ||||
-rw-r--r-- | common.sml | 92 | ||||
-rw-r--r-- | cpp.fun | 286 | ||||
-rw-r--r-- | cpp.sig | 30 | ||||
-rw-r--r-- | driver.fun | 7 | ||||
-rw-r--r-- | exn_handler.fun | 5 | ||||
-rw-r--r-- | ppc.fun | 300 | ||||
-rw-r--r-- | ppc.sig | 31 | ||||
-rw-r--r-- | stream.sig | 36 | ||||
-rw-r--r-- | stream.sml | 159 | ||||
-rw-r--r-- | tokenizer.fun | 133 | ||||
-rw-r--r-- | tokenizer.sig | 11 |
13 files changed, 561 insertions, 537 deletions
@@ -9,7 +9,7 @@ in stream.sig stream.sml hashtable.sig hashtable.sml tokenizer.sig tokenizer.fun - cpp.sig cpp.fun + ppc.sig ppc.fun exn_handler.sig exn_handler.fun driver.sig driver.fun @@ -2,12 +2,12 @@ structure ccross:> CCROSS = struct structure T:> TOKENIZER = Tokenizer(structure H = Hashtable; structure S = Stream) - structure P:> CPP = Cpp(T) + structure ppc:> PPC = ppc(structure H = Hashtable; structure T = T) - structure D:> DRIVER = Driver(P) + structure D:> DRIVER = Driver(ppc) structure ExnHandler:> EXN_HANDLER = - ExnHandler(structure T = T; structure P = P) + ExnHandler(structure T = T; structure P = ppc) end val () = MLton.Exn.setTopLevelHandler ccross.ExnHandler.handler @@ -1,9 +1,35 @@ -exception Unreachable +exception Unreachable and Unimplemented fun $ (x, y) = x y infixr 0 $ -fun printLn s = (print s; print "\n") +fun assert truth = if not truth then raise Unreachable else () + +local + val lastIsNL = ref true + + fun endsWith s = + String.sub (s, String.size s - 1) = #"\n" handle + Subscript => raise Unreachable + + val print' = print +in + fun print s = ( + lastIsNL := endsWith s; + print' s + ) + + fun printLn s = ( + lastIsNL := true; + print' s; print "\n" + ) + + fun printOnNL s = ( + if not $ !lastIsNL then print' "\n" else (); + print' s; + lastIsNL := endsWith s + ) +end (* All global values which computations may raise an exception must be * wrapped in lazy, so that no exception is thrown before custom @@ -30,3 +56,65 @@ in | Evaluated v => v | Exn e => raise e end + +structure FRU = struct + fun fold (a, f) g = g (a, f) + fun step0 h (a, f) = fold (h a, f) + fun step2 h (a, f) b c = fold (h (b, c, a), f) + + fun next g (f, z) x = g (f x, z) + fun f1 (f, z) x = f (z x) + fun f2 z = next f1 z + fun f3 z = next f2 z + fun f4 z = next f3 z + fun f5 z = next f4 z + fun f6 z = next f5 z + fun f7 z = next f6 z + fun f8 z = next f7 z + fun f9 z = next f8 z + + fun c0 from = from + fun c1 from = c0 from f1 + fun c2 from = c1 from f2 + fun c3 from = c2 from f3 + fun c4 from = c3 from f4 + fun c5 from = c4 from f5 + fun c6 from = c5 from f6 + fun c7 from = c6 from f7 + fun c8 from = c7 from f8 + fun c9 from = c8 from f9 + + fun makeUpdate cX (from, from', to) record = + let + fun ops () = cX from' + fun vars f = to f record + in + fold ((vars, ops), fn (vars, _) => vars from) + end + fun makeUpdate0 z = makeUpdate c0 z + fun makeUpdate1 z = makeUpdate c1 z + fun makeUpdate2 z = makeUpdate c2 z + fun makeUpdate3 z = makeUpdate c3 z + fun makeUpdate4 z = makeUpdate c4 z + fun makeUpdate5 z = makeUpdate c5 z + fun makeUpdate6 z = makeUpdate c6 z + fun makeUpdate7 z = makeUpdate c7 z + fun makeUpdate8 z = makeUpdate c8 z + fun makeUpdate9 z = makeUpdate c9 z + + fun upd z = step2 + (fn (s, f, (vars, ops)) => + (fn out => vars (s (ops ()) (out, f)), ops)) z + fun set z = step2 + (fn (s, v, (vars, ops)) => + (fn out => vars (s (ops ()) (out, fn _ => v)), ops)) z + + fun set2 s v = step0 + (fn (vars, ops) => (fn out => vars (s (ops ()) (out, fn _ => v)), ops)) + fun upd2 s f = step0 + (fn (vars, ops) => (fn out => vars (s (ops ()) (out, f)), ops)) +end + +fun % (a, f) = f a +val s = FRU.set +val u = FRU.upd diff --git a/cpp.fun b/cpp.fun deleted file mode 100644 index 8884a59..0000000 --- a/cpp.fun +++ /dev/null @@ -1,286 +0,0 @@ -functor Cpp(T: TOKENIZER): CPP = struct - - structure T = T - - type tkPos = T.S.pos - type t = - { streams: T.S.t list, fileInfo: T.S.fileInfo list, - lastPos: tkPos option, firstId: T.S.fileId, - incDirs: string list } - - datatype tkExp = - Tk of T.token | - Id | - NumConst | - StrLiteral | - UnOp | - BinOp | - Op - - type tkExpectedVal = string * tkExp list - - exception StreamTooOld - exception TkExpected of tkExpectedVal - - type tkErrorVal = string * string - exception TkError of tkErrorVal - - fun create fname incDirs = - let - val stream = T.S.create fname - val info = T.S.convert stream - in - { streams = [stream] , fileInfo = [info], lastPos = NONE, - firstId = #1 info, incDirs } - end - - fun getLastPos ({ lastPos = NONE, ... }: t) = raise Unreachable - | getLastPos { lastPos = SOME p, ... } = p - - fun getTopFileInfo ({ streams = top :: _, ... }: t) = T.S.convert top - | getTopFileInfo _ = raise Unreachable - - val tkExp2str = fn - (Tk tk) => T.token2str tk - | Id: tkExp => "identifier" - | NumConst => "numeric constant" - | StrLiteral => "string literal" - | UnOp => "unary operator" - | BinOp => "binary operator" - | Op => "operator" - - fun tkPos2str stream (id, pos) = - let - val fileInfo = - case List.find (fn (id', _, _) => id' = id) $ #fileInfo stream of - NONE => raise StreamTooOld - | SOME fileInfo => fileInfo - in - T.S.ppos2str $ T.S.pos2pposWithFI (id, pos) fileInfo - end - - fun prepTkError stream pos msg = raise TkError (tkPos2str stream pos, msg) - fun prepLastTkError stream = prepTkError stream (getLastPos stream) - - fun prepTkExpected (stream: t) pos expList = - raise TkExpected (tkPos2str stream pos, expList) - fun prepLastTkExpected stream = prepTkExpected stream - (getLastPos stream) - - fun tkExpectedPrint (pos, expList) = - let - fun tkExps2str [e] [] = tkExp2str e - | tkExps2str [e] acc = - (String.concatWith ", " acc ^ " or ") ^ tkExp2str e - | tkExps2str (e :: ec) acc = - tkExps2str ec (tkExp2str e :: acc) - | tkExps2str [] _ = raise Unreachable - in - print pos; - print ":expected "; - printLn $ tkExps2str expList [] - end - - fun tkErrorPrint (pos, msg) = printLn $ pos ^ ": " ^ msg - - exception EmptyPath - - fun getDirOfCurFile ({ streams = top :: _, ... }: t) = - OS.Path.getParent $ T.S.getFname top - | getDirOfCurFile _ = raise Unreachable - - - (* TODO: properly handle Size and Path exceptions from concat *) - fun findPath _ "" _ = raise EmptyPath - | findPath stream fname true = - let - val path = OS.Path.concat (getDirOfCurFile stream, fname) - in - (path, TextIO.openIn path) - end - | findPath stream fname false = - let - fun try (dir :: tail) = - let - val fname = OS.Path.concat (dir, fname) - in - SOME (fname, TextIO.openIn fname) handle _ => try tail - end - | try [] = NONE - - val instream = try (#incDirs stream) - in - case instream of - NONE => findPath stream fname true - | SOME pair => pair - end - - fun predSkip stream pred = - let - val (c, stream) = T.S.getchar stream - in - case c of - NONE => (NONE, stream) - | SOME c => - if c = #"\n" then - (NONE, stream) - else if pred c then - (SOME $ #2 $ T.S.getPosAfterCharRead stream, stream) - else - predSkip stream pred - end - - fun tryCollect stream = - let - val top = hd $ #streams stream - - fun x f = f stream - - fun returnBack top = { - streams = top :: tl (#streams stream), - fileInfo = x#fileInfo, - lastPos = x#lastPos, - firstId = x#firstId, - incDirs = x#incDirs - } - - val (start, s) = - case predSkip top (fn c => c = #"<") of - (NONE, _) => raise Unreachable - | (SOME p, s) => (p, s) - - val (res, s) = predSkip s (fn c => c = #">") - in - case res of - NONE => (NONE, stream) - | SOME endOff => - (SOME $ T.S.getSubstr (start + 1) endOff s, returnBack s) - end - - fun getToken - ({ streams = stream :: tail, fileInfo, lastPos, firstId, incDirs }: t) = - let - val (tk, stream) = T.getToken stream - in - case tk of - NONE => getToken { streams = tail, fileInfo, lastPos, firstId, incDirs } - | SOME (pos, tk) => - if tk = T.CppInclude then - handleInclude { streams = stream :: tail, fileInfo, - lastPos, firstId, incDirs } - else - (tk, { streams = stream :: tail, fileInfo, - lastPos = SOME pos, firstId, incDirs }) - end - | getToken - { streams = [], fileInfo, lastPos = SOME lastPos, firstId, incDirs } = - let - val pos = SOME (#1 lastPos, ~1) (* EOF *) - in - (T.EOS, {streams = [], fileInfo, lastPos = pos, firstId, incDirs }) - end - | getToken { streams = [], fileInfo, lastPos = NONE, firstId, incDirs } = - (T.EOS, { streams = [], fileInfo, - lastPos = SOME (firstId, ~1), firstId, incDirs }) - - and handleInclude (stream: t) = - let - val (tk, streamNew) = getToken stream - - fun die () = - prepLastTkError streamNew - "#include with macro argument is not implemented" - in - case tk of - T.StringConst path => includeFile streamNew path true - | T.EOS => die () - | tk => - if String.sub (T.token2str tk, 0) = #"<" then - let - val (path, stream) = tryCollect stream - in - case path of - SOME path => includeFile stream path false - | NONE => die () - end - else - die () - end - - and includeFile stream fname localhdr = - let - val (fname, instream) = findPath stream fname localhdr - handle EmptyPath => - prepLastTkError stream "#include path can not be empty" - val newStream = T.S.createFromInstream fname instream - val newFileInfo = T.S.convert newStream - in - getToken { - streams = newStream :: #streams stream, - fileInfo = newFileInfo :: #fileInfo stream, - lastPos = #lastPos stream, - firstId = #firstId stream, - incDirs = #incDirs stream - } - end - - - fun debugPrintToken cache tk (line, col) printLineRegardless = - let - val ` = Int.toString - in - if printLineRegardless orelse T.S.pposCacheGetLine cache <> line then - print $ "\n" ^ T.S.pposCacheGetFname cache ^ ":" ^ `line ^ "\n\t" - else - (); - print $ `col ^ ":" ^ T.token2str tk ^ " " - end - - fun adjustCacheStack (s as (top :: rest)) pos stream = - let - fun idMatches cache = T.S.pposCacheGetId cache = #1 pos - fun metBefore [] = NONE - | metBefore (c :: cs) = - if idMatches c then - SOME (c :: cs) - else - metBefore cs - in - if idMatches top then - (false, s) - else - (true, case metBefore rest of - NONE => T.S.pposCacheInit (getTopFileInfo stream) :: s - | SOME stack => stack) - end - | adjustCacheStack _ _ _ = raise Unreachable - - fun debugPrint' cacheStack stream first = - let - val (tk, stream) = getToken stream - in - case tk of - T.NewLine => debugPrint' cacheStack stream first - | T.EOS => () - | tk => - let - val pos = getLastPos stream - val (stackChanged, cacheStack) = - adjustCacheStack cacheStack pos stream - - val oldTop = hd cacheStack - val (pair, top) = T.S.pposCacheAdvance pos oldTop - in - debugPrintToken oldTop tk pair (first orelse stackChanged); - debugPrint' (top :: tl cacheStack) stream false - end - end - - fun debugPrint stream = - let - val cache = T.S.pposCacheInit $ hd $ #fileInfo stream - in - debugPrint' [cache] stream true; - print "\n" - end -end diff --git a/cpp.sig b/cpp.sig deleted file mode 100644 index e842e13..0000000 --- a/cpp.sig +++ /dev/null @@ -1,30 +0,0 @@ -signature CPP = sig - structure T: TOKENIZER - - type t - type tkPos - - type tkExpectedVal - exception TkExpected of tkExpectedVal - - type tkErrorVal - exception TkError of tkErrorVal - - datatype tkExp = - Tk of T.token | - Id | - NumConst | - StrLiteral | - UnOp | - BinOp | - Op - - val create: string -> string list -> t - val getToken: t -> T.token * t - val getLastPos: t -> tkPos - - val tkExpectedPrint: tkExpectedVal -> unit - val tkErrorPrint: tkErrorVal -> unit - - val debugPrint: t -> unit -end @@ -1,4 +1,4 @@ -functor Driver(P: CPP): DRIVER = struct +functor Driver(P: PPC): DRIVER = struct structure P = P type config = { @@ -30,10 +30,9 @@ functor Driver(P: CPP): DRIVER = struct fun exec () = let val config = parseCmdArgs initConfig (CommandLine.arguments ()) - - val cpp = P.create (valOf $ #file config) (#includeDirs config) + val fname = valOf $ #file config in - P.debugPrint cpp + P.debugPrint fname (#includeDirs config) end end diff --git a/exn_handler.fun b/exn_handler.fun index 7e0aa4d..2d970cc 100644 --- a/exn_handler.fun +++ b/exn_handler.fun @@ -1,4 +1,4 @@ -functor ExnHandler(structure T: TOKENIZER; structure P: CPP): +functor ExnHandler(structure T: TOKENIZER; structure P: PPC): EXN_HANDLER = struct @@ -35,8 +35,7 @@ struct T.FsmTableIsTooSmall => eprint "fsm table is too small. Increate 'maxState' value" | IO.Io _ => ioExn e - | T.TkErrorAug (pos, msg) => eprint $ T.S.ppos2str pos ^ ": " ^ msg - | P.TkExpected v => P.tkExpectedPrint v + | T.TkErrorAug (pos, msg) => eprint $ T.S.pos2str pos ^ ": " ^ msg | P.TkError v => P.tkErrorPrint v | _ => otherExn e; exit 255) @@ -0,0 +1,300 @@ +functor ppc(structure H: HASHTABLE; structure T: TOKENIZER): PPC = +struct + + structure T = T + + datatype tkPos = TkPos of T.S.pos + + type t = { + streams: T.S.t list, + + buffer: (T.token * T.S.pos) list, + macros: (T.token * T.S.pos) list H.t, (* body is stored reversed *) + + EOSreached: bool, + incDirs: string list + } + + type tkErrorVal = tkPos * string + exception TkError of tkErrorVal + + datatype expTk = + ExpTk of T.token | + ExpId | + ExpConst | + ExpUnop | + ExpBinop | + ExpOp + + type tkErrorExpVal = tkPos * expTk list + exception TkErrorExp of tkErrorExpVal + + fun raiseTkError msg pos = raise TkError (pos, msg) + fun raiseTkErrorSPos pos msg = raiseTkError msg (TkPos pos) + + fun tkErrorPrint (TkPos pos, msg) = + printLn $ T.S.pos2str pos ^ ": " ^ msg + + fun raiseTkErrorExp pos exp = raise TkErrorExp (pos, exp) + fun raiseTkErrorExpSPos pos exp = raiseTkErrorExp (TkPos pos) exp + + fun tkErrorExpPrint (TkPos pos, exp) = + let + val printEtk = fn + ExpTk tk => T.printToken tk + | ExpId => print "identifier" + | ExpConst => print "constant" + | ExpUnop => print "unary operator" + | ExpBinop => print "binary operator" + | ExpOp => print "operator" + + fun printExpList [] = raise Unreachable + | printExpList [etk] = printEtk etk + | printExpList [etk1, etk2] = ( + printEtk etk1; + print " or "; + printEtk etk2 + ) + | printExpList (etk :: etks) = ( + printEtk etk; + print ", "; + printExpList etks + ) + in + print $ T.S.pos2str pos ^ ": expected "; + printExpList exp; + print "\n" + end + + val updatePpc = fn z => + let + fun from streams buffer macros EOSreached incDirs = + { streams, buffer, macros, EOSreached, incDirs } + fun to f { streams, buffer, macros, EOSreached, incDirs } = + f streams buffer macros EOSreached incDirs + in + FRU.makeUpdate5 (from, from, to) + end + z + + fun create { fname, incDirs } = + { streams = [T.S.create fname], buffer = [], macros = H.create 10, + EOSreached = false, incDirs } + + datatype IncludeArg = + LocalInc of string * T.S.pos | + ExternalInc of string * T.S.pos + + fun parseIncludeArg stream = + let + fun eat pred stream skipFirst acc = + let + val (c, stream') = T.S.getcharEx stream + in + if pred c then + eat pred stream' skipFirst (c :: acc) + else + (implode o rev $ acc, if skipFirst then stream' else stream) + end + val (_, stream) = eat Char.isSpace stream false [] + val (pos, stream) = T.S.getPos stream + val (start, stream) = T.S.getcharEx stream + val finish = + if start = #"\"" then + #"\"" + else if start = #"<" then + #">" + else + raiseTkErrorSPos pos "expected \" or <" + + val (arg, stream) = eat (fn c => c <> finish) stream true [] + val (_, stream) = + eat (fn c => c = #" " orelse c = #"\t") stream false [] + val (c, stream) = T.S.getcharEx stream + in + if c <> #"\n" then + let + val (pos, _) = T.S.getPosAfterChar stream + in + raiseTkErrorSPos pos "expected '\n'" + end + else + (if start = #"\"" then LocalInc (arg, pos) + else ExternalInc (arg, pos), stream) + end handle T.S.EOF => + let + val (pos, _) = T.S.EOFpos stream + in + raiseTkErrorSPos pos "unexpected EOF during #include argument parsing" + end + + fun findFile arg (stream, incDirs) = + case arg of + LocalInc (arg, pos) => ( + let + val dir = OS.Path.getParent o T.S.getFname $ stream + val path = OS.Path.concat (dir, arg) + + val (path, instream) = (path, TextIO.openIn path) + in + (path, instream) + end handle + OS.Path.Path => raiseTkErrorSPos pos "invalid argument" + | Size => raiseTkErrorSPos pos "resulting path is too long" + | IO.Io v => raise IO.Io v + ) + | ExternalInc (arg, pos) => + let + fun try (dir :: tail) = ( + let + val path = OS.Path.concat (dir, arg) + in + SOME (path, TextIO.openIn path) + end handle _ => try tail + ) + | try [] = NONE + in + case try incDirs of + SOME pair => pair + | NONE => raiseTkErrorSPos pos "unable to find header" + end + + fun handleInclude (P as { streams = head :: tail, incDirs, ... }: t) = + let + val (arg, oldHead) = parseIncludeArg head + val (path, instream) = findFile arg (head, incDirs) + val head = T.S.createFromInstream path instream + in + updatePpc P s#streams (head :: oldHead :: tail) % + end + | handleInclude _ = raise Unreachable + + fun handleDefine (P as { streams = head :: _, ... }: t) = + let + fun getName stream = + let + val (tk, pos, stream) = T.getToken stream + in + case tk of + T.Id id => (id, pos, stream) + | _ => raiseTkErrorExpSPos pos [ExpId] + end + + fun getBody stream acc = + let + val (tk, pos, stream) = T.getToken stream + in + case tk of + T.NewLine => (acc, stream) + | T.EOS => raiseTkErrorExpSPos pos [ExpTk T.NewLine] + | _ => getBody stream ((tk, pos) :: acc) + end + + val (macroName, pos, head) = getName head + val (macroBody, head) = getBody head [] + + val () = H.insert (#macros P) macroName macroBody handle + H.Exists => raiseTkErrorSPos pos "macro redefinition" + in + updatePpc P u#streams (fn s => head :: tl s) % + end + | handleDefine _ = raise Unreachable + + fun handleRegularToken tk pos P = + let + fun def () = (tk, TkPos pos, P) + in + case tk of + T.Id id => ( + case H.lookup (#macros P) id of + NONE => def () + | SOME body => + let + val mstart = (T.MacroStart id, pos) + val mend = (T.MacroEnd, + if body = [] then pos else (#2 o hd) body) + val revBody = List.concat [[mend], body, [mstart]] + in + getToken $ updatePpc P + u#buffer (fn buf => List.revAppend (revBody, buf)) % + end + ) + | _ => def () + end + + and getToken (P as { streams = [stream], EOSreached = true, ... }) = + let + val (pos, stream) = T.S.EOFpos stream + in + (T.EOS, TkPos pos, updatePpc P s#streams [stream] %) + end + | getToken (P as { buffer = (tk, pos) :: tail, ... }: t) = + handleRegularToken tk pos (updatePpc P s#buffer tail %) + | getToken (P as { streams = head :: tail, ... }: t) = + let + val (tk, pos, head) = T.getToken head + val directiveTable = [ + (T.CppInclude, handleInclude), + (T.CppDefine, handleDefine) + ] + in + if tk = T.EOS then + case tail of + [] => getToken $ updatePpc P s#EOSreached true % + | _ => getToken $ updatePpc P s#streams tail % + else + case List.find (fn (tk', _) => tk' = tk) directiveTable of + SOME (_, f) => getToken o f $ + updatePpc P s#streams (head :: tail) % + | NONE => + let + val ppc = updatePpc P u#streams (fn s => head :: tl s) % + in + handleRegularToken tk pos ppc + end + end + | getToken _ = raise Unreachable + + + fun debugPrint' ppc shouldPrintLine (fname, line) = + let + val (tk, pos, ppc) = getToken ppc + val TkPos (t as (T.S.Pos (fname', line', col))) = pos + + fun printLine () = + printOnNL $ fname' ^ ":" ^ Int.toString line' ^ "\n\t" + + fun finish shouldPrintLine = + if tk <> T.EOS then + debugPrint' ppc shouldPrintLine (fname', line') + else + () + in + case tk of + T.MacroStart macroName => ( + printOnNL $ "macro " ^ macroName ^ " (" ^ T.S.pos2str t ^ ") {"; + finish true + ) + | T.MacroEnd => (printOnNL "}\n"; finish true) + | _ => ( + if shouldPrintLine orelse fname' <> fname + orelse line' <> line + then + printLine() + else + (); + print $ Int.toString col ^ ":"; + T.printToken tk; + print " "; + finish false + ) + end + + fun debugPrint fname incDirs = + let + val ppc = create { fname, incDirs } + in + debugPrint' ppc true ("", 0); + print "\n" + end +end @@ -0,0 +1,31 @@ +signature PPC = sig + + structure T: TOKENIZER + + type t + type tkErrorVal + type tkErrorExpVal + type tkPos + + exception TkError of tkErrorVal + exception TkErrorExp of tkErrorExpVal + + datatype expTk = + ExpTk of T.token | + ExpId | + ExpConst | + ExpUnop | + ExpBinop | + ExpOp + + + val create: { fname: string, incDirs: string list } -> t + val debugPrint: string -> string list -> unit + + + val raiseTkError: string -> tkPos -> 'a + val tkErrorPrint: tkErrorVal -> unit + + val raiseTkErrorExp: tkPos -> expTk list -> 'a + val tkErrorExpPrint: tkErrorExpVal -> unit +end @@ -1,49 +1,33 @@ signature STREAM = sig type fileId = int type fileOffset = int - type pos = fileId * fileOffset - type ppos (* pretty pos *) - type pposCache + + datatype pos = Pos of string * int * int type t - type fileInfo = fileId * string * string - val convert: t -> fileInfo + exception EOF - val ppos2str: ppos -> string + val pos2str: pos -> string val getchar: t -> char option * t + val getcharEx: t -> char * t (* throws EOF *) (* Will throw UngetcError, if applied at the beginning of the stream. * Can be always avoided, so is not provided in sig file *) val ungetc: t -> t - val getPos: t -> pos - val getPosAfterCharRead: t -> pos - - (* pos must come from t, see pos2pposWithFI *) - val pos2ppos: pos -> t -> ppos - (* #id pos must be equal to fileId of fileInfo, - * otherwise InvalidFileInfo is thrown *) - val pos2pposWithFI: pos -> fileInfo -> ppos - - (* Assumed to be called once for given pos, so will throw Unreachable on - * second call *) - val pposWithoutCol: ppos -> ppos + val EOFpos: t -> pos * t val getSubstr: fileOffset -> fileOffset -> t -> string val getFname: t -> string - val isFirstOnLine: pos -> t -> bool (* both throw IO.Io *) val create: string -> t val createFromInstream: string -> TextIO.instream -> t - val pposCacheInit: fileInfo -> pposCache - - val pposCacheGetId: pposCache -> fileId - val pposCacheGetLine: pposCache -> int - val pposCacheGetFname: pposCache -> string - - val pposCacheAdvance: pos -> pposCache -> (int * int) * pposCache + val getOffset: t -> fileOffset + val getPosRaw: fileOffset -> t -> pos * t + val getPos: t -> pos * t + val getPosAfterChar: t -> pos * t end @@ -1,82 +1,57 @@ structure Stream :> STREAM = struct type fileId = int type fileOffset = int - type fileInfo = fileId * string * string - type t = fileId * string * fileOffset * string + datatype pos = Pos of string * int * int - type pos = fileId * fileOffset - type ppos = string * int * int option + type t = { + id: fileOffset, + fname: string, + off: fileOffset, + contents: string, - type pposCache = - { id: fileId, fname: string, contents: string, - offset: fileOffset, line: int, col: int } + (* offset * line * col *) + cache: fileOffset * int * int + } + exception EOF exception UngetcError - exception InvalidFileInfo - fun ppos2str (pos, line, col) = + val updateStream = fn z => + let + fun from id fname off contents cache = + { id, fname, off, contents, cache } + fun to f { id, fname, off, contents, cache } = + f id fname off contents cache + in + FRU.makeUpdate5 (from, from, to) + end + z + + fun pos2str (Pos (pos, line, col)) = let val % = Int.toString in - case col of - SOME col => pos ^ ":" ^ %line ^ ":" ^ %col - | NONE => pos ^ ":" ^ %line + pos ^ ":" ^ %line ^ ":" ^ %col end - fun convert (fid, fname, _, contents) = (fid, fname, contents) - - fun calcFilePos (startOff, startPos) contents destOff = - let - fun calc offset (line, col) = - if offset = destOff then - (line, col) - else - calc (offset + 1) (if String.sub (contents, offset) = #"\n" - then (line + 1, 1) else (line, col + 1)) - in - calc startOff startPos - end + fun getcharSure (S as { contents, off, ... }: t) = + (String.sub (contents, off), updateStream S s#off (off + 1) %) - val calcFilePosFromStart = calcFilePos (0, (1, 1)) + fun getchar stream = + (fn (c, s) => (SOME c, s)) $ getcharSure stream handle + Subscript => (NONE, stream) - fun getchar (S as (fid, fname, off, contents)) = - if off < String.size contents then - (SOME $ String.sub (contents, off), (fid, fname, off + 1, contents)) - else - (NONE, S) + fun getcharEx stream = getcharSure stream handle Subscript => raise EOF - fun ungetc (fid, fname, off, contents) = - if off = 0 then + fun ungetc ({ off = 0, ... }: t) = raise UngetcError - else - (fid, fname, off - 1, contents) - - fun getPosAfterCharRead (fid, _, off, _) = (fid, off - 1) - - fun pos2pposWithFI (id, pos) (id', fname, contents) = - if id <> id' then - raise InvalidFileInfo - else - let - val (line, col) = calcFilePosFromStart contents pos - in - (fname, line, SOME col) - end - - fun pos2ppos pos stream = pos2pposWithFI pos (convert stream) + | ungetc stream = updateStream stream u#off (fn off => off - 1) % - fun pposWithoutCol (fname, line, SOME _) = (fname, line, NONE) - | pposWithoutCol (_, _, NONE) = raise Unreachable - - fun getPos (id, _, off, _) = (id, off) - - fun getSubstr startOff endOff (_, _, _, contents) = + fun getSubstr startOff endOff ({ contents, ... }: t) = String.substring (contents, startOff, endOff - startOff) - fun getFname (stream: t) = #2 stream - - val lastUsedId = ref ~1 + fun getFname ({ fname, ... }: t) = fname fun createFromInstream fname instream = let @@ -84,47 +59,39 @@ structure Stream :> STREAM = struct val contents = inputAll instream val () = closeIn instream in - lastUsedId := !lastUsedId + 1; - (!lastUsedId, fname, 0, contents) + { id = 0, fname, off = 0, contents, cache = (0, 1, 1) } end fun create fname = createFromInstream fname (TextIO.openIn fname) - fun isFirstOnLine (_, offset) ((_, _, _, contents) : t) = - let - fun returnToNL ~1 = true - | returnToNL offset = - let - val chr = String.sub (contents, offset) - in - if chr = #"\n" then - true - else if Char.isSpace chr then - returnToNL (offset - 1) - else - false - end - in - returnToNL (offset - 1) - end + fun getOffset ({ off, ... }: t) = off + + fun getPosRaw off (S as { cache = (prevOff, line, col), fname, + contents, ... }: t) = + let + (* + val () = printLn $ "raw: " ^ Int.toString off ^ ", " ^ Int.toString prevOff + *) + + fun calcPos curOff (line, col) = + if curOff = off then + (line, col) + else + calcPos (curOff + 1) + (if String.sub (contents,curOff) = #"\n" then (line + 1, 1) + else (line, col + 1)) + val (line, col) = calcPos prevOff (line, col) + in + assert $ off >= prevOff; + (Pos (fname, line, col), updateStream S s#cache (off, line, col) %) + end + + fun getPos (S as { off, ... }: t) = + getPosRaw off S + + fun EOFpos (S as { contents, ... }: t) = + getPosRaw (String.size contents) S - fun pposCacheInit (id, fname, contents) = - { id, fname, contents, offset = 0, line = 1, col = 1 } - - fun pposCacheAdvance (id, pos) (cache: pposCache) = - if id <> #id cache then - raise Unreachable - else - let - fun ` f = f cache - val p as (line, col) = calcFilePos (` #offset, (` #line, ` #col)) - (` #contents) pos - in - (p, { id = ` #id, fname = ` #fname, contents = ` #contents, - offset = pos, line, col }) - end - - fun pposCacheGetId (cache: pposCache) = #id cache - fun pposCacheGetLine (cache: pposCache) = #line cache - fun pposCacheGetFname (cache: pposCache) = #fname cache + fun getPosAfterChar stream = + getPosRaw (getOffset stream -1) stream end diff --git a/tokenizer.fun b/tokenizer.fun index 4cb5d1d..5162308 100644 --- a/tokenizer.fun +++ b/tokenizer.fun @@ -16,6 +16,8 @@ struct Invalid | EOS | NewLine | + MacroStart of string | + MacroEnd | Num of numConst | @@ -125,15 +127,10 @@ struct CppError | CppPragma - val kwPrefix = #"`" - val cppPrefix = #"$" - - type fullToken = S.pos * token - datatype tkErrorAuxInfo = TkiEOF | TkiDx of int | TkiStart exception TkError of tkErrorAuxInfo * string - exception TkErrorAug of S.ppos * string + exception TkErrorAug of S.pos * string exception ExpectedCppDir (* handled in postprocess *) @@ -143,13 +140,19 @@ struct exception TokenWithoutRepr exception SuffixWithoutRepr + val kwPrefix = #"`" + val cppPrefix = #"$" + val otherPrefix = #"@" + val tokenRepr = let fun & repr = str kwPrefix ^ repr fun % repr = str cppPrefix ^ repr in [ - (NewLine, "NewLine"), + (NewLine, "@NewLine"), + (EOS, "@EOS"), + (MacroEnd, "@Mend"), (kwBreak, &"break"), (kwCase, &"case"), @@ -280,6 +283,7 @@ struct val token2str = fn Id s => s + | MacroStart macro => "m(" ^ macro ^ ")" | Num (IntConst (it, str, sfx)) => let val intType = @@ -348,12 +352,12 @@ struct val () = List.app (fn (_, repr) => update (lookupTable, firstChr repr, true)) $ List.filter - (fn (tk, repr) => + (fn (_, repr) => let val c = String.sub (repr, 0) in c <> kwPrefix andalso c <> cppPrefix - andalso tk <> NewLine + andalso c <> otherPrefix end) tokenRepr @@ -437,7 +441,7 @@ struct fun fsmEat stream = let open Array - val pos = S.getPos stream + val (pos, stream) = S.getPos stream fun get curState stream = let @@ -456,14 +460,15 @@ struct get nextState stream end end + val (tk, stream) = get 0 stream in - (fn (tk, stream) => ((pos, tk), stream)) $ get 0 stream + (tk, pos, stream) end fun tkError2aug stream (dx, msg) = let - val (id, pos) = S.getPosAfterCharRead stream - val pos = S.pos2ppos (id, pos + dx) stream + val off = S.getOffset stream - 1 + dx + val (pos, _) = S.getPosRaw off stream in TkErrorAug (pos, msg) end @@ -471,22 +476,20 @@ struct fun parserWrapper stream parser acc = let val stream = S.ungetc stream - val P as (_, startOff) = S.getPos stream - fun parse' stream acc = let + val startOff = S.getOffset stream + val (pos, stream) = S.getPos stream + + fun parse' stream acc = + let val (c, stream) = S.getchar stream val (acc, tk, stream) = parser acc (stream, startOff) c handle TkError (TkiDx dx, msg) => raise tkError2aug stream (dx, msg) - | TkError (TkiStart, msg) => - let - val startPos = S.pos2ppos P stream - in - raise TkErrorAug (startPos, msg) - end + | TkError (TkiStart, msg) => raise TkErrorAug (pos, msg) | TkError (TkiEOF, msg) => let - val pos = S.pposWithoutCol $ S.pos2ppos P stream + val (pos, _) = S.EOFpos stream in raise TkErrorAug (pos, msg) end @@ -498,16 +501,11 @@ struct val (tk, stream) = parse' stream acc in - ((P, tk): fullToken, stream) + (tk, pos, stream) end fun finishSeqRead startOff stream = - let - val (_, endOff) = S.getPos stream - val s = S.getSubstr startOff endOff stream - in - s - end + S.getSubstr startOff (S.getOffset stream) stream fun keywordHashtableGen () = let @@ -871,19 +869,14 @@ struct | formCppDir kwIf = CppIf | formCppDir _ = raise ExpectedCppDir - fun handleCppDir tk prevPos stream = - let - val (pos, tk') = tk - in - (prevPos, formCppDir tk') handle + fun handleCppDir (pos, tk) = + formCppDir tk handle ExpectedCppDir => - raise TkErrorAug (S.pos2ppos pos stream, - "expected preprocessor directive") - end + raise TkErrorAug (pos, "expected preprocessor directive") fun unexpectedCharRaise stream c = let - val pos = S.pos2ppos (S.getPosAfterCharRead stream) stream + val (pos, _) = S.getPosAfterChar stream val repr = if isPrintable c then str c @@ -899,13 +892,8 @@ struct let val (c, stream) = case S.getchar stream of - (NONE, _) => - let - val pos = S.pos2ppos pos stream - in - raise TkErrorAug (pos, "unfinished comment") - end - | (SOME c, stream) => (c, stream) + (NONE, _) => raise TkErrorAug (pos, "unfinished comment") + | (SOME c, stream) => (c, stream) in if prevIsAsterisk andalso c = #"/" then stream @@ -922,8 +910,7 @@ struct val raiseErr = fn () => let - val pos = S.getPosAfterCharRead stream - val pos = S.pos2ppos pos stream + val (pos, _) = S.getPosAfterChar stream in raise TkErrorAug (pos, "expected \\n after backslash") end @@ -939,40 +926,39 @@ struct fun processSymbol stream = let - val (T as (p, tk), stream) = fsmEat $ S.ungetc stream + val (tk, pos, stream) = fsmEat $ S.ungetc stream + val S.Pos (_, _, col) = pos in case tk of - CommentStart => getToken $ skipComment stream p - | DoubleDot => (SOME (p, Dot), S.ungetc stream) + CommentStart => getToken $ skipComment stream pos + | DoubleDot => (Dot, pos, S.ungetc stream) | Hash => - if S.isFirstOnLine p stream then + if col = 1 then let - val (tk, stream) = getToken stream + val (tk, pos', stream) = getToken stream in - case tk of - NONE => - raise TkErrorAug (S.pos2ppos p stream, - "unfinished preprecessor directive") - | SOME tk => - (SOME $ handleCppDir tk p stream, stream) + if tk = EOS then + raise TkErrorAug (pos, "unfinished preprecessor directive") + else + (handleCppDir (pos', tk), pos, stream) end else - (SOME T, stream) - | _ => (SOME T, stream) + (tk, pos, stream) + | _ => (tk, pos, stream) end and getToken stream = let val (c, stream) = S.getchar stream - fun @-> parser acc = - (fn (tk, s) => (SOME tk, s)) $ parserWrapper stream parser acc + fun conv tk (pos, stream) = (tk, pos, stream) + fun @-> parser acc = parserWrapper stream parser acc in case c of - NONE => (NONE, stream) + NONE => conv EOS $ S.EOFpos stream | SOME c => if c = #"\n" then - (SOME (S.getPosAfterCharRead stream, NewLine), stream) + conv NewLine $ S.getPosAfterChar stream else if Char.isSpace c then getToken stream else if isIdStart c then @@ -991,25 +977,12 @@ struct unexpectedCharRaise stream c end - fun tokenize stream = - let - fun aux acc stream = - let - val (tk, stream) = getToken stream - in - case tk of - NONE => rev acc - | SOME tk => aux (tk :: acc) stream - end - in - aux [] stream - end - + (* TODO: remove *) fun debugPrint tkl fname = let - fun print' line _ ((_, NewLine) :: tks) = + fun print' line _ ((NewLine, _) :: tks) = print' (line + 1) true tks - | print' line firstOnLine ((_, tk) :: tks) = ( + | print' line firstOnLine ((tk, _) :: tks) = ( if firstOnLine then ( print "\n"; printLn $ fname ^ ":" ^ Int.toString line; diff --git a/tokenizer.sig b/tokenizer.sig index d34f934..7ea6f63 100644 --- a/tokenizer.sig +++ b/tokenizer.sig @@ -13,6 +13,8 @@ signature TOKENIZER = sig Invalid | EOS | NewLine | + MacroStart of string | + MacroEnd | Num of numConst | @@ -122,17 +124,14 @@ signature TOKENIZER = sig CppError | CppPragma - type fullToken = S.pos * token - (* Fatal. both may be thrown by tokenize *) exception FsmTableIsTooSmall - exception TkErrorAug of S.ppos * string + exception TkErrorAug of S.pos * string - val getToken: S.t -> fullToken option * S.t + val getToken: S.t -> token * S.pos * S.t - val tokenize: S.t -> fullToken list val token2str: token -> string val printToken: token -> unit - val debugPrint: fullToken list -> string -> unit + val debugPrint: (token * S.pos) list -> string -> unit end |