summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--ccross.mlb2
-rw-r--r--ccross.sml6
-rw-r--r--common.sml92
-rw-r--r--cpp.fun286
-rw-r--r--cpp.sig30
-rw-r--r--driver.fun7
-rw-r--r--exn_handler.fun5
-rw-r--r--ppc.fun300
-rw-r--r--ppc.sig31
-rw-r--r--stream.sig36
-rw-r--r--stream.sml159
-rw-r--r--tokenizer.fun133
-rw-r--r--tokenizer.sig11
13 files changed, 561 insertions, 537 deletions
diff --git a/ccross.mlb b/ccross.mlb
index 322b1e4..db831ea 100644
--- a/ccross.mlb
+++ b/ccross.mlb
@@ -9,7 +9,7 @@ in
stream.sig stream.sml
hashtable.sig hashtable.sml
tokenizer.sig tokenizer.fun
- cpp.sig cpp.fun
+ ppc.sig ppc.fun
exn_handler.sig exn_handler.fun
driver.sig driver.fun
diff --git a/ccross.sml b/ccross.sml
index d42368d..1dab748 100644
--- a/ccross.sml
+++ b/ccross.sml
@@ -2,12 +2,12 @@ structure ccross:> CCROSS = struct
structure T:> TOKENIZER =
Tokenizer(structure H = Hashtable; structure S = Stream)
- structure P:> CPP = Cpp(T)
+ structure ppc:> PPC = ppc(structure H = Hashtable; structure T = T)
- structure D:> DRIVER = Driver(P)
+ structure D:> DRIVER = Driver(ppc)
structure ExnHandler:> EXN_HANDLER =
- ExnHandler(structure T = T; structure P = P)
+ ExnHandler(structure T = T; structure P = ppc)
end
val () = MLton.Exn.setTopLevelHandler ccross.ExnHandler.handler
diff --git a/common.sml b/common.sml
index cb4652a..d3643c7 100644
--- a/common.sml
+++ b/common.sml
@@ -1,9 +1,35 @@
-exception Unreachable
+exception Unreachable and Unimplemented
fun $ (x, y) = x y
infixr 0 $
-fun printLn s = (print s; print "\n")
+fun assert truth = if not truth then raise Unreachable else ()
+
+local
+ val lastIsNL = ref true
+
+ fun endsWith s =
+ String.sub (s, String.size s - 1) = #"\n" handle
+ Subscript => raise Unreachable
+
+ val print' = print
+in
+ fun print s = (
+ lastIsNL := endsWith s;
+ print' s
+ )
+
+ fun printLn s = (
+ lastIsNL := true;
+ print' s; print "\n"
+ )
+
+ fun printOnNL s = (
+ if not $ !lastIsNL then print' "\n" else ();
+ print' s;
+ lastIsNL := endsWith s
+ )
+end
(* All global values which computations may raise an exception must be
* wrapped in lazy, so that no exception is thrown before custom
@@ -30,3 +56,65 @@ in
| Evaluated v => v
| Exn e => raise e
end
+
+structure FRU = struct
+ fun fold (a, f) g = g (a, f)
+ fun step0 h (a, f) = fold (h a, f)
+ fun step2 h (a, f) b c = fold (h (b, c, a), f)
+
+ fun next g (f, z) x = g (f x, z)
+ fun f1 (f, z) x = f (z x)
+ fun f2 z = next f1 z
+ fun f3 z = next f2 z
+ fun f4 z = next f3 z
+ fun f5 z = next f4 z
+ fun f6 z = next f5 z
+ fun f7 z = next f6 z
+ fun f8 z = next f7 z
+ fun f9 z = next f8 z
+
+ fun c0 from = from
+ fun c1 from = c0 from f1
+ fun c2 from = c1 from f2
+ fun c3 from = c2 from f3
+ fun c4 from = c3 from f4
+ fun c5 from = c4 from f5
+ fun c6 from = c5 from f6
+ fun c7 from = c6 from f7
+ fun c8 from = c7 from f8
+ fun c9 from = c8 from f9
+
+ fun makeUpdate cX (from, from', to) record =
+ let
+ fun ops () = cX from'
+ fun vars f = to f record
+ in
+ fold ((vars, ops), fn (vars, _) => vars from)
+ end
+ fun makeUpdate0 z = makeUpdate c0 z
+ fun makeUpdate1 z = makeUpdate c1 z
+ fun makeUpdate2 z = makeUpdate c2 z
+ fun makeUpdate3 z = makeUpdate c3 z
+ fun makeUpdate4 z = makeUpdate c4 z
+ fun makeUpdate5 z = makeUpdate c5 z
+ fun makeUpdate6 z = makeUpdate c6 z
+ fun makeUpdate7 z = makeUpdate c7 z
+ fun makeUpdate8 z = makeUpdate c8 z
+ fun makeUpdate9 z = makeUpdate c9 z
+
+ fun upd z = step2
+ (fn (s, f, (vars, ops)) =>
+ (fn out => vars (s (ops ()) (out, f)), ops)) z
+ fun set z = step2
+ (fn (s, v, (vars, ops)) =>
+ (fn out => vars (s (ops ()) (out, fn _ => v)), ops)) z
+
+ fun set2 s v = step0
+ (fn (vars, ops) => (fn out => vars (s (ops ()) (out, fn _ => v)), ops))
+ fun upd2 s f = step0
+ (fn (vars, ops) => (fn out => vars (s (ops ()) (out, f)), ops))
+end
+
+fun % (a, f) = f a
+val s = FRU.set
+val u = FRU.upd
diff --git a/cpp.fun b/cpp.fun
deleted file mode 100644
index 8884a59..0000000
--- a/cpp.fun
+++ /dev/null
@@ -1,286 +0,0 @@
-functor Cpp(T: TOKENIZER): CPP = struct
-
- structure T = T
-
- type tkPos = T.S.pos
- type t =
- { streams: T.S.t list, fileInfo: T.S.fileInfo list,
- lastPos: tkPos option, firstId: T.S.fileId,
- incDirs: string list }
-
- datatype tkExp =
- Tk of T.token |
- Id |
- NumConst |
- StrLiteral |
- UnOp |
- BinOp |
- Op
-
- type tkExpectedVal = string * tkExp list
-
- exception StreamTooOld
- exception TkExpected of tkExpectedVal
-
- type tkErrorVal = string * string
- exception TkError of tkErrorVal
-
- fun create fname incDirs =
- let
- val stream = T.S.create fname
- val info = T.S.convert stream
- in
- { streams = [stream] , fileInfo = [info], lastPos = NONE,
- firstId = #1 info, incDirs }
- end
-
- fun getLastPos ({ lastPos = NONE, ... }: t) = raise Unreachable
- | getLastPos { lastPos = SOME p, ... } = p
-
- fun getTopFileInfo ({ streams = top :: _, ... }: t) = T.S.convert top
- | getTopFileInfo _ = raise Unreachable
-
- val tkExp2str = fn
- (Tk tk) => T.token2str tk
- | Id: tkExp => "identifier"
- | NumConst => "numeric constant"
- | StrLiteral => "string literal"
- | UnOp => "unary operator"
- | BinOp => "binary operator"
- | Op => "operator"
-
- fun tkPos2str stream (id, pos) =
- let
- val fileInfo =
- case List.find (fn (id', _, _) => id' = id) $ #fileInfo stream of
- NONE => raise StreamTooOld
- | SOME fileInfo => fileInfo
- in
- T.S.ppos2str $ T.S.pos2pposWithFI (id, pos) fileInfo
- end
-
- fun prepTkError stream pos msg = raise TkError (tkPos2str stream pos, msg)
- fun prepLastTkError stream = prepTkError stream (getLastPos stream)
-
- fun prepTkExpected (stream: t) pos expList =
- raise TkExpected (tkPos2str stream pos, expList)
- fun prepLastTkExpected stream = prepTkExpected stream
- (getLastPos stream)
-
- fun tkExpectedPrint (pos, expList) =
- let
- fun tkExps2str [e] [] = tkExp2str e
- | tkExps2str [e] acc =
- (String.concatWith ", " acc ^ " or ") ^ tkExp2str e
- | tkExps2str (e :: ec) acc =
- tkExps2str ec (tkExp2str e :: acc)
- | tkExps2str [] _ = raise Unreachable
- in
- print pos;
- print ":expected ";
- printLn $ tkExps2str expList []
- end
-
- fun tkErrorPrint (pos, msg) = printLn $ pos ^ ": " ^ msg
-
- exception EmptyPath
-
- fun getDirOfCurFile ({ streams = top :: _, ... }: t) =
- OS.Path.getParent $ T.S.getFname top
- | getDirOfCurFile _ = raise Unreachable
-
-
- (* TODO: properly handle Size and Path exceptions from concat *)
- fun findPath _ "" _ = raise EmptyPath
- | findPath stream fname true =
- let
- val path = OS.Path.concat (getDirOfCurFile stream, fname)
- in
- (path, TextIO.openIn path)
- end
- | findPath stream fname false =
- let
- fun try (dir :: tail) =
- let
- val fname = OS.Path.concat (dir, fname)
- in
- SOME (fname, TextIO.openIn fname) handle _ => try tail
- end
- | try [] = NONE
-
- val instream = try (#incDirs stream)
- in
- case instream of
- NONE => findPath stream fname true
- | SOME pair => pair
- end
-
- fun predSkip stream pred =
- let
- val (c, stream) = T.S.getchar stream
- in
- case c of
- NONE => (NONE, stream)
- | SOME c =>
- if c = #"\n" then
- (NONE, stream)
- else if pred c then
- (SOME $ #2 $ T.S.getPosAfterCharRead stream, stream)
- else
- predSkip stream pred
- end
-
- fun tryCollect stream =
- let
- val top = hd $ #streams stream
-
- fun x f = f stream
-
- fun returnBack top = {
- streams = top :: tl (#streams stream),
- fileInfo = x#fileInfo,
- lastPos = x#lastPos,
- firstId = x#firstId,
- incDirs = x#incDirs
- }
-
- val (start, s) =
- case predSkip top (fn c => c = #"<") of
- (NONE, _) => raise Unreachable
- | (SOME p, s) => (p, s)
-
- val (res, s) = predSkip s (fn c => c = #">")
- in
- case res of
- NONE => (NONE, stream)
- | SOME endOff =>
- (SOME $ T.S.getSubstr (start + 1) endOff s, returnBack s)
- end
-
- fun getToken
- ({ streams = stream :: tail, fileInfo, lastPos, firstId, incDirs }: t) =
- let
- val (tk, stream) = T.getToken stream
- in
- case tk of
- NONE => getToken { streams = tail, fileInfo, lastPos, firstId, incDirs }
- | SOME (pos, tk) =>
- if tk = T.CppInclude then
- handleInclude { streams = stream :: tail, fileInfo,
- lastPos, firstId, incDirs }
- else
- (tk, { streams = stream :: tail, fileInfo,
- lastPos = SOME pos, firstId, incDirs })
- end
- | getToken
- { streams = [], fileInfo, lastPos = SOME lastPos, firstId, incDirs } =
- let
- val pos = SOME (#1 lastPos, ~1) (* EOF *)
- in
- (T.EOS, {streams = [], fileInfo, lastPos = pos, firstId, incDirs })
- end
- | getToken { streams = [], fileInfo, lastPos = NONE, firstId, incDirs } =
- (T.EOS, { streams = [], fileInfo,
- lastPos = SOME (firstId, ~1), firstId, incDirs })
-
- and handleInclude (stream: t) =
- let
- val (tk, streamNew) = getToken stream
-
- fun die () =
- prepLastTkError streamNew
- "#include with macro argument is not implemented"
- in
- case tk of
- T.StringConst path => includeFile streamNew path true
- | T.EOS => die ()
- | tk =>
- if String.sub (T.token2str tk, 0) = #"<" then
- let
- val (path, stream) = tryCollect stream
- in
- case path of
- SOME path => includeFile stream path false
- | NONE => die ()
- end
- else
- die ()
- end
-
- and includeFile stream fname localhdr =
- let
- val (fname, instream) = findPath stream fname localhdr
- handle EmptyPath =>
- prepLastTkError stream "#include path can not be empty"
- val newStream = T.S.createFromInstream fname instream
- val newFileInfo = T.S.convert newStream
- in
- getToken {
- streams = newStream :: #streams stream,
- fileInfo = newFileInfo :: #fileInfo stream,
- lastPos = #lastPos stream,
- firstId = #firstId stream,
- incDirs = #incDirs stream
- }
- end
-
-
- fun debugPrintToken cache tk (line, col) printLineRegardless =
- let
- val ` = Int.toString
- in
- if printLineRegardless orelse T.S.pposCacheGetLine cache <> line then
- print $ "\n" ^ T.S.pposCacheGetFname cache ^ ":" ^ `line ^ "\n\t"
- else
- ();
- print $ `col ^ ":" ^ T.token2str tk ^ " "
- end
-
- fun adjustCacheStack (s as (top :: rest)) pos stream =
- let
- fun idMatches cache = T.S.pposCacheGetId cache = #1 pos
- fun metBefore [] = NONE
- | metBefore (c :: cs) =
- if idMatches c then
- SOME (c :: cs)
- else
- metBefore cs
- in
- if idMatches top then
- (false, s)
- else
- (true, case metBefore rest of
- NONE => T.S.pposCacheInit (getTopFileInfo stream) :: s
- | SOME stack => stack)
- end
- | adjustCacheStack _ _ _ = raise Unreachable
-
- fun debugPrint' cacheStack stream first =
- let
- val (tk, stream) = getToken stream
- in
- case tk of
- T.NewLine => debugPrint' cacheStack stream first
- | T.EOS => ()
- | tk =>
- let
- val pos = getLastPos stream
- val (stackChanged, cacheStack) =
- adjustCacheStack cacheStack pos stream
-
- val oldTop = hd cacheStack
- val (pair, top) = T.S.pposCacheAdvance pos oldTop
- in
- debugPrintToken oldTop tk pair (first orelse stackChanged);
- debugPrint' (top :: tl cacheStack) stream false
- end
- end
-
- fun debugPrint stream =
- let
- val cache = T.S.pposCacheInit $ hd $ #fileInfo stream
- in
- debugPrint' [cache] stream true;
- print "\n"
- end
-end
diff --git a/cpp.sig b/cpp.sig
deleted file mode 100644
index e842e13..0000000
--- a/cpp.sig
+++ /dev/null
@@ -1,30 +0,0 @@
-signature CPP = sig
- structure T: TOKENIZER
-
- type t
- type tkPos
-
- type tkExpectedVal
- exception TkExpected of tkExpectedVal
-
- type tkErrorVal
- exception TkError of tkErrorVal
-
- datatype tkExp =
- Tk of T.token |
- Id |
- NumConst |
- StrLiteral |
- UnOp |
- BinOp |
- Op
-
- val create: string -> string list -> t
- val getToken: t -> T.token * t
- val getLastPos: t -> tkPos
-
- val tkExpectedPrint: tkExpectedVal -> unit
- val tkErrorPrint: tkErrorVal -> unit
-
- val debugPrint: t -> unit
-end
diff --git a/driver.fun b/driver.fun
index 78ca877..02eadcc 100644
--- a/driver.fun
+++ b/driver.fun
@@ -1,4 +1,4 @@
-functor Driver(P: CPP): DRIVER = struct
+functor Driver(P: PPC): DRIVER = struct
structure P = P
type config = {
@@ -30,10 +30,9 @@ functor Driver(P: CPP): DRIVER = struct
fun exec () =
let
val config = parseCmdArgs initConfig (CommandLine.arguments ())
-
- val cpp = P.create (valOf $ #file config) (#includeDirs config)
+ val fname = valOf $ #file config
in
- P.debugPrint cpp
+ P.debugPrint fname (#includeDirs config)
end
end
diff --git a/exn_handler.fun b/exn_handler.fun
index 7e0aa4d..2d970cc 100644
--- a/exn_handler.fun
+++ b/exn_handler.fun
@@ -1,4 +1,4 @@
-functor ExnHandler(structure T: TOKENIZER; structure P: CPP):
+functor ExnHandler(structure T: TOKENIZER; structure P: PPC):
EXN_HANDLER =
struct
@@ -35,8 +35,7 @@ struct
T.FsmTableIsTooSmall =>
eprint "fsm table is too small. Increate 'maxState' value"
| IO.Io _ => ioExn e
- | T.TkErrorAug (pos, msg) => eprint $ T.S.ppos2str pos ^ ": " ^ msg
- | P.TkExpected v => P.tkExpectedPrint v
+ | T.TkErrorAug (pos, msg) => eprint $ T.S.pos2str pos ^ ": " ^ msg
| P.TkError v => P.tkErrorPrint v
| _ => otherExn e;
exit 255)
diff --git a/ppc.fun b/ppc.fun
new file mode 100644
index 0000000..0b67e39
--- /dev/null
+++ b/ppc.fun
@@ -0,0 +1,300 @@
+functor ppc(structure H: HASHTABLE; structure T: TOKENIZER): PPC =
+struct
+
+ structure T = T
+
+ datatype tkPos = TkPos of T.S.pos
+
+ type t = {
+ streams: T.S.t list,
+
+ buffer: (T.token * T.S.pos) list,
+ macros: (T.token * T.S.pos) list H.t, (* body is stored reversed *)
+
+ EOSreached: bool,
+ incDirs: string list
+ }
+
+ type tkErrorVal = tkPos * string
+ exception TkError of tkErrorVal
+
+ datatype expTk =
+ ExpTk of T.token |
+ ExpId |
+ ExpConst |
+ ExpUnop |
+ ExpBinop |
+ ExpOp
+
+ type tkErrorExpVal = tkPos * expTk list
+ exception TkErrorExp of tkErrorExpVal
+
+ fun raiseTkError msg pos = raise TkError (pos, msg)
+ fun raiseTkErrorSPos pos msg = raiseTkError msg (TkPos pos)
+
+ fun tkErrorPrint (TkPos pos, msg) =
+ printLn $ T.S.pos2str pos ^ ": " ^ msg
+
+ fun raiseTkErrorExp pos exp = raise TkErrorExp (pos, exp)
+ fun raiseTkErrorExpSPos pos exp = raiseTkErrorExp (TkPos pos) exp
+
+ fun tkErrorExpPrint (TkPos pos, exp) =
+ let
+ val printEtk = fn
+ ExpTk tk => T.printToken tk
+ | ExpId => print "identifier"
+ | ExpConst => print "constant"
+ | ExpUnop => print "unary operator"
+ | ExpBinop => print "binary operator"
+ | ExpOp => print "operator"
+
+ fun printExpList [] = raise Unreachable
+ | printExpList [etk] = printEtk etk
+ | printExpList [etk1, etk2] = (
+ printEtk etk1;
+ print " or ";
+ printEtk etk2
+ )
+ | printExpList (etk :: etks) = (
+ printEtk etk;
+ print ", ";
+ printExpList etks
+ )
+ in
+ print $ T.S.pos2str pos ^ ": expected ";
+ printExpList exp;
+ print "\n"
+ end
+
+ val updatePpc = fn z =>
+ let
+ fun from streams buffer macros EOSreached incDirs =
+ { streams, buffer, macros, EOSreached, incDirs }
+ fun to f { streams, buffer, macros, EOSreached, incDirs } =
+ f streams buffer macros EOSreached incDirs
+ in
+ FRU.makeUpdate5 (from, from, to)
+ end
+ z
+
+ fun create { fname, incDirs } =
+ { streams = [T.S.create fname], buffer = [], macros = H.create 10,
+ EOSreached = false, incDirs }
+
+ datatype IncludeArg =
+ LocalInc of string * T.S.pos |
+ ExternalInc of string * T.S.pos
+
+ fun parseIncludeArg stream =
+ let
+ fun eat pred stream skipFirst acc =
+ let
+ val (c, stream') = T.S.getcharEx stream
+ in
+ if pred c then
+ eat pred stream' skipFirst (c :: acc)
+ else
+ (implode o rev $ acc, if skipFirst then stream' else stream)
+ end
+ val (_, stream) = eat Char.isSpace stream false []
+ val (pos, stream) = T.S.getPos stream
+ val (start, stream) = T.S.getcharEx stream
+ val finish =
+ if start = #"\"" then
+ #"\""
+ else if start = #"<" then
+ #">"
+ else
+ raiseTkErrorSPos pos "expected \" or <"
+
+ val (arg, stream) = eat (fn c => c <> finish) stream true []
+ val (_, stream) =
+ eat (fn c => c = #" " orelse c = #"\t") stream false []
+ val (c, stream) = T.S.getcharEx stream
+ in
+ if c <> #"\n" then
+ let
+ val (pos, _) = T.S.getPosAfterChar stream
+ in
+ raiseTkErrorSPos pos "expected '\n'"
+ end
+ else
+ (if start = #"\"" then LocalInc (arg, pos)
+ else ExternalInc (arg, pos), stream)
+ end handle T.S.EOF =>
+ let
+ val (pos, _) = T.S.EOFpos stream
+ in
+ raiseTkErrorSPos pos "unexpected EOF during #include argument parsing"
+ end
+
+ fun findFile arg (stream, incDirs) =
+ case arg of
+ LocalInc (arg, pos) => (
+ let
+ val dir = OS.Path.getParent o T.S.getFname $ stream
+ val path = OS.Path.concat (dir, arg)
+
+ val (path, instream) = (path, TextIO.openIn path)
+ in
+ (path, instream)
+ end handle
+ OS.Path.Path => raiseTkErrorSPos pos "invalid argument"
+ | Size => raiseTkErrorSPos pos "resulting path is too long"
+ | IO.Io v => raise IO.Io v
+ )
+ | ExternalInc (arg, pos) =>
+ let
+ fun try (dir :: tail) = (
+ let
+ val path = OS.Path.concat (dir, arg)
+ in
+ SOME (path, TextIO.openIn path)
+ end handle _ => try tail
+ )
+ | try [] = NONE
+ in
+ case try incDirs of
+ SOME pair => pair
+ | NONE => raiseTkErrorSPos pos "unable to find header"
+ end
+
+ fun handleInclude (P as { streams = head :: tail, incDirs, ... }: t) =
+ let
+ val (arg, oldHead) = parseIncludeArg head
+ val (path, instream) = findFile arg (head, incDirs)
+ val head = T.S.createFromInstream path instream
+ in
+ updatePpc P s#streams (head :: oldHead :: tail) %
+ end
+ | handleInclude _ = raise Unreachable
+
+ fun handleDefine (P as { streams = head :: _, ... }: t) =
+ let
+ fun getName stream =
+ let
+ val (tk, pos, stream) = T.getToken stream
+ in
+ case tk of
+ T.Id id => (id, pos, stream)
+ | _ => raiseTkErrorExpSPos pos [ExpId]
+ end
+
+ fun getBody stream acc =
+ let
+ val (tk, pos, stream) = T.getToken stream
+ in
+ case tk of
+ T.NewLine => (acc, stream)
+ | T.EOS => raiseTkErrorExpSPos pos [ExpTk T.NewLine]
+ | _ => getBody stream ((tk, pos) :: acc)
+ end
+
+ val (macroName, pos, head) = getName head
+ val (macroBody, head) = getBody head []
+
+ val () = H.insert (#macros P) macroName macroBody handle
+ H.Exists => raiseTkErrorSPos pos "macro redefinition"
+ in
+ updatePpc P u#streams (fn s => head :: tl s) %
+ end
+ | handleDefine _ = raise Unreachable
+
+ fun handleRegularToken tk pos P =
+ let
+ fun def () = (tk, TkPos pos, P)
+ in
+ case tk of
+ T.Id id => (
+ case H.lookup (#macros P) id of
+ NONE => def ()
+ | SOME body =>
+ let
+ val mstart = (T.MacroStart id, pos)
+ val mend = (T.MacroEnd,
+ if body = [] then pos else (#2 o hd) body)
+ val revBody = List.concat [[mend], body, [mstart]]
+ in
+ getToken $ updatePpc P
+ u#buffer (fn buf => List.revAppend (revBody, buf)) %
+ end
+ )
+ | _ => def ()
+ end
+
+ and getToken (P as { streams = [stream], EOSreached = true, ... }) =
+ let
+ val (pos, stream) = T.S.EOFpos stream
+ in
+ (T.EOS, TkPos pos, updatePpc P s#streams [stream] %)
+ end
+ | getToken (P as { buffer = (tk, pos) :: tail, ... }: t) =
+ handleRegularToken tk pos (updatePpc P s#buffer tail %)
+ | getToken (P as { streams = head :: tail, ... }: t) =
+ let
+ val (tk, pos, head) = T.getToken head
+ val directiveTable = [
+ (T.CppInclude, handleInclude),
+ (T.CppDefine, handleDefine)
+ ]
+ in
+ if tk = T.EOS then
+ case tail of
+ [] => getToken $ updatePpc P s#EOSreached true %
+ | _ => getToken $ updatePpc P s#streams tail %
+ else
+ case List.find (fn (tk', _) => tk' = tk) directiveTable of
+ SOME (_, f) => getToken o f $
+ updatePpc P s#streams (head :: tail) %
+ | NONE =>
+ let
+ val ppc = updatePpc P u#streams (fn s => head :: tl s) %
+ in
+ handleRegularToken tk pos ppc
+ end
+ end
+ | getToken _ = raise Unreachable
+
+
+ fun debugPrint' ppc shouldPrintLine (fname, line) =
+ let
+ val (tk, pos, ppc) = getToken ppc
+ val TkPos (t as (T.S.Pos (fname', line', col))) = pos
+
+ fun printLine () =
+ printOnNL $ fname' ^ ":" ^ Int.toString line' ^ "\n\t"
+
+ fun finish shouldPrintLine =
+ if tk <> T.EOS then
+ debugPrint' ppc shouldPrintLine (fname', line')
+ else
+ ()
+ in
+ case tk of
+ T.MacroStart macroName => (
+ printOnNL $ "macro " ^ macroName ^ " (" ^ T.S.pos2str t ^ ") {";
+ finish true
+ )
+ | T.MacroEnd => (printOnNL "}\n"; finish true)
+ | _ => (
+ if shouldPrintLine orelse fname' <> fname
+ orelse line' <> line
+ then
+ printLine()
+ else
+ ();
+ print $ Int.toString col ^ ":";
+ T.printToken tk;
+ print " ";
+ finish false
+ )
+ end
+
+ fun debugPrint fname incDirs =
+ let
+ val ppc = create { fname, incDirs }
+ in
+ debugPrint' ppc true ("", 0);
+ print "\n"
+ end
+end
diff --git a/ppc.sig b/ppc.sig
new file mode 100644
index 0000000..10b90d4
--- /dev/null
+++ b/ppc.sig
@@ -0,0 +1,31 @@
+signature PPC = sig
+
+ structure T: TOKENIZER
+
+ type t
+ type tkErrorVal
+ type tkErrorExpVal
+ type tkPos
+
+ exception TkError of tkErrorVal
+ exception TkErrorExp of tkErrorExpVal
+
+ datatype expTk =
+ ExpTk of T.token |
+ ExpId |
+ ExpConst |
+ ExpUnop |
+ ExpBinop |
+ ExpOp
+
+
+ val create: { fname: string, incDirs: string list } -> t
+ val debugPrint: string -> string list -> unit
+
+
+ val raiseTkError: string -> tkPos -> 'a
+ val tkErrorPrint: tkErrorVal -> unit
+
+ val raiseTkErrorExp: tkPos -> expTk list -> 'a
+ val tkErrorExpPrint: tkErrorExpVal -> unit
+end
diff --git a/stream.sig b/stream.sig
index 1307a60..7925285 100644
--- a/stream.sig
+++ b/stream.sig
@@ -1,49 +1,33 @@
signature STREAM = sig
type fileId = int
type fileOffset = int
- type pos = fileId * fileOffset
- type ppos (* pretty pos *)
- type pposCache
+
+ datatype pos = Pos of string * int * int
type t
- type fileInfo = fileId * string * string
- val convert: t -> fileInfo
+ exception EOF
- val ppos2str: ppos -> string
+ val pos2str: pos -> string
val getchar: t -> char option * t
+ val getcharEx: t -> char * t (* throws EOF *)
(* Will throw UngetcError, if applied at the beginning of the stream.
* Can be always avoided, so is not provided in sig file *)
val ungetc: t -> t
- val getPos: t -> pos
- val getPosAfterCharRead: t -> pos
-
- (* pos must come from t, see pos2pposWithFI *)
- val pos2ppos: pos -> t -> ppos
- (* #id pos must be equal to fileId of fileInfo,
- * otherwise InvalidFileInfo is thrown *)
- val pos2pposWithFI: pos -> fileInfo -> ppos
-
- (* Assumed to be called once for given pos, so will throw Unreachable on
- * second call *)
- val pposWithoutCol: ppos -> ppos
+ val EOFpos: t -> pos * t
val getSubstr: fileOffset -> fileOffset -> t -> string
val getFname: t -> string
- val isFirstOnLine: pos -> t -> bool
(* both throw IO.Io *)
val create: string -> t
val createFromInstream: string -> TextIO.instream -> t
- val pposCacheInit: fileInfo -> pposCache
-
- val pposCacheGetId: pposCache -> fileId
- val pposCacheGetLine: pposCache -> int
- val pposCacheGetFname: pposCache -> string
-
- val pposCacheAdvance: pos -> pposCache -> (int * int) * pposCache
+ val getOffset: t -> fileOffset
+ val getPosRaw: fileOffset -> t -> pos * t
+ val getPos: t -> pos * t
+ val getPosAfterChar: t -> pos * t
end
diff --git a/stream.sml b/stream.sml
index 4134293..c41eae7 100644
--- a/stream.sml
+++ b/stream.sml
@@ -1,82 +1,57 @@
structure Stream :> STREAM = struct
type fileId = int
type fileOffset = int
- type fileInfo = fileId * string * string
- type t = fileId * string * fileOffset * string
+ datatype pos = Pos of string * int * int
- type pos = fileId * fileOffset
- type ppos = string * int * int option
+ type t = {
+ id: fileOffset,
+ fname: string,
+ off: fileOffset,
+ contents: string,
- type pposCache =
- { id: fileId, fname: string, contents: string,
- offset: fileOffset, line: int, col: int }
+ (* offset * line * col *)
+ cache: fileOffset * int * int
+ }
+ exception EOF
exception UngetcError
- exception InvalidFileInfo
- fun ppos2str (pos, line, col) =
+ val updateStream = fn z =>
+ let
+ fun from id fname off contents cache =
+ { id, fname, off, contents, cache }
+ fun to f { id, fname, off, contents, cache } =
+ f id fname off contents cache
+ in
+ FRU.makeUpdate5 (from, from, to)
+ end
+ z
+
+ fun pos2str (Pos (pos, line, col)) =
let
val % = Int.toString
in
- case col of
- SOME col => pos ^ ":" ^ %line ^ ":" ^ %col
- | NONE => pos ^ ":" ^ %line
+ pos ^ ":" ^ %line ^ ":" ^ %col
end
- fun convert (fid, fname, _, contents) = (fid, fname, contents)
-
- fun calcFilePos (startOff, startPos) contents destOff =
- let
- fun calc offset (line, col) =
- if offset = destOff then
- (line, col)
- else
- calc (offset + 1) (if String.sub (contents, offset) = #"\n"
- then (line + 1, 1) else (line, col + 1))
- in
- calc startOff startPos
- end
+ fun getcharSure (S as { contents, off, ... }: t) =
+ (String.sub (contents, off), updateStream S s#off (off + 1) %)
- val calcFilePosFromStart = calcFilePos (0, (1, 1))
+ fun getchar stream =
+ (fn (c, s) => (SOME c, s)) $ getcharSure stream handle
+ Subscript => (NONE, stream)
- fun getchar (S as (fid, fname, off, contents)) =
- if off < String.size contents then
- (SOME $ String.sub (contents, off), (fid, fname, off + 1, contents))
- else
- (NONE, S)
+ fun getcharEx stream = getcharSure stream handle Subscript => raise EOF
- fun ungetc (fid, fname, off, contents) =
- if off = 0 then
+ fun ungetc ({ off = 0, ... }: t) =
raise UngetcError
- else
- (fid, fname, off - 1, contents)
-
- fun getPosAfterCharRead (fid, _, off, _) = (fid, off - 1)
-
- fun pos2pposWithFI (id, pos) (id', fname, contents) =
- if id <> id' then
- raise InvalidFileInfo
- else
- let
- val (line, col) = calcFilePosFromStart contents pos
- in
- (fname, line, SOME col)
- end
-
- fun pos2ppos pos stream = pos2pposWithFI pos (convert stream)
+ | ungetc stream = updateStream stream u#off (fn off => off - 1) %
- fun pposWithoutCol (fname, line, SOME _) = (fname, line, NONE)
- | pposWithoutCol (_, _, NONE) = raise Unreachable
-
- fun getPos (id, _, off, _) = (id, off)
-
- fun getSubstr startOff endOff (_, _, _, contents) =
+ fun getSubstr startOff endOff ({ contents, ... }: t) =
String.substring (contents, startOff, endOff - startOff)
- fun getFname (stream: t) = #2 stream
-
- val lastUsedId = ref ~1
+ fun getFname ({ fname, ... }: t) = fname
fun createFromInstream fname instream =
let
@@ -84,47 +59,39 @@ structure Stream :> STREAM = struct
val contents = inputAll instream
val () = closeIn instream
in
- lastUsedId := !lastUsedId + 1;
- (!lastUsedId, fname, 0, contents)
+ { id = 0, fname, off = 0, contents, cache = (0, 1, 1) }
end
fun create fname = createFromInstream fname (TextIO.openIn fname)
- fun isFirstOnLine (_, offset) ((_, _, _, contents) : t) =
- let
- fun returnToNL ~1 = true
- | returnToNL offset =
- let
- val chr = String.sub (contents, offset)
- in
- if chr = #"\n" then
- true
- else if Char.isSpace chr then
- returnToNL (offset - 1)
- else
- false
- end
- in
- returnToNL (offset - 1)
- end
+ fun getOffset ({ off, ... }: t) = off
+
+ fun getPosRaw off (S as { cache = (prevOff, line, col), fname,
+ contents, ... }: t) =
+ let
+ (*
+ val () = printLn $ "raw: " ^ Int.toString off ^ ", " ^ Int.toString prevOff
+ *)
+
+ fun calcPos curOff (line, col) =
+ if curOff = off then
+ (line, col)
+ else
+ calcPos (curOff + 1)
+ (if String.sub (contents,curOff) = #"\n" then (line + 1, 1)
+ else (line, col + 1))
+ val (line, col) = calcPos prevOff (line, col)
+ in
+ assert $ off >= prevOff;
+ (Pos (fname, line, col), updateStream S s#cache (off, line, col) %)
+ end
+
+ fun getPos (S as { off, ... }: t) =
+ getPosRaw off S
+
+ fun EOFpos (S as { contents, ... }: t) =
+ getPosRaw (String.size contents) S
- fun pposCacheInit (id, fname, contents) =
- { id, fname, contents, offset = 0, line = 1, col = 1 }
-
- fun pposCacheAdvance (id, pos) (cache: pposCache) =
- if id <> #id cache then
- raise Unreachable
- else
- let
- fun ` f = f cache
- val p as (line, col) = calcFilePos (` #offset, (` #line, ` #col))
- (` #contents) pos
- in
- (p, { id = ` #id, fname = ` #fname, contents = ` #contents,
- offset = pos, line, col })
- end
-
- fun pposCacheGetId (cache: pposCache) = #id cache
- fun pposCacheGetLine (cache: pposCache) = #line cache
- fun pposCacheGetFname (cache: pposCache) = #fname cache
+ fun getPosAfterChar stream =
+ getPosRaw (getOffset stream -1) stream
end
diff --git a/tokenizer.fun b/tokenizer.fun
index 4cb5d1d..5162308 100644
--- a/tokenizer.fun
+++ b/tokenizer.fun
@@ -16,6 +16,8 @@ struct
Invalid |
EOS |
NewLine |
+ MacroStart of string |
+ MacroEnd |
Num of numConst |
@@ -125,15 +127,10 @@ struct
CppError |
CppPragma
- val kwPrefix = #"`"
- val cppPrefix = #"$"
-
- type fullToken = S.pos * token
-
datatype tkErrorAuxInfo = TkiEOF | TkiDx of int | TkiStart
exception TkError of tkErrorAuxInfo * string
- exception TkErrorAug of S.ppos * string
+ exception TkErrorAug of S.pos * string
exception ExpectedCppDir (* handled in postprocess *)
@@ -143,13 +140,19 @@ struct
exception TokenWithoutRepr
exception SuffixWithoutRepr
+ val kwPrefix = #"`"
+ val cppPrefix = #"$"
+ val otherPrefix = #"@"
+
val tokenRepr =
let
fun & repr = str kwPrefix ^ repr
fun % repr = str cppPrefix ^ repr
in
[
- (NewLine, "NewLine"),
+ (NewLine, "@NewLine"),
+ (EOS, "@EOS"),
+ (MacroEnd, "@Mend"),
(kwBreak, &"break"),
(kwCase, &"case"),
@@ -280,6 +283,7 @@ struct
val token2str = fn
Id s => s
+ | MacroStart macro => "m(" ^ macro ^ ")"
| Num (IntConst (it, str, sfx)) =>
let
val intType =
@@ -348,12 +352,12 @@ struct
val () = List.app
(fn (_, repr) => update (lookupTable, firstChr repr, true))
$ List.filter
- (fn (tk, repr) =>
+ (fn (_, repr) =>
let
val c = String.sub (repr, 0)
in
c <> kwPrefix andalso c <> cppPrefix
- andalso tk <> NewLine
+ andalso c <> otherPrefix
end)
tokenRepr
@@ -437,7 +441,7 @@ struct
fun fsmEat stream =
let
open Array
- val pos = S.getPos stream
+ val (pos, stream) = S.getPos stream
fun get curState stream =
let
@@ -456,14 +460,15 @@ struct
get nextState stream
end
end
+ val (tk, stream) = get 0 stream
in
- (fn (tk, stream) => ((pos, tk), stream)) $ get 0 stream
+ (tk, pos, stream)
end
fun tkError2aug stream (dx, msg) =
let
- val (id, pos) = S.getPosAfterCharRead stream
- val pos = S.pos2ppos (id, pos + dx) stream
+ val off = S.getOffset stream - 1 + dx
+ val (pos, _) = S.getPosRaw off stream
in
TkErrorAug (pos, msg)
end
@@ -471,22 +476,20 @@ struct
fun parserWrapper stream parser acc =
let
val stream = S.ungetc stream
- val P as (_, startOff) = S.getPos stream
- fun parse' stream acc = let
+ val startOff = S.getOffset stream
+ val (pos, stream) = S.getPos stream
+
+ fun parse' stream acc =
+ let
val (c, stream) = S.getchar stream
val (acc, tk, stream) = parser acc (stream, startOff) c handle
TkError (TkiDx dx, msg) => raise tkError2aug stream (dx, msg)
- | TkError (TkiStart, msg) =>
- let
- val startPos = S.pos2ppos P stream
- in
- raise TkErrorAug (startPos, msg)
- end
+ | TkError (TkiStart, msg) => raise TkErrorAug (pos, msg)
| TkError (TkiEOF, msg) =>
let
- val pos = S.pposWithoutCol $ S.pos2ppos P stream
+ val (pos, _) = S.EOFpos stream
in
raise TkErrorAug (pos, msg)
end
@@ -498,16 +501,11 @@ struct
val (tk, stream) = parse' stream acc
in
- ((P, tk): fullToken, stream)
+ (tk, pos, stream)
end
fun finishSeqRead startOff stream =
- let
- val (_, endOff) = S.getPos stream
- val s = S.getSubstr startOff endOff stream
- in
- s
- end
+ S.getSubstr startOff (S.getOffset stream) stream
fun keywordHashtableGen () =
let
@@ -871,19 +869,14 @@ struct
| formCppDir kwIf = CppIf
| formCppDir _ = raise ExpectedCppDir
- fun handleCppDir tk prevPos stream =
- let
- val (pos, tk') = tk
- in
- (prevPos, formCppDir tk') handle
+ fun handleCppDir (pos, tk) =
+ formCppDir tk handle
ExpectedCppDir =>
- raise TkErrorAug (S.pos2ppos pos stream,
- "expected preprocessor directive")
- end
+ raise TkErrorAug (pos, "expected preprocessor directive")
fun unexpectedCharRaise stream c =
let
- val pos = S.pos2ppos (S.getPosAfterCharRead stream) stream
+ val (pos, _) = S.getPosAfterChar stream
val repr =
if isPrintable c then
str c
@@ -899,13 +892,8 @@ struct
let
val (c, stream) =
case S.getchar stream of
- (NONE, _) =>
- let
- val pos = S.pos2ppos pos stream
- in
- raise TkErrorAug (pos, "unfinished comment")
- end
- | (SOME c, stream) => (c, stream)
+ (NONE, _) => raise TkErrorAug (pos, "unfinished comment")
+ | (SOME c, stream) => (c, stream)
in
if prevIsAsterisk andalso c = #"/" then
stream
@@ -922,8 +910,7 @@ struct
val raiseErr = fn () =>
let
- val pos = S.getPosAfterCharRead stream
- val pos = S.pos2ppos pos stream
+ val (pos, _) = S.getPosAfterChar stream
in
raise TkErrorAug (pos, "expected \\n after backslash")
end
@@ -939,40 +926,39 @@ struct
fun processSymbol stream =
let
- val (T as (p, tk), stream) = fsmEat $ S.ungetc stream
+ val (tk, pos, stream) = fsmEat $ S.ungetc stream
+ val S.Pos (_, _, col) = pos
in
case tk of
- CommentStart => getToken $ skipComment stream p
- | DoubleDot => (SOME (p, Dot), S.ungetc stream)
+ CommentStart => getToken $ skipComment stream pos
+ | DoubleDot => (Dot, pos, S.ungetc stream)
| Hash =>
- if S.isFirstOnLine p stream then
+ if col = 1 then
let
- val (tk, stream) = getToken stream
+ val (tk, pos', stream) = getToken stream
in
- case tk of
- NONE =>
- raise TkErrorAug (S.pos2ppos p stream,
- "unfinished preprecessor directive")
- | SOME tk =>
- (SOME $ handleCppDir tk p stream, stream)
+ if tk = EOS then
+ raise TkErrorAug (pos, "unfinished preprecessor directive")
+ else
+ (handleCppDir (pos', tk), pos, stream)
end
else
- (SOME T, stream)
- | _ => (SOME T, stream)
+ (tk, pos, stream)
+ | _ => (tk, pos, stream)
end
and getToken stream =
let
val (c, stream) = S.getchar stream
- fun @-> parser acc =
- (fn (tk, s) => (SOME tk, s)) $ parserWrapper stream parser acc
+ fun conv tk (pos, stream) = (tk, pos, stream)
+ fun @-> parser acc = parserWrapper stream parser acc
in
case c of
- NONE => (NONE, stream)
+ NONE => conv EOS $ S.EOFpos stream
| SOME c =>
if c = #"\n" then
- (SOME (S.getPosAfterCharRead stream, NewLine), stream)
+ conv NewLine $ S.getPosAfterChar stream
else if Char.isSpace c then
getToken stream
else if isIdStart c then
@@ -991,25 +977,12 @@ struct
unexpectedCharRaise stream c
end
- fun tokenize stream =
- let
- fun aux acc stream =
- let
- val (tk, stream) = getToken stream
- in
- case tk of
- NONE => rev acc
- | SOME tk => aux (tk :: acc) stream
- end
- in
- aux [] stream
- end
-
+ (* TODO: remove *)
fun debugPrint tkl fname =
let
- fun print' line _ ((_, NewLine) :: tks) =
+ fun print' line _ ((NewLine, _) :: tks) =
print' (line + 1) true tks
- | print' line firstOnLine ((_, tk) :: tks) = (
+ | print' line firstOnLine ((tk, _) :: tks) = (
if firstOnLine then (
print "\n";
printLn $ fname ^ ":" ^ Int.toString line;
diff --git a/tokenizer.sig b/tokenizer.sig
index d34f934..7ea6f63 100644
--- a/tokenizer.sig
+++ b/tokenizer.sig
@@ -13,6 +13,8 @@ signature TOKENIZER = sig
Invalid |
EOS |
NewLine |
+ MacroStart of string |
+ MacroEnd |
Num of numConst |
@@ -122,17 +124,14 @@ signature TOKENIZER = sig
CppError |
CppPragma
- type fullToken = S.pos * token
-
(* Fatal. both may be thrown by tokenize *)
exception FsmTableIsTooSmall
- exception TkErrorAug of S.ppos * string
+ exception TkErrorAug of S.pos * string
- val getToken: S.t -> fullToken option * S.t
+ val getToken: S.t -> token * S.pos * S.t
- val tokenize: S.t -> fullToken list
val token2str: token -> string
val printToken: token -> unit
- val debugPrint: fullToken list -> string -> unit
+ val debugPrint: (token * S.pos) list -> string -> unit
end