-rw-r--r--   .gitignore                                        4
-rw-r--r--   ccross.mlb                                        9
-rw-r--r--   ccross.sig                                        4
-rw-r--r--   ccross.sml                                       14
-rw-r--r--   cpp.fun (renamed from cpp.sml)                   26
-rw-r--r--   cpp.sig                                           6
-rw-r--r--   exn_handler.fun (renamed from exn_handler.sml)   16
-rw-r--r--   exn_handler.sig                                   3
-rw-r--r--   tokenizer.fun (renamed from tokenizer.sml)       97
-rw-r--r--   tokenizer.sig                                    10
10 files changed, 107 insertions, 82 deletions
diff --git a/.gitignore b/.gitignore
--- a/.gitignore
+++ b/.gitignore
@@ -1,5 +1,5 @@
-test*
+test
 ccross
-todo*
+doc/todo.txt
 mlmon.out*
 *dot
diff --git a/ccross.mlb b/ccross.mlb
--- a/ccross.mlb
+++ b/ccross.mlb
@@ -13,11 +13,14 @@ in
   hashtable.sml

   tokenizer.sig
-  tokenizer.sml
+  tokenizer.fun

   cpp.sig
-  cpp.sml
+  cpp.fun

-  exn_handler.sml
+  exn_handler.sig
+  exn_handler.fun
+
+  ccross.sig
   ccross.sml
 end
diff --git a/ccross.sig b/ccross.sig
new file mode 100644
index 0000000..3bfaf9e
--- /dev/null
+++ b/ccross.sig
@@ -0,0 +1,4 @@
+signature CCROSS = sig
+  structure P: CPP
+  structure ExnHandler: EXN_HANDLER
+end
diff --git a/ccross.sml b/ccross.sml
--- a/ccross.sml
+++ b/ccross.sml
@@ -1,4 +1,16 @@
-fun main [fname] = Cpp.debugPrint fname
+structure ccross:> CCROSS = struct
+  structure T:> TOKENIZER =
+    Tokenizer(structure H = Hashtable; structure S = Stream)
+
+  structure P:> CPP = Cpp(T)
+
+  structure ExnHandler:> EXN_HANDLER =
+    ExnHandler(structure T = T; structure P = P)
+end
+
+val () = MLton.Exn.setTopLevelHandler ccross.ExnHandler.handler
+
+fun main [fname] = ccross.P.debugPrint fname
   | main _ = printLn "Expected a single argument: file name"

 val () = main $ CommandLine.arguments ()
diff --git a/cpp.sml b/cpp.fun
--- a/cpp.sml
+++ b/cpp.fun
@@ -1,12 +1,14 @@
-structure Cpp:> CPP = struct
-  type tkPos = Stream.pos
+functor Cpp(T: TOKENIZER): CPP = struct
+  structure T = T
+
+  type tkPos = T.S.pos

   type t =
-    { streams: Stream.t list, fileInfo: Stream.fileInfo list,
-      lastPos: tkPos option, firstId: Stream.fileId };
+    { streams: T.S.t list, fileInfo: T.S.fileInfo list,
+      lastPos: tkPos option, firstId: T.S.fileId };

   datatype tkExp =
-    Tk of Tokenizer.token |
+    Tk of T.token |
     Id |
     NumConst |
     StrLiteral |
@@ -21,8 +23,8 @@ structure Cpp:> CPP = struct

   fun create fname =
     let
-      val stream = Stream.create fname
-      val info = Stream.convert stream
+      val stream = T.S.create fname
+      val info = T.S.convert stream
     in
       { streams = [stream] , fileInfo = [info], lastPos = NONE, firstId = #1 info }
     end
@@ -30,7 +32,7 @@ structure Cpp:> CPP = struct
   fun getToken ({ streams = stream :: tail, fileInfo, lastPos, firstId }: t) =
     let
-      val (tk, stream) = Tokenizer.getToken stream
+      val (tk, stream) = T.getToken stream
     in
       case tk of
        NONE => getToken { streams = tail, fileInfo, lastPos, firstId }
@@ -43,17 +45,17 @@ structure Cpp:> CPP = struct
       let
         val pos = SOME (#1 lastPos, ~1) (* EOF *)
       in
-        (Tokenizer.EOS, {streams = [], fileInfo, lastPos = pos, firstId })
+        (T.EOS, {streams = [], fileInfo, lastPos = pos, firstId })
       end
     | getToken { streams = [], fileInfo, lastPos = NONE, firstId } =
-        (Tokenizer.EOS, { streams = [], fileInfo,
+        (T.EOS, { streams = [], fileInfo,
                           lastPos = SOME (firstId, ~1), firstId })

   fun getLastPos ({ lastPos = NONE, ... }: t) = raise Unreachable
     | getLastPos { lastPos = SOME p, ... } = p

   val tkExp2str = fn
-    (Tk tk) => Tokenizer.token2str tk
+    (Tk tk) => T.token2str tk
   | Id: tkExp => "identifier"
   | NumConst => "numeric constant"
   | StrLiteral => "string literal"
@@ -67,7 +69,7 @@ structure Cpp:> CPP = struct
         case List.find (fn (id', _, _) => id' = id) $ #fileInfo stream of
           NONE => raise StreamTooOld
         | SOME fileInfo => fileInfo
-      val pos = Stream.ppos2str $ Stream.pos2pposWithFI (id, pos) fileInfo
+      val pos = T.S.ppos2str $ T.S.pos2pposWithFI (id, pos) fileInfo
     in
       raise TkExpected (pos, expList)
     end
diff --git a/cpp.sig b/cpp.sig
--- a/cpp.sig
+++ b/cpp.sig
@@ -1,4 +1,6 @@
 signature CPP = sig
+  structure T: TOKENIZER
+
   type t
   type tkPos
   type tkExpectedValue
@@ -6,7 +8,7 @@ signature CPP = sig
   exception TkExpected of tkExpectedValue

   datatype tkExp =
-    Tk of Tokenizer.token |
+    Tk of T.token |
     Id |
     NumConst |
     StrLiteral |
@@ -15,7 +17,7 @@ signature CPP = sig
     Op

   val create: string -> t
-  val getToken: t -> Tokenizer.token * t
+  val getToken: t -> T.token * t
   val getLastPos: t -> tkPos

   val prepAndRaise: t -> tkPos -> tkExp list -> 'a
diff --git a/exn_handler.sml b/exn_handler.fun
index 50fb8dc..6e069d6 100644
--- a/exn_handler.sml
+++ b/exn_handler.fun
@@ -1,4 +1,6 @@
-structure GlobalExnHandler: sig val handler: exn -> unit end = struct
+functor ExnHandler(structure T: TOKENIZER; structure P: CPP):
+  EXN_HANDLER =
+struct

   fun eprint s = printLn $ "error: " ^ s

@@ -29,18 +31,12 @@ structure GlobalExnHandler: sig val handler: exn -> unit end = struct
     | ioExn _ = (printLn "ioExn: unreachable"; exit 254)

   fun handler e =
-    let
-      open Tokenizer Cpp
-    in
     (case e of
-      FsmTableIsTooSmall =>
+      T.FsmTableIsTooSmall =>
         eprint "fsm table is too small. Increate 'maxState' value"
     | IO.Io _ => ioExn e
-    | TkErrorAug (pos, msg) => eprint $ Stream.ppos2str pos ^ ": " ^ msg
-    | TkExpected v => tkExpectedPrint v
+    | T.TkErrorAug (pos, msg) => eprint $ T.S.ppos2str pos ^ ": " ^ msg
+    | P.TkExpected v => P.tkExpectedPrint v
     | _ => otherExn e;
     exit 255)
-  end
 end
-
-val () = MLton.Exn.setTopLevelHandler GlobalExnHandler.handler
diff --git a/exn_handler.sig b/exn_handler.sig
new file mode 100644
index 0000000..ac5c574
--- /dev/null
+++ b/exn_handler.sig
@@ -0,0 +1,3 @@
+signature EXN_HANDLER = sig
+  val handler: exn -> unit
+end
diff --git a/tokenizer.sml b/tokenizer.fun
index 10c2e77..5cca203 100644
--- a/tokenizer.sml
+++ b/tokenizer.fun
@@ -1,4 +1,8 @@
-structure Tokenizer:> TOKENIZER = struct
+functor Tokenizer(structure H: HASHTABLE; structure S: STREAM)
+  : TOKENIZER =
+struct
+
+  structure S = S

   datatype intType = ItDec | ItOct | ItHex
   datatype intSfx = IsNone | IsU | IsL | IsUL | IsLL | IsULL
@@ -120,12 +124,12 @@ structure Tokenizer:> TOKENIZER = struct

   val kwPrefix = #"@"
   val cppPrefix = #"$"

-  type fullToken = Stream.pos * token
+  type fullToken = S.pos * token

   datatype tkErrorAuxInfo = TkiEOF | TkiDx of int | TkiStart

   exception TkError of tkErrorAuxInfo * string
-  exception TkErrorAug of Stream.ppos * string
+  exception TkErrorAug of S.ppos * string

   exception ExpectedCppDir (* handled in postprocess *)
@@ -425,11 +429,11 @@ structure Tokenizer:> TOKENIZER = struct
   fun fsmEat stream =
     let
       open Array
-      val pos = Stream.getPos stream
+      val pos = S.getPos stream

       fun get curState stream =
         let
-          val (c, stream) = Stream.getchar stream
+          val (c, stream) = S.getchar stream
         in
           case c of
             NONE => (#1 $ sub (#2 $ fsmTable (), curState), stream)
@@ -439,7 +443,7 @@ structure Tokenizer:> TOKENIZER = struct
               val nextState = sub (row, ord c)
             in
               if nextState = ~1 then
-                (tk, Stream.ungetc stream)
+                (tk, S.ungetc stream)
               else
                 get nextState stream
             end
@@ -450,32 +454,31 @@ structure Tokenizer:> TOKENIZER = struct
   fun tkError2aug stream (dx, msg) =
     let
-      val (id, pos) = Stream.getPosAfterCharRead stream
-      val pos = Stream.pos2ppos (id, pos + dx) stream
+      val (id, pos) = S.getPosAfterCharRead stream
+      val pos = S.pos2ppos (id, pos + dx) stream
     in
       TkErrorAug (pos, msg)
     end

-  fun parseGeneric stream parser acc =
+  fun parserWrapper stream parser acc =
     let
-      val stream = Stream.ungetc stream
-      val P as (_, startOff) = Stream.getPos stream
+      val stream = S.ungetc stream
+      val P as (_, startOff) = S.getPos stream

       fun parse' stream acc =
         let
-          val (c, stream) = Stream.getchar stream
+          val (c, stream) = S.getchar stream

           val (acc, tk, stream) =
             parser acc (stream, startOff) c
             handle TkError (TkiDx dx, msg) => raise tkError2aug stream (dx, msg)
                  | TkError (TkiStart, msg) =>
                      let
-                       val startPos = Stream.pos2ppos P stream
+                       val startPos = S.pos2ppos P stream
                      in
                        raise TkErrorAug (startPos, msg)
                      end
                  | TkError (TkiEOF, msg) =>
                      let
-                       open Stream
-                       val pos = pposWithoutCol $ pos2ppos P stream
+                       val pos = S.pposWithoutCol $ S.pos2ppos P stream
                      in
                        raise TkErrorAug (pos, msg)
                      end
@@ -492,21 +495,20 @@ structure Tokenizer:> TOKENIZER = struct
   fun finishSeqRead startOff stream =
     let
-      val (_, endOff) = Stream.getPos stream
-      val s = Stream.getSubstr startOff endOff stream
+      val (_, endOff) = S.getPos stream
+      val s = S.getSubstr startOff endOff stream
     in
       s
     end

   fun keywordHashtableGen () =
     let
-      open Hashtable
-      val table = create 128
+      val table = H.create 128

       val () =
         List.app
           (fn (tk, repr) =>
             if String.sub (repr, 0) = kwPrefix then
-              insert table (String.extract (repr, 1, NONE)) tk
+              H.insert table (String.extract (repr, 1, NONE)) tk
             else
               ())
           tokenRepr
@@ -517,7 +519,7 @@ structure Tokenizer:> TOKENIZER = struct
   val keywordHashtable = lazy keywordHashtableGen

   fun findKeyword str =
-    case Hashtable.lookup (keywordHashtable ()) str of
+    case H.lookup (keywordHashtable ()) str of
       NONE => Id str
     | SOME tk => tk

@@ -537,7 +539,7 @@ structure Tokenizer:> TOKENIZER = struct
       if isIdBody c then
         ((), NONE, stream)
       else
-        finalize (Stream.ungetc stream)
+        finalize (S.ungetc stream)
     end

   datatype intMode = ImDec | ImOct | ImInvalidOct | ImHex
@@ -547,7 +549,7 @@ structure Tokenizer:> TOKENIZER = struct

   fun getLongestSeq acc pred stream =
     let
-      val (c, stream') = Stream.getchar stream
+      val (c, stream') = S.getchar stream
     in
       if isSome c andalso pred $ valOf c then
         getLongestSeq (valOf c :: acc) pred stream'
@@ -588,7 +590,7 @@ structure Tokenizer:> TOKENIZER = struct
   fun numParser NpInit (stream, _) (SOME c) =
     if c = #"0" then
       let
-        val (c, stream') = Stream.getchar stream
+        val (c, stream') = S.getchar stream
       in
         case c of
           NONE =>
@@ -617,7 +619,7 @@ structure Tokenizer:> TOKENIZER = struct
       fun finish () =
         let
           val () = checkAndRaise ImInvalidOct "invalid octal constant"
-          val stream = Stream.ungetc stream
+          val stream = S.ungetc stream
           val str = finishSeqRead (startOff + offset) stream
           val (sfx, stream) = getIntSuffix stream
         in
@@ -640,9 +642,9 @@ structure Tokenizer:> TOKENIZER = struct
     end
   | numParser (FloatMode FmDot) (stream, startOff) _ =
       let
-        val (len, stream) = skipDigitSeq 0 $ Stream.ungetc stream
+        val (len, stream) = skipDigitSeq 0 $ S.ungetc stream

-        val (c, stream') = Stream.getchar stream
+        val (c, stream') = S.getchar stream

        fun finish () =
          let
@@ -666,7 +668,7 @@ structure Tokenizer:> TOKENIZER = struct
           if c = NONE then
             raise TkError (TkiDx 0, "expected digit")
           else if valOf c <> #"+" andalso valOf c <> #"-" then
-            (0, Stream.ungetc stream)
+            (0, S.ungetc stream)
           else
             (~1, stream)

@@ -694,7 +696,7 @@ structure Tokenizer:> TOKENIZER = struct
             (SOME $ chr acc, stream)
           else
             let
-              val (c, stream) = Stream.getchar stream
+              val (c, stream) = S.getchar stream
             in
               case c of
                 NONE => (SOME $ chr acc, stream)
@@ -702,7 +704,7 @@ structure Tokenizer:> TOKENIZER = struct
                 if isOctal c then
                   follow stream (acc * 8 + chrIntVal c) (count + 1)
                 else
-                  (SOME $ chr acc, Stream.ungetc stream)
+                  (SOME $ chr acc, S.ungetc stream)
             end
         in
           follow stream (chrIntVal c) 1
@@ -712,7 +714,7 @@ structure Tokenizer:> TOKENIZER = struct
         let
           fun follow stream acc count =
             let
-              val (c, stream) = Stream.getchar stream
+              val (c, stream) = S.getchar stream

               val noHex = TkError (TkiDx 0, "\\x without hex digits")
             in
@@ -732,7 +734,7 @@ structure Tokenizer:> TOKENIZER = struct
                 if count = 0 then
                   raise noHex
                 else
-                  (SOME $ chr acc, Stream.ungetc stream)
+                  (SOME $ chr acc, S.ungetc stream)
             end
         in
           follow stream 0 0
@@ -742,7 +744,7 @@ structure Tokenizer:> TOKENIZER = struct
       let
         fun raiseErr0 msg = raise TkError (TkiDx 0, msg)

-        val (c, stream) = Stream.getchar stream
+        val (c, stream) = S.getchar stream
         val c =
           case c of
             NONE => raiseErr0 "unfinished escape sequence"
@@ -866,14 +868,13 @@ structure Tokenizer:> TOKENIZER = struct
     in
       (prevPos, formCppDir tk')
       handle ExpectedCppDir =>
-        raise TkErrorAug (Stream.pos2ppos pos stream,
+        raise TkErrorAug (S.pos2ppos pos stream,
                           "expected preprocessor directive")
     end

   fun unexpectedCharRaise stream c =
     let
-      open Stream
-      val pos = pos2ppos (getPosAfterCharRead stream) stream
+      val pos = S.pos2ppos (S.getPosAfterCharRead stream) stream
       val repr =
         if isPrintable c then
           str c
@@ -888,10 +889,10 @@ structure Tokenizer:> TOKENIZER = struct
       fun skip prevIsAsterisk stream =
         let
           val (c, stream) =
-            case Stream.getchar stream of
+            case S.getchar stream of
              (NONE, _) =>
                let
-                  val pos = Stream.pos2ppos pos stream
+                  val pos = S.pos2ppos pos stream
                in
                  raise TkErrorAug (pos, "unfinished comment")
                end
@@ -908,12 +909,12 @@ structure Tokenizer:> TOKENIZER = struct

   fun handleBackslash stream =
     let
-      val (c, stream) = Stream.getchar stream
+      val (c, stream) = S.getchar stream

       val raiseErr = fn () =>
         let
-          val pos = Stream.getPosAfterCharRead stream
-          val pos = Stream.pos2ppos pos stream
+          val pos = S.getPosAfterCharRead stream
+          val pos = S.pos2ppos pos stream
         in
           raise TkErrorAug (pos, "expected \\n after backslash")
         end
@@ -929,19 +930,19 @@ structure Tokenizer:> TOKENIZER = struct

   fun processSymbol stream =
     let
-      val (T as (p, tk), stream) = fsmEat $ Stream.ungetc stream
+      val (T as (p, tk), stream) = fsmEat $ S.ungetc stream
     in
       case tk of
         CommentStart => getToken $ skipComment stream p
-      | DoubleDot => (SOME (p, Dot), Stream.ungetc stream)
+      | DoubleDot => (SOME (p, Dot), S.ungetc stream)
       | Hash =>
-          if Stream.isFirstOnLine p stream then
+          if S.isFirstOnLine p stream then
            let
              val (tk, stream) = getToken stream
            in
              case tk of
                NONE =>
-                  raise TkErrorAug (Stream.pos2ppos p stream,
+                  raise TkErrorAug (S.pos2ppos p stream,
                                    "unfinished preprecessor directive")
              | SOME tk =>
                  (SOME $ handleCppDir tk p stream, stream)
@@ -953,16 +954,16 @@ structure Tokenizer:> TOKENIZER = struct

   and getToken stream =
     let
-      val (c, stream) = Stream.getchar stream
+      val (c, stream) = S.getchar stream

       fun @-> parser acc =
-        (fn (tk, s) => (SOME tk, s)) $ parseGeneric stream parser acc
+        (fn (tk, s) => (SOME tk, s)) $ parserWrapper stream parser acc
     in
       case c of
         NONE => (NONE, stream)
       | SOME c =>
           if c = #"\n" then
-            (SOME (Stream.getPosAfterCharRead stream, NewLine), stream)
+            (SOME (S.getPosAfterCharRead stream, NewLine), stream)
           else if Char.isSpace c then
             getToken stream
           else if isIdStart c then
diff --git a/tokenizer.sig b/tokenizer.sig
index 245cfbe..c9626e0 100644
--- a/tokenizer.sig
+++ b/tokenizer.sig
@@ -1,4 +1,6 @@
 signature TOKENIZER = sig
+  structure S: STREAM
+
   datatype intType = ItDec | ItOct | ItHex
   datatype intSfx = IsNone | IsU | IsL | IsUL | IsLL | IsULL
   datatype floatSfx = FsNone | FsF | FsL
@@ -116,15 +118,15 @@ signature TOKENIZER = sig
     CppError |
     CppPragma

-  type fullToken = Stream.pos * token
+  type fullToken = S.pos * token

   (* Fatal. both may be thrown by tokenize *)
   exception FsmTableIsTooSmall
-  exception TkErrorAug of Stream.ppos * string
+  exception TkErrorAug of S.ppos * string

-  val getToken: Stream.t -> fullToken option * Stream.t
+  val getToken: S.t -> fullToken option * S.t

-  val tokenize: Stream.t -> fullToken list
+  val tokenize: S.t -> fullToken list

   val token2str: token -> string
   val printToken: token -> unit