summaryrefslogtreecommitdiff
path: root/cpp.sml
diff options
context:
space:
mode:
Diffstat (limited to 'cpp.sml')
-rw-r--r--cpp.sml1023
1 files changed, 4 insertions, 1019 deletions
diff --git a/cpp.sml b/cpp.sml
index cb04406..32fcd3c 100644
--- a/cpp.sml
+++ b/cpp.sml
@@ -1,1025 +1,10 @@
-fun $ (x, y) = x y
-infixr 0 $
-
-fun printLn s = (print s; print "\n")
-
-datatype includeArg = IARel of string | IAFromRef of string
-
-datatype token =
- Invalid |
- Number of string |
- Id of string |
- CharConst of string * int |
- StringConst of string |
-
- kwBreak |
- kwCase |
- kwChar |
- kwConst |
- kwContinue |
- kwDefault |
- kwDouble |
- kwElse |
- kwEnum |
- kwExtern |
- kwFloat |
- kwFor |
- kwGoto |
- kwInt |
- kwLong |
- kwRegister |
- kwReturn |
- kwShort |
- kwSigned |
- kwSizeof |
- kwStruct |
- kwSwitch |
- kwTypedef |
- kwUnion |
- kwUnsigned |
- kwVoid |
- kwVolatile |
-
- LParen |
- RParen |
- LBracket |
- RBracket |
- LBrace |
- RBrace |
-
- QuestionMark |
- Colon |
- Coma |
- Semicolon |
-
- Arrow |
- Plus |
- DoublePlus|
- Minus |
- DoubleMinus |
- Ampersand |
- Asterisk |
- Slash |
- Tilde |
- ExclMark |
- Percent |
- DoubleGreater |
- DoubleLess |
- Greater |
- Less |
- EqualSign |
- LessEqualSign |
- GreaterEqualSign |
- DoubleEqualSign |
- ExclMarkEqualSign |
- Cap |
- VerticalBar |
- DoubleAmpersand |
- DoubleVerticalBar |
-
- AsteriskEqualSign |
- SlashEqualSign |
- PercentEqualSign |
- PlusEqualSign |
- MinusEqualSign |
- DoubleLessEqualSign |
- DoubleGreaterEqualSign |
- AmpersandEqualSign |
- CapEqualSign |
- VerticalBarEqualSign |
-
- Hash |
- DoubleHash |
-
- Dot |
- DoubleDot |
- TripleDot |
-
- CommentStart |
-
- CppInclude of includeArg |
- CppDefine |
- CppUndef |
- CppIf |
- CppIfdef |
- CppIfndef |
- CppElse |
- CppElif |
- CppEndif |
- CppPragma
-
-val kwPrefix = #"@"
-val cppPrefix = #"$"
-
-type fileId = int
-type fileOffset = int
-type pos = fileId * fileOffset
-type convPos = string * int * int option
-type fullToken = pos * token
-
-datatype tkErrorAuxInfo = TkiEOF | TkiDx of int | TkiStart
-
-exception TkError of tkErrorAuxInfo * string
-exception TkErrorAug of convPos * string (* main exception *)
-
-exception ExpectedCppDir (* handled in postprocess *)
-
-(* handled in tokenizer *)
-exception EndOfStream
-exception EndOfFile
-
-(* Unreachable (should be) *)
-exception Unreachable
-exception FsmTableIsTooSmall
-exception InvalidStream
-exception InvalidStreamAdvance
-exception TokenWithoutRepr
-
-(* handled in readIncludeArg *)
-exception LineWithoutNl
-
-fun pos2str (pos, line, col) =
-let
- val % = Int.toString
-in
- case col of
- SOME col => pos ^ ":" ^ %line ^ ":" ^ %col
- | NONE => pos ^ ":" ^ %line
-end
-fun eprint s = printLn $ "error: " ^ s
-
-fun otherExn e =
-let
- val hist = MLton.Exn.history e
-in
- eprint $ "exception " ^ exnMessage e ^ " was raised";
- if hist = [] then
- (printLn "No stack trace is avaliable";
- printLn "Recompile with -const \"Exn.keepHistory true\"")
- else
- List.app (fn x => printLn $ "\t" ^ x) hist
-end
-
-fun exit code = Posix.Process.exit $ Word8.fromInt code
-
-fun ioExn (IO.Io { name, function = _, cause }) =
-let
- open OS
- val prefix = name ^ ": "
- val reason =
- case cause of
- SysErr (str, _) => str
- | _ => exnMessage cause
-in
- printLn $ prefix ^ reason
-end
- | ioExn _ = (printLn "ioExn: unreachable"; exit 254)
-
-fun globalExnHandler e =
- (case e of
- FsmTableIsTooSmall =>
- eprint "fsm table is too small. Increate 'maxState' value"
- | IO.Io _ => ioExn e
- | TkErrorAug (pos, msg) => eprint $ pos2str pos ^ ": " ^ msg
- | _ => otherExn e;
- exit 255)
-
-val () = MLton.Exn.setTopLevelHandler globalExnHandler
-
-val tokenRepr =
-let
- fun & repr = str kwPrefix ^ repr
- fun % repr = str cppPrefix ^ repr
-in
- [
- (kwBreak, &"break"),
- (kwCase, &"case"),
- (kwChar, &"char"),
- (kwConst, &"const"),
- (kwContinue, &"continue"),
- (kwDefault, &"default"),
- (kwDouble, &"double"),
- (kwElse, &"else"),
- (kwEnum, &"enum"),
- (kwExtern, &"extern"),
- (kwFloat, &"float"),
- (kwFor, &"for"),
- (kwGoto, &"goto"),
- (kwInt, &"int"),
- (kwLong, &"long"),
- (kwRegister, &"register"),
- (kwReturn, &"return"),
- (kwShort, &"short"),
- (kwSigned, &"signed"),
- (kwSizeof, &"sizeof"),
- (kwStruct, &"struct"),
- (kwSwitch, &"switch"),
- (kwTypedef, &"typedef"),
- (kwUnion, &"union"),
- (kwUnsigned, &"unsigned"),
- (kwVoid, &"void"),
- (kwVolatile, &"volatile"),
-
- (LParen, "("),
- (RParen, ")"),
- (LBracket, "["),
- (RBracket, "]"),
- (LBrace, "{"),
- (RBrace, "}"),
-
- (QuestionMark, "?"),
- (Colon, ":"),
- (Coma, ","),
- (Semicolon, ";"),
-
- (Arrow, "->"),
- (Plus, "+"),
- (DoublePlus, "++"),
- (Minus, "-"),
- (DoubleMinus, "--"),
- (Ampersand, "&"),
- (Asterisk, "*"),
- (Slash, "/"),
- (Tilde, "~"),
- (ExclMark, "!"),
- (Percent, "%"),
- (DoubleLess, "<<"),
- (DoubleGreater, ">>"),
- (Less, "<"),
- (Greater, ">"),
- (EqualSign, "="),
- (LessEqualSign, "<="),
- (GreaterEqualSign, ">="),
- (DoubleEqualSign, "=="),
- (ExclMarkEqualSign, "!="),
- (Cap, "^"),
- (VerticalBar, "|"),
- (DoubleAmpersand, "&&"),
- (DoubleVerticalBar, "||"),
-
- (AsteriskEqualSign, "*="),
- (SlashEqualSign, "/="),
- (PercentEqualSign, "%="),
- (PlusEqualSign, "+="),
- (MinusEqualSign, "-="),
- (DoubleLessEqualSign, "<<="),
- (DoubleGreaterEqualSign, ">>="),
- (AmpersandEqualSign, "&="),
- (CapEqualSign, "^="),
- (VerticalBarEqualSign, "|="),
-
- (Hash, "#"),
- (DoubleHash, "##"),
-
- (Dot, "."),
- (DoubleDot, ".."),
- (TripleDot, "..."),
-
- (CommentStart, "/*"),
-
- (CppDefine, %"define"),
- (CppUndef, %"undef"),
- (CppIf, %"if"),
- (CppIfdef, %"ifdef"),
- (CppIfndef, %"ifndef"),
- (CppElse, %"else"),
- (CppElif, %"elif"),
- (CppEndif, %"endif"),
- (CppPragma, %"pragma")
-]
-end
-
-val printToken = fn
- Number s => printLn $ "Num: " ^ s
- | Id s => printLn $ "Id: " ^ s
- | CharConst (repr, _) => printLn repr
- | CppInclude arg =>
- let
- val (start, end', arg) =
- case arg of
- IARel v => ("\"", "\"", v)
- | IAFromRef v => ("<", ">", v)
- in
- printLn $ (str cppPrefix) ^ "include " ^ start ^ arg ^ end'
- end
- | StringConst s =>
- printLn $ "\"" ^ s ^ "\""
- | v =>
- case List.find (fn (x, _) => x = v) tokenRepr of
- SOME (_, repr) => printLn repr
- | NONE => raise TokenWithoutRepr
-
-type stream = {
- (* list of file ids, file names and file contents *)
- allFiles: (fileId * string * string) list,
- (* stack of file ids, file offsets and file contents *)
- stack: (fileId * fileOffset * string) list
-}
-
-fun calcFilePos s offset =
-let
- fun calc s cur offset (line, col) =
- if cur = offset then
- (line, col)
- else
- calc s (cur + 1) offset
- (if String.sub (s, cur) = #"\n" then (line + 1, 1)
- else (line, col + 1))
-in
- calc s 0 offset (1, 1)
-end
-
-fun printPos fileList (id, pos) =
-let
- val triple = List.find (fn (fid, _, _) => fid = id) fileList
-in
- case triple of
- NONE => raise InvalidStream
- | SOME (_, fname, contents) =>
- let
- val (line, col) = calcFilePos contents pos
- val line = Int.toString line
- val col = Int.toString col
- in
- print $ fname ^ ":" ^ line ^ ":" ^ col ^ ": "
- end
-end
-
-fun readFile fname =
-let
- open TextIO
- val h = openIn fname
- val s = inputAll h
- val () = closeIn h
-in
- s
-end
-
-fun getchar
- ({ stack = (id, off, contents) :: rest, allFiles }: stream)
- : (char * stream) =
- if off < String.size contents then
- (String.sub (contents, off),
- { stack = (id, off + 1, contents) :: rest, allFiles })
- else
- raise EndOfFile
- | getchar _ = raise EndOfStream
-
-fun ungetc
- ({ stack = (id, off, contents) :: rest, allFiles }) =
- if off = 0 then
- raise InvalidStream
- else
- { stack = (id, off - 1, contents) :: rest, allFiles }
- | ungetc _ = raise InvalidStream
-
-fun readline { stack = (fid, off, contents) :: rest, allFiles } =
- let
- val prevIsSlash =
- off > 0 andalso String.sub (contents, off - 1) = #"\\"
-
- open String
- fun read prevIsSlash offset acc =
- let
- val c = sub (contents, offset)
- in
- if offset = size contents then
- raise LineWithoutNl
- else if c = #"\n" then
- if prevIsSlash then
- read (c = #"\\") (offset + 1) (#" " :: tl acc)
- else
- (implode $ rev acc, offset + 1)
- else
- read (c = #"\\") (offset + 1) (c :: acc)
- end
-
- val (arg, newOffset) = read prevIsSlash off []
- in
- (arg, { stack = (fid, newOffset, contents) :: rest, allFiles })
- end
- | readline _ = raise InvalidStream
-
-fun getOffset ({ stack = (_, off, _) :: _, ... }: stream) = off
- | getOffset _ = raise InvalidStream
-
-fun getPosAfterCharRead
- ({ stack = (id, off, _) :: _, ... }: stream) = (id, off - 1)
- | getPosAfterCharRead _ = raise InvalidStream
-
-fun getPposFromPos (id, pos)
- { stack = (_, _, _) :: _, allFiles} =
-let
- val (fname, contents) =
- case List.find (fn (fid, _, _) => fid = id) allFiles of
- NONE => raise InvalidStream
- | SOME (_, fname, contents) => (fname, contents)
-
- val (line, col) = calcFilePos contents pos
-in
- (fname, line, SOME col)
-end
- | getPposFromPos _ _ = raise InvalidStream
-
-fun getPos ({ stack = (id, off, _) :: _, ... }: stream) = (id, off)
- | getPos _ = raise InvalidStream
-
-fun getSubstr startOff endOff
- ({ stack = (_, _, contents) :: _, ... }: stream) =
- String.substring (contents, startOff, endOff - startOff)
- | getSubstr _ _ _ = raise InvalidStream
-
-fun advanceToNewFile
- ({ stack = (_, off, contents) :: rest, allFiles }: stream) =
- if off = String.size contents then
- { stack = rest, allFiles }
- else
- raise InvalidStreamAdvance
- | advanceToNewFile _ = raise InvalidStreamAdvance
-
-fun streamInit fname =
-let
- val contents = readFile fname
-in
- { allFiles = [(0, fname, contents)], stack = [(0, 0, contents)] }
-end
-
-fun isSpace c = c = #" " orelse c = #"\t" orelse c = #"\n"
-fun isLetter c =
- ord c >= ord #"a" andalso ord c <= ord #"z" orelse
- ord c >= ord #"A" andalso ord c <= ord #"Z"
-fun isIdStart c = c = #"_" orelse isLetter c
-fun isDigit c = ord c >= ord #"0" andalso ord c <= ord #"9"
-fun isIdBody c = isIdStart c orelse isDigit c
-
-fun isPrintable c = Char.isPrint c andalso c <> #" "
-
-(* FSM for parsing symbols *)
-
-val maxStates = 51
-
-fun fsmInsert (nextState, buf) curState tk (c :: cs) =
-let
- open Array
-
- val (_, row) = sub (buf, curState)
- val potNextState = sub (row, ord c)
-in
- if potNextState <> ~1 then
- fsmInsert (nextState, buf) potNextState tk cs
- else (
- update (row, ord c, !nextState);
- nextState := !nextState + 1;
- fsmInsert (nextState, buf) (!nextState - 1) tk cs
- )
-end
- | fsmInsert (_, buf) curState tk [] =
-let
- open Array
- val (_, row) = sub (buf, curState)
-in
- update (buf, curState, (tk, row))
-end
-
-fun isStartForFsmGen () =
-let
- open Array
-
- val lookupTable = array (128, false)
-
- fun firstChr s = ord $ String.sub (s, 0)
- val () = List.app
- (fn (_, repr) => update (lookupTable, firstChr repr, true))
- $ List.filter
- (fn (_, repr) =>
- let
- val c = String.sub (repr, 0)
- in
- c <> kwPrefix andalso c <> cppPrefix
- end)
- tokenRepr
-
-in
- fn c => sub (lookupTable, ord c)
-end
-
-val isStartForFsm = isStartForFsmGen ()
-
-fun fsmTableCreate () =
-let
- open Array
-
- val T as (nextState, buf) =
- (ref 1, array (maxStates, (Invalid, array (128, ~1))))
- val r = ref 1
-
- fun filterNeeded [] acc = acc
- | filterNeeded ((T as (_, repr)) :: tks) acc =
- filterNeeded tks
- (if isStartForFsm $ String.sub (repr, 0) then
- T :: acc
- else
- acc)
-
- val tokenRepr = filterNeeded tokenRepr []
-
- fun fsmInsert' T curState tk repr = fsmInsert T curState tk repr handle
- Subscript => raise FsmTableIsTooSmall
-in
- while !r < length buf do (
- update (buf, !r, (Invalid, array(128, ~1)));
- r := !r + 1
- );
-
- List.app (fn (v, p) => fsmInsert' T 0 v $ explode p) tokenRepr;
- if !nextState <> maxStates then
- printLn $ "note: Fsm table size can be smaller: "
- ^ Int.toString (!nextState) ^ " is enough"
- else ();
- T
-end
-
-(* Unused right now
-fun printTable (nextState, buf) =
-let
- fun printRow i row =
- if i = length row then
- print "\n"
- else
- let
- val state = sub (row, i)
- in
- if state = ~1 then
- ()
- else
- print ((str (chr i)) ^ ": " ^ (Int.toString state) ^ ", ");
- printRow (i + 1) row
- end
-
- fun print' rowNum buf =
- if rowNum = !nextState then
- ()
- else
- let
- val (tk, row) = sub (buf, rowNum)
- in
- print ((token2string tk) ^ ": ");
- printRow 0 row;
- print' (rowNum + 1) buf
- end
-in
- print ("NextState: " ^ Int.toString (!nextState) ^ "\n");
- print' 0 buf;
- print "\n"
-end
-*)
-
-val fsmTable = fsmTableCreate ()
-
-fun fsmEat stream =
-let
- open Array
- val stream = ungetc stream
- val pos = getPos stream
-
- fun get curState stream =
- let
- val (c, stream) = (fn (c, s) => (SOME c, s)) $ getchar stream handle
- _ => (NONE, stream)
- in
- case c of
- NONE => (#1 $ sub (#2 fsmTable, curState), stream)
- | SOME c =>
- let
- val (tk, row) = sub (#2 fsmTable, curState)
- val nextState = sub (row, ord c)
- in
- if nextState = ~1 then
- (tk, ungetc stream)
- else
- get nextState stream
- end
- end
-in
- (fn (tk, stream) => ((pos, tk), stream)) $ get 0 stream
-end
-
-fun isFirstOnLine (tk: fullToken) (stream: stream) =
-let
- val (id, offset) = #1 tk
-in
- case List.find (fn (fid, _, _) => id = fid) (#allFiles stream) of
- NONE => raise InvalidStream
- | SOME (_, _, contents) =>
- let
- fun returnToNL ~1 = true
- | returnToNL offset =
- let
- val chr = String.sub (contents, offset)
- in
- if chr = #"\n" then
- true
- else if isSpace chr then
- returnToNL (offset - 1)
- else
- false
- end
- in
- returnToNL (offset - 1)
- end
-end
-
-fun tkError2aug stream (dx, msg) =
-let
- val (id, pos) = getPosAfterCharRead stream
- val pos = getPposFromPos (id, pos + dx) stream
-in
- TkErrorAug (pos, msg)
-end
-
-fun parseGeneric stream parser acc =
-let
- val stream = ungetc stream
- val P as (_, startOff) = getPos stream
-
- fun parse' stream acc = let
- val (c, stream) = (fn (c, s) => (SOME c, s)) $ getchar stream handle
- _ => (NONE, stream)
-
- val (acc, tk, stream) = parser acc (stream, startOff) c handle
- TkError (TkiDx dx, msg) => raise tkError2aug stream (dx, msg)
- | TkError (TkiStart, msg) =>
- let
- val startPos = getPposFromPos P stream
- in
- raise TkErrorAug (startPos, msg)
- end
- | TkError (TkiEOF, msg) =>
- let
- val (file, line, _) = getPposFromPos P stream
- in
- raise TkErrorAug ((file, line, NONE), msg)
- end
- in
- case tk of
- NONE => parse' stream acc
- | _ => (valOf tk, stream)
- end
-
- val (tk, stream) = parse' stream acc
-in
- ((P, tk): fullToken, stream)
-end
-
-fun finishSeqRead startOff stream =
-let
- val (_, endOff) = getPos stream
- val s = getSubstr startOff endOff stream
-in
- s
-end
-
-fun findKeyword s =
- case List.find
- (fn (_, repr) =>
- String.sub (repr, 0) = kwPrefix andalso
- String.extract (repr, 1, NONE) = s)
- tokenRepr
- of
- SOME (tk, _) => tk
- | NONE => Id s
-
-fun idParser () (stream, startOff) c =
-let
- fun finalize stream =
- let
- val s = finishSeqRead startOff stream
- val tk = findKeyword s
- in
- ((), SOME tk, stream)
- end
-in
- case c of
- NONE => finalize stream
- | SOME c =>
- if isIdBody c then
- ((), NONE, stream)
- else
- finalize (ungetc stream)
-end
-
-fun formCppDir (Id s) =
-let
- open String
-in
- case List.find
- (fn (_, repr) =>
- sub (repr, 0) = cppPrefix andalso
- extract (repr, 1, NONE) = s)
- tokenRepr
- of
- SOME (tk, _) => tk
- | NONE => raise ExpectedCppDir
-end
- | formCppDir kwElse = CppElse
- | formCppDir _ = raise ExpectedCppDir
-
-fun numParser () (stream, startOff) c =
-let
- fun finalize stream =
- ((), SOME $ Number (finishSeqRead startOff stream), stream)
-in
- case c of
- NONE => finalize stream
- | SOME c =>
- if isDigit c then
- ((), NONE, stream)
- else
- finalize (ungetc stream)
-end
-
-fun eatEscSeq stream =
-let
- val (c, stream) = getchar stream handle
- _ => raise TkError (TkiDx 0, "unfinished escape sequence")
-in
- (case c of
- #"\\" => #"\\"
- | #"t" => #"\t"
- | #"n" => #"\n"
- | c => c,
- stream)
-end
-
-fun stringCut s = String.extract (s, 1, SOME $ String.size s - 2)
-
-datatype SeqParseState = SeqInit | SeqStart | SeqValue of int | SeqTerm
-
-datatype seqParseMode = SpmChr | SpmStr
-
-fun seqBound SpmChr = #"'"
- | seqBound SpmStr = #"\""
-
-fun seqExnConv mode (TkError (v, msg)) =
-let
- val bound = if mode = SpmChr then "'" else "\""
- val msg =
- String.translate (fn c => if c = #"%" then bound else str c) msg
-in
- TkError (v, msg)
-end
- | seqExnConv _ _ = raise Unreachable
-
-fun unfinishedSeq SpmChr = "unfinished character constant"
- | unfinishedSeq SpmStr = "unfinished string literal"
-
-fun seqParser mode SeqInit (stream, _) (SOME c) =
- if seqBound mode = c then
- (SeqStart, NONE, stream)
- else
- raise Unreachable
- | seqParser mode SeqStart (stream, _) (SOME c) =
- if c <> seqBound mode then
- let
- val (c, stream) =
- if c <> #"\\" then (c, stream) else eatEscSeq stream
- in
- (SeqValue (ord c), NONE, stream)
- end
- else if mode = SpmStr then
- (SeqTerm, SOME $ StringConst "", stream)
- else
- raise seqExnConv SpmChr $ TkError (TkiDx 0, "expected value after %")
- | seqParser mode (SeqValue v) (stream, startOff) (SOME c) =
- if seqBound mode = c then
- let
- fun term s v =
- if mode = SpmChr then
- CharConst (s, v)
- else
- StringConst $ stringCut s
- in
- (SeqTerm, SOME $ term (finishSeqRead startOff stream) v, stream)
- end
- else if mode = SpmStr then
- let
- val (_, stream) =
- if c <> #"\\" then (c, stream) else eatEscSeq stream
- in
- (SeqValue v, NONE, stream)
- end
- else
- raise seqExnConv SpmChr $ TkError (TkiDx 0, "expected % after value")
- | seqParser _ SeqTerm _ (SOME _) =
- raise Unreachable
- | seqParser mode state (_, _) NONE =
- raise case state of
- SeqInit => Unreachable
- | SeqStart => seqExnConv mode $ TkError (TkiStart, unfinishedSeq mode)
- | SeqValue _ =>
- seqExnConv mode $ TkError (TkiStart, unfinishedSeq mode)
- | SeqTerm => Unreachable
-
-val charParser = seqParser SpmChr
-val strParser = seqParser SpmStr
-
-fun readIncludeArg stream =
-let
- open String
-
- fun triml s idx =
- if idx = size s then
- ""
- else if isSpace $ sub (s, idx) then
- triml s (idx + 1)
- else
- extract (s, idx, NONE)
-
- fun trimr s idx =
- if idx = 0 then
- ""
- else if isSpace $ sub (s, idx) then
- trimr s (idx - 1)
- else
- extract (s, 0, SOME $ idx + 1)
-
- fun trim s = triml (trimr s (size s - 1)) 0
-
- fun getLinePos () =
- let
- val (fname, line, _) = getPposFromPos (getPos stream) stream
- in
- (fname, line, NONE)
- end
-
- fun determineType s =
- let
- fun --> msg = raise TkErrorAug (getLinePos (), msg)
- fun isLast c = sub (s, size s - 1) = c
- in
- if s = "" then
- --> "#include argument is empty"
- else
- case sub (s, 0) of
- #"<" =>
- if isLast #">" then
- IAFromRef $ stringCut s
- else
- --> "expected > at #include argument end"
- | #"\"" =>
- if isLast #"\"" then
- IARel $ stringCut s
- else
- --> "expected \" at #include argument end"
- | _ => --> "#include argument should start with \" or <"
- end
-
- val (arg, stream) = readline stream handle
- LineWithoutNl =>
- raise TkErrorAug (getLinePos (),
- "#include line does not end with \\n")
-in
- (determineType $ trim arg, stream)
-end
-
-fun postprocessCppDir tk tkl stream =
-let
- val isCppDir =
- (fn Hash => true | _ => false) (#2 $ hd tkl)
- andalso isFirstOnLine (hd tkl) stream
- handle Empty => false
- val (pos, tk') = tk
-
- fun conv tk = ((#1 $ hd tkl, tk) :: tl tkl)
-in
- if isCppDir andalso tk' = Id "include" then
- let
- val (arg, stream) = readIncludeArg stream
- in
- (conv $ CppInclude arg, stream)
- end
- else if isCppDir then
- (conv $ formCppDir tk', stream) handle
- ExpectedCppDir =>
- raise TkErrorAug (getPposFromPos pos stream,
- "expected preprocessor directive")
-
- else
- (tk :: tkl, stream)
-end
-
-fun unexpectedCharRaise stream c =
-let
- val (id, pos) = getPosAfterCharRead stream
- val pos = getPposFromPos (id, pos) stream
- val repr =
- if isPrintable c then
- str c
- else
- "<" ^ Int.toString (ord c) ^ ">"
-in
- raise TkErrorAug (pos, "unexpected character " ^ repr)
-end
-
-fun skipComment stream pos =
-let
- fun skip prevIsAsterisk stream =
- let
- val (c, stream) = getchar stream
- in
- if prevIsAsterisk andalso c = #"/" then
- stream
- else
- skip (c = #"*") stream
- end
-in
- skip false stream handle
- EndOfFile =>
- let
- val pos = getPposFromPos pos stream
- in
- raise TkErrorAug (pos, "unfinished comment")
- end
-end
-
-fun handleBackslash stream =
-let
- val (c, stream) = (fn (c, s) => (SOME c, s)) $ getchar stream handle
- _ => (NONE, stream)
-
- val raiseErr = fn () =>
- let
- val pos = getPosAfterCharRead stream
- val pos = getPposFromPos pos stream
- in
- raise TkErrorAug (pos, "expected \\n after backslash")
- end
-in
- case c of
- SOME c =>
- if c = #"\n" then
- stream
- else
- raiseErr ()
- | NONE => raiseErr ()
-end
-
-fun processSymbol stream tkl =
-let
- val (T as (p as (fid, off), tk), stream) = fsmEat stream
-in
- case tk of
- CommentStart => tokenize (skipComment stream p) tkl
- | DoubleDot => tokenize stream (((fid, off + 1), Dot) :: (p, Dot) :: tkl)
- | _ => tokenize stream (T :: tkl)
-end
-
-and tokenize stream tkl =
-let
- fun getcharSkipEof stream = getchar stream handle
- EndOfFile => getcharSkipEof (advanceToNewFile stream)
-
- val (c, stream) = (fn (c, s) => (SOME c, s)) $ getcharSkipEof stream
- handle
- EndOfStream => (NONE, stream)
-
- fun cont (tk, stream) = tokenize stream (tk :: tkl)
- fun @-> parser acc = cont $ parseGeneric stream parser acc
-in
- case c of
- NONE => (rev tkl, #allFiles stream)
- | SOME c =>
- if isSpace c then
- tokenize stream tkl
- else if isIdStart c then
- let
- val (tk, stream) = parseGeneric stream idParser ()
- val (tkl, stream) = postprocessCppDir tk tkl stream
- in
- tokenize stream tkl
- end
- else if isDigit c then
- @-> numParser ()
- else if c = #"'" then
- @-> charParser SeqInit
- else if c = #"\"" then
- @-> strParser SeqInit
- else if isStartForFsm c then
- processSymbol stream tkl
- else if c = #"\\" then
- tokenize (handleBackslash stream) tkl
- else if c = #"\f" then
- tokenize stream tkl
- else
- unexpectedCharRaise stream c
-end
-
fun main [fname] =
let
- val stream = streamInit fname
- val (tkl, fileList) = tokenize stream []
+ val stream = Stream.streamInit fname
+ val (tkl, fileList) = Tokenizer.tokenize stream []
in
- List.app (fn (p, x) => (printPos fileList p; printToken x)) tkl
+ List.app
+ (fn (p, x) => (Stream.printPos fileList p; Tokenizer.printToken x)) tkl
end
| main _ = printLn "Expected a signle argument: file name"