diff options
Diffstat (limited to 'tokenizer.sml')
-rw-r--r-- | tokenizer.sml | 133 |
1 files changed, 27 insertions, 106 deletions
diff --git a/tokenizer.sml b/tokenizer.sml index 53bb396..b40e03f 100644 --- a/tokenizer.sml +++ b/tokenizer.sml @@ -1,6 +1,4 @@ structure Tokenizer:> TOKENIZER = struct - datatype includeArg = IARel of string | IAFromRef of string - datatype token = Invalid | Number of string | @@ -93,7 +91,7 @@ structure Tokenizer:> TOKENIZER = struct CommentStart | - CppInclude of includeArg | + CppInclude | CppDefine | CppUndef | CppIf | @@ -112,14 +110,13 @@ structure Tokenizer:> TOKENIZER = struct datatype tkErrorAuxInfo = TkiEOF | TkiDx of int | TkiStart exception TkError of tkErrorAuxInfo * string - exception TkErrorAug of Stream.convPos * string + exception TkErrorAug of Stream.ppos * string exception ExpectedCppDir (* handled in postprocess *) exception FsmTableIsTooSmall (* Unreachable (should be) *) - exception Unreachable exception TokenWithoutRepr val tokenRepr = @@ -213,6 +210,7 @@ structure Tokenizer:> TOKENIZER = struct (CommentStart, "/*"), + (CppInclude, %"include"), (CppDefine, %"define"), (CppUndef, %"undef"), (CppIf, %"if"), @@ -229,15 +227,6 @@ structure Tokenizer:> TOKENIZER = struct Number s => printLn $ "Num: " ^ s | Id s => printLn $ "Id: " ^ s | CharConst (repr, _) => printLn repr - | CppInclude arg => - let - val (start, end', arg) = - case arg of - IARel v => ("\"", "\"", v) - | IAFromRef v => ("<", ">", v) - in - printLn $ (str cppPrefix) ^ "include " ^ start ^ arg ^ end' - end | StringConst s => printLn $ "\"" ^ s ^ "\"" | v => @@ -380,9 +369,7 @@ structure Tokenizer:> TOKENIZER = struct fun get curState stream = let - val (c, stream) = (fn (c, s) => (SOME c, s)) $ Stream.getchar stream - handle - _ => (NONE, stream) + val (c, stream) = Stream.getchar stream in case c of NONE => (#1 $ sub (#2 $ fsmTable (), curState), stream) @@ -415,8 +402,7 @@ structure Tokenizer:> TOKENIZER = struct val P as (_, startOff) = Stream.getPos stream fun parse' stream acc = let - val (c, stream) = (fn (c, s) => (SOME c, s)) $ Stream.getchar stream handle - _ => (NONE, stream) + val (c, stream) = Stream.getchar stream val (acc, tk, stream) = parser acc (stream, startOff) c handle TkError (TkiDx dx, msg) => raise tkError2aug stream (dx, msg) @@ -428,9 +414,10 @@ structure Tokenizer:> TOKENIZER = struct end | TkError (TkiEOF, msg) => let - val (file, line, _) = Stream.getPposFromPos P stream + open Stream + val pos = pposWithoutCol $ getPposFromPos P stream in - raise TkErrorAug ((file, line, NONE), msg) + raise TkErrorAug (pos, msg) end in case tk of @@ -512,8 +499,11 @@ structure Tokenizer:> TOKENIZER = struct fun eatEscSeq stream = let - val (c, stream) = Stream.getchar stream handle - _ => raise TkError (TkiDx 0, "unfinished escape sequence") + val (c, stream) = Stream.getchar stream + val c = + case c of + NONE => raise TkError (TkiDx 0, "unfinished escape sequence") + | SOME c => c in (case c of #"\\" => #"\\" @@ -595,65 +585,6 @@ structure Tokenizer:> TOKENIZER = struct val charParser = seqParser SpmChr val strParser = seqParser SpmStr - fun readIncludeArg stream = - let - open String - - fun triml s idx = - if idx = size s then - "" - else if Char.isSpace $ sub (s, idx) then - triml s (idx + 1) - else - extract (s, idx, NONE) - - fun trimr s idx = - if idx = 0 then - "" - else if Char.isSpace $ sub (s, idx) then - trimr s (idx - 1) - else - extract (s, 0, SOME $ idx + 1) - - fun trim s = triml (trimr s (size s - 1)) 0 - - fun getLinePos () = - let - val (fname, line, _) = Stream.getPposFromPos (Stream.getPos stream) stream - in - (fname, line, NONE) - end - - fun determineType s = - let - fun --> msg = raise TkErrorAug (getLinePos (), msg) - fun isLast c = sub (s, size s - 1) = c - in - if s = "" then - --> "#include argument is empty" - else - case sub (s, 0) of - #"<" => - if isLast #">" then - IAFromRef $ stringCut s - else - --> "expected > at #include argument end" - | #"\"" => - if isLast #"\"" then - IARel $ stringCut s - else - --> "expected \" at #include argument end" - | _ => --> "#include argument should start with \" or <" - end - - val (arg, stream) = Stream.readline stream handle - Stream.LineWithoutNl => - raise TkErrorAug (getLinePos (), - "#include line does not end with \\n") - in - (determineType $ trim arg, stream) - end - fun postprocessCppDir tk tkl stream = let val isCppDir = @@ -664,13 +595,7 @@ structure Tokenizer:> TOKENIZER = struct fun conv tk = ((#1 $ hd tkl, tk) :: tl tkl) in - if isCppDir andalso tk' = Id "include" then - let - val (arg, stream) = readIncludeArg stream - in - (conv $ CppInclude arg, stream) - end - else if isCppDir then + if isCppDir then (conv $ formCppDir tk', stream) handle ExpectedCppDir => raise TkErrorAug (Stream.getPposFromPos pos stream, @@ -697,7 +622,15 @@ structure Tokenizer:> TOKENIZER = struct let fun skip prevIsAsterisk stream = let - val (c, stream) = Stream.getchar stream + val (c, stream) = + case Stream.getchar stream of + (NONE, _) => + let + val pos = Stream.getPposFromPos pos stream + in + raise TkErrorAug (pos, "unfinished comment") + end + | (SOME c, stream) => (c, stream) in if prevIsAsterisk andalso c = #"/" then stream @@ -705,19 +638,12 @@ structure Tokenizer:> TOKENIZER = struct skip (c = #"*") stream end in - skip false stream handle - Stream.EndOfFile => - let - val pos = Stream.getPposFromPos pos stream - in - raise TkErrorAug (pos, "unfinished comment") - end + skip false stream end fun handleBackslash stream = let - val (c, stream) = (fn (c, s) => (SOME c, s)) $ Stream.getchar stream handle - _ => (NONE, stream) + val (c, stream) = Stream.getchar stream val raiseErr = fn () => let @@ -748,18 +674,13 @@ structure Tokenizer:> TOKENIZER = struct and tokenize stream tkl = let - fun getcharSkipEof stream = Stream.getchar stream handle - Stream.EndOfFile => getcharSkipEof (Stream.advanceToNewFile stream) - - val (c, stream) = (fn (c, s) => (SOME c, s)) $ getcharSkipEof stream - handle - Stream.EndOfStream => (NONE, stream) + val (c, stream) = Stream.getchar stream fun cont (tk, stream) = tokenize stream (tk :: tkl) fun @-> parser acc = cont $ parseGeneric stream parser acc in case c of - NONE => (rev tkl, Stream.extractFilesInfo stream) + NONE => (rev tkl, Stream.recycle stream) | SOME c => if Char.isSpace c then tokenize stream tkl |