summaryrefslogtreecommitdiff
path: root/tokenizer.sml
diff options
context:
space:
mode:
Diffstat (limited to 'tokenizer.sml')
-rw-r--r--tokenizer.sml133
1 files changed, 27 insertions, 106 deletions
diff --git a/tokenizer.sml b/tokenizer.sml
index 53bb396..b40e03f 100644
--- a/tokenizer.sml
+++ b/tokenizer.sml
@@ -1,6 +1,4 @@
structure Tokenizer:> TOKENIZER = struct
- datatype includeArg = IARel of string | IAFromRef of string
-
datatype token =
Invalid |
Number of string |
@@ -93,7 +91,7 @@ structure Tokenizer:> TOKENIZER = struct
CommentStart |
- CppInclude of includeArg |
+ CppInclude |
CppDefine |
CppUndef |
CppIf |
@@ -112,14 +110,13 @@ structure Tokenizer:> TOKENIZER = struct
datatype tkErrorAuxInfo = TkiEOF | TkiDx of int | TkiStart
exception TkError of tkErrorAuxInfo * string
- exception TkErrorAug of Stream.convPos * string
+ exception TkErrorAug of Stream.ppos * string
exception ExpectedCppDir (* handled in postprocess *)
exception FsmTableIsTooSmall
(* Unreachable (should be) *)
- exception Unreachable
exception TokenWithoutRepr
val tokenRepr =
@@ -213,6 +210,7 @@ structure Tokenizer:> TOKENIZER = struct
(CommentStart, "/*"),
+ (CppInclude, %"include"),
(CppDefine, %"define"),
(CppUndef, %"undef"),
(CppIf, %"if"),
@@ -229,15 +227,6 @@ structure Tokenizer:> TOKENIZER = struct
Number s => printLn $ "Num: " ^ s
| Id s => printLn $ "Id: " ^ s
| CharConst (repr, _) => printLn repr
- | CppInclude arg =>
- let
- val (start, end', arg) =
- case arg of
- IARel v => ("\"", "\"", v)
- | IAFromRef v => ("<", ">", v)
- in
- printLn $ (str cppPrefix) ^ "include " ^ start ^ arg ^ end'
- end
| StringConst s =>
printLn $ "\"" ^ s ^ "\""
| v =>
@@ -380,9 +369,7 @@ structure Tokenizer:> TOKENIZER = struct
fun get curState stream =
let
- val (c, stream) = (fn (c, s) => (SOME c, s)) $ Stream.getchar stream
- handle
- _ => (NONE, stream)
+ val (c, stream) = Stream.getchar stream
in
case c of
NONE => (#1 $ sub (#2 $ fsmTable (), curState), stream)
@@ -415,8 +402,7 @@ structure Tokenizer:> TOKENIZER = struct
val P as (_, startOff) = Stream.getPos stream
fun parse' stream acc = let
- val (c, stream) = (fn (c, s) => (SOME c, s)) $ Stream.getchar stream handle
- _ => (NONE, stream)
+ val (c, stream) = Stream.getchar stream
val (acc, tk, stream) = parser acc (stream, startOff) c handle
TkError (TkiDx dx, msg) => raise tkError2aug stream (dx, msg)
@@ -428,9 +414,10 @@ structure Tokenizer:> TOKENIZER = struct
end
| TkError (TkiEOF, msg) =>
let
- val (file, line, _) = Stream.getPposFromPos P stream
+ open Stream
+ val pos = pposWithoutCol $ getPposFromPos P stream
in
- raise TkErrorAug ((file, line, NONE), msg)
+ raise TkErrorAug (pos, msg)
end
in
case tk of
@@ -512,8 +499,11 @@ structure Tokenizer:> TOKENIZER = struct
fun eatEscSeq stream =
let
- val (c, stream) = Stream.getchar stream handle
- _ => raise TkError (TkiDx 0, "unfinished escape sequence")
+ val (c, stream) = Stream.getchar stream
+ val c =
+ case c of
+ NONE => raise TkError (TkiDx 0, "unfinished escape sequence")
+ | SOME c => c
in
(case c of
#"\\" => #"\\"
@@ -595,65 +585,6 @@ structure Tokenizer:> TOKENIZER = struct
val charParser = seqParser SpmChr
val strParser = seqParser SpmStr
- fun readIncludeArg stream =
- let
- open String
-
- fun triml s idx =
- if idx = size s then
- ""
- else if Char.isSpace $ sub (s, idx) then
- triml s (idx + 1)
- else
- extract (s, idx, NONE)
-
- fun trimr s idx =
- if idx = 0 then
- ""
- else if Char.isSpace $ sub (s, idx) then
- trimr s (idx - 1)
- else
- extract (s, 0, SOME $ idx + 1)
-
- fun trim s = triml (trimr s (size s - 1)) 0
-
- fun getLinePos () =
- let
- val (fname, line, _) = Stream.getPposFromPos (Stream.getPos stream) stream
- in
- (fname, line, NONE)
- end
-
- fun determineType s =
- let
- fun --> msg = raise TkErrorAug (getLinePos (), msg)
- fun isLast c = sub (s, size s - 1) = c
- in
- if s = "" then
- --> "#include argument is empty"
- else
- case sub (s, 0) of
- #"<" =>
- if isLast #">" then
- IAFromRef $ stringCut s
- else
- --> "expected > at #include argument end"
- | #"\"" =>
- if isLast #"\"" then
- IARel $ stringCut s
- else
- --> "expected \" at #include argument end"
- | _ => --> "#include argument should start with \" or <"
- end
-
- val (arg, stream) = Stream.readline stream handle
- Stream.LineWithoutNl =>
- raise TkErrorAug (getLinePos (),
- "#include line does not end with \\n")
- in
- (determineType $ trim arg, stream)
- end
-
fun postprocessCppDir tk tkl stream =
let
val isCppDir =
@@ -664,13 +595,7 @@ structure Tokenizer:> TOKENIZER = struct
fun conv tk = ((#1 $ hd tkl, tk) :: tl tkl)
in
- if isCppDir andalso tk' = Id "include" then
- let
- val (arg, stream) = readIncludeArg stream
- in
- (conv $ CppInclude arg, stream)
- end
- else if isCppDir then
+ if isCppDir then
(conv $ formCppDir tk', stream) handle
ExpectedCppDir =>
raise TkErrorAug (Stream.getPposFromPos pos stream,
@@ -697,7 +622,15 @@ structure Tokenizer:> TOKENIZER = struct
let
fun skip prevIsAsterisk stream =
let
- val (c, stream) = Stream.getchar stream
+ val (c, stream) =
+ case Stream.getchar stream of
+ (NONE, _) =>
+ let
+ val pos = Stream.getPposFromPos pos stream
+ in
+ raise TkErrorAug (pos, "unfinished comment")
+ end
+ | (SOME c, stream) => (c, stream)
in
if prevIsAsterisk andalso c = #"/" then
stream
@@ -705,19 +638,12 @@ structure Tokenizer:> TOKENIZER = struct
skip (c = #"*") stream
end
in
- skip false stream handle
- Stream.EndOfFile =>
- let
- val pos = Stream.getPposFromPos pos stream
- in
- raise TkErrorAug (pos, "unfinished comment")
- end
+ skip false stream
end
fun handleBackslash stream =
let
- val (c, stream) = (fn (c, s) => (SOME c, s)) $ Stream.getchar stream handle
- _ => (NONE, stream)
+ val (c, stream) = Stream.getchar stream
val raiseErr = fn () =>
let
@@ -748,18 +674,13 @@ structure Tokenizer:> TOKENIZER = struct
and tokenize stream tkl =
let
- fun getcharSkipEof stream = Stream.getchar stream handle
- Stream.EndOfFile => getcharSkipEof (Stream.advanceToNewFile stream)
-
- val (c, stream) = (fn (c, s) => (SOME c, s)) $ getcharSkipEof stream
- handle
- Stream.EndOfStream => (NONE, stream)
+ val (c, stream) = Stream.getchar stream
fun cont (tk, stream) = tokenize stream (tk :: tkl)
fun @-> parser acc = cont $ parseGeneric stream parser acc
in
case c of
- NONE => (rev tkl, Stream.extractFilesInfo stream)
+ NONE => (rev tkl, Stream.recycle stream)
| SOME c =>
if Char.isSpace c then
tokenize stream tkl