summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorVladimir Azarov <avm@intermediate-node.net>2025-05-26 21:06:51 +0200
committerVladimir Azarov <avm@intermediate-node.net>2025-05-26 21:06:51 +0200
commit3a32398248e5593b1b536c837478cab276f7aebf (patch)
tree55ff4004a06278c84908668c26d39d0fc3098a08
parent6f3fa80b37ca5f8d992f5d6f66aee77ead303bf4 (diff)
Simpler tokenizer
-rw-r--r--parser.fun6
-rw-r--r--stream.sig3
-rw-r--r--stream.sml8
-rw-r--r--tokenizer.fun592
-rw-r--r--tokenizer.sig14
5 files changed, 214 insertions, 409 deletions
diff --git a/parser.fun b/parser.fun
index b571a40..15134a8 100644
--- a/parser.fun
+++ b/parser.fun
@@ -92,7 +92,7 @@ functor Parser(P: PPC): PARSER = struct
datatype expr =
Enum |
Eid of int |
- Estrlit of string |
+ Estrlit of int |
EmemberByV of int * exprAug |
EmemberByP of int * exprAug |
EfuncCall of exprAug * exprAug list |
@@ -254,7 +254,7 @@ functor Parser(P: PPC): PARSER = struct
case e of
Eid id => Printf out P.?id P %
| Enum => Printf out `"num" P %
- | Estrlit s => Printf out `"\"" `s `"\"" P %
+ | Estrlit s => Printf out P.?s P %
| EmemberByV pair => member pair "."
| EmemberByP pair => member pair "->"
| EfuncCall (func, args) => (
@@ -399,7 +399,7 @@ functor Parser(P: PPC): PARSER = struct
in
case tk of
Tk (T.Id id) => wrap $ Eid id
- | Tk (T.StringConst s) => wrap $ Estrlit s
+ | Tk (T.Strlit id) => wrap $ Estrlit id
| Tk (T.CharConst _) => raise Unimplemented
| Tk (T.Num _) => wrap Enum
| TkParens list =>
diff --git a/stream.sig b/stream.sig
index 4426e62..22d7a31 100644
--- a/stream.sig
+++ b/stream.sig
@@ -10,7 +10,7 @@ signature STREAM = sig
val Ppos: (pos, 'a, 'b, 'c) a1printer
- val getchar: t -> char option * t
+ val getchar: t -> char * t
(* Will throw UngetcError, if applied at the beginning of the stream.
* Can be always avoided, so is not provided in sig file *)
@@ -30,5 +30,6 @@ signature STREAM = sig
val isFirstOnLine: t -> fileOffset -> bool
val getPosRaw: fileOffset -> t -> pos * t
val getPos: t -> pos * t
+ val getPosDisc: t -> pos
val getPosAfterChar: t -> pos * t
end
diff --git a/stream.sml b/stream.sml
index 3e18f2e..4d58911 100644
--- a/stream.sml
+++ b/stream.sml
@@ -37,8 +37,8 @@ structure Stream :> STREAM = struct
end z
fun getchar (S as { contents, off, ... }: t) =
- (SOME $ String.sub (contents, off), updateStream S s#off (off + 1) %)
- handle Subscript => (NONE, S)
+ (String.sub (contents, off), updateStream S s#off (off + 1) %)
+ handle Subscript => (chr 0, S)
fun ungetc ({ off = 0, ... }: t) =
raise UngetcError
@@ -100,7 +100,7 @@ structure Stream :> STREAM = struct
(line, col)
else
calcPos (curOff + 1)
- (if String.sub (contents,curOff) = #"\n" then (line + 1, 1)
+ (if String.sub (contents, curOff) = #"\n" then (line + 1, 1)
else (line, col + 1))
val (line, col) = calcPos prevOff (line, col)
in
@@ -111,6 +111,8 @@ structure Stream :> STREAM = struct
fun getPos (S as { off, ... }: t) =
getPosRaw off S
+ fun getPosDisc s = #1 $ getPos s
+
fun EOFpos (S as { contents, ... }: t) =
getPosRaw (String.size contents) S
diff --git a/tokenizer.fun b/tokenizer.fun
index 93a7099..d6695d0 100644
--- a/tokenizer.fun
+++ b/tokenizer.fun
@@ -5,25 +5,17 @@ struct
structure ST = ST
structure S = S
- datatype intType = ItDec | ItOct | ItHex
- datatype intSfx = IsNone | IsU | IsL | IsUL | IsLL | IsULL
- datatype floatSfx = FsNone | FsF | FsL
-
- datatype numConst =
- IntConst of intType * string * intSfx |
- FloatConst of string * floatSfx
-
datatype token =
Invalid |
EOS |
NewLine |
MacroEnd of int |
- Num of numConst |
+ Num of string |
Id of int |
- CharConst of string * int |
- StringConst of string |
+ CharConst of int * int |
+ Strlit of int |
kwBreak |
kwCase |
@@ -127,15 +119,11 @@ struct
PpcError |
PpcPragma
- datatype tkErrorAuxInfo = TkiEOF | TkiDx of int | TkiStart
-
- exception TkError of tkErrorAuxInfo * string
+ exception TkError of int * string
fun error pos msg = (printf `"\n" S.Ppos pos `": " `msg `"\n" %; exit 1)
- (* Unreachable (should be) *)
exception TokenWithoutRepr
- exception SuffixWithoutRepr
val kwPrefix = #"`"
val ppcPrefix = #"$"
@@ -255,21 +243,6 @@ struct
]
end
- val intSuffixRepr = [
- (IsNone, ""),
- (IsU, "u"),
- (IsL, "l"),
- (IsUL, "ul"),
- (IsLL, "ll"),
- (IsULL, "ull")
- ]
-
- val floatSuffixRepr = [
- (FsNone, ""),
- (FsF, "f"),
- (FsL, "l")
- ]
-
fun initSymtab () =
let
val symtab = ST.init ()
@@ -292,14 +265,6 @@ struct
symtab
end
- fun getSfxRepr sfx buf onError =
- case List.find (fn (sfx', _) => sfx' = sfx) buf of
- NONE => onError ()
- | SOME (_, repr) => repr
-
- fun getSfxReprSimple sfx buf =
- getSfxRepr sfx buf (fn () => raise SuffixWithoutRepr)
-
fun printToken (out, symtab, tk) =
let
val ? = fn z =>
@@ -315,20 +280,9 @@ struct
| NewLine => Printf out `"\\n" %
| PpcInclude (dir, arg) =>
Printf out `"#include(" `dir `", " `arg `")" %
- | Num (IntConst (it, str, sfx)) =>
- let
- val intType =
- case it of
- ItDec => ""
- | ItOct => "0"
- | ItHex => "0x"
- in
- Printf out `intType `str `(getSfxReprSimple sfx intSuffixRepr) %
- end
- | Num (FloatConst (str, sfx)) =>
- Printf out `str `(getSfxReprSimple sfx floatSuffixRepr) %
- | CharConst (repr, _) => Printf out `repr %
- | StringConst s => Printf out `"\"" `s `"\"" %
+ | Num s => Printf out `s %
+ | CharConst (repr, _) => Printf out ?repr %
+ | Strlit id => Printf out ?id %
| v =>
case List.find (fn (x, _) => x = v) tokenRepr of
SOME (_, repr) =>
@@ -344,8 +298,7 @@ struct
val Ptk = fn z => bind A2 printToken z
- fun isIdStart c = Char.isAlpha c orelse c = #"_"
- fun isIdBody c = Char.isAlphaNum c orelse c = #"_"
+ fun isNondigit c = Char.isAlpha c orelse c = #"_"
fun isOctal c = ord c >= ord #"0" andalso ord c < ord #"8"
val isDigit = Char.isDigit
@@ -486,221 +439,25 @@ struct
let
val (c, stream) = S.getchar stream
in
- case c of
- NONE => (#1 $ sub (#2 $ fsmTable (), curState), stream)
- | SOME c =>
- let
- val (tk, row) = sub (#2 $ fsmTable (), curState)
- val nextState = sub (row, ord c)
- in
- if nextState = ~1 then
- (tk, S.ungetc stream)
- else
- get nextState stream
- end
+ if c = #"\000" then
+ (#1 $ sub (#2 $ fsmTable (), curState), stream)
+ else
+ let
+ val (tk, row) = sub (#2 $ fsmTable (), curState)
+ val nextState = sub (row, ord c)
+ in
+ if nextState = ~1 then
+ (tk, S.ungetc stream)
+ else
+ get nextState stream
+ end
end
val (tk, stream) = get 0 stream
in
(tk, pos, stream)
end
- fun errorDx stream dx msg =
- let
- val off = S.getOffset stream - 1 + dx
- val (pos, _) = S.getPosRaw off stream
- in
- error pos msg
- end
-
- fun parserWrapper stream parser acc =
- let
- val stream = S.ungetc stream
-
- val startOff = S.getOffset stream
- val (pos, stream) = S.getPos stream
-
- fun parse' stream acc =
- let
- val (c, stream) = S.getchar stream
-
- val (acc, tk, stream) = parser acc (stream, startOff) c handle
- TkError (TkiDx dx, msg) => errorDx stream dx msg
- | TkError (TkiStart, msg) => error pos msg
- | TkError (TkiEOF, msg) =>
- let
- val (pos, _) = S.EOFpos stream
- in
- error pos msg
- end
- in
- case tk of
- NONE => parse' stream acc
- | _ => (valOf tk, stream)
- end
-
- val (tk, stream) = parse' stream acc
- in
- (tk, pos, stream)
- end
-
- fun finishSeqRead startOff stream =
- S.getSubstr startOff (S.getOffset stream) stream
-
- fun idParser symtab () (stream, startOff) c =
- let
- fun finalize stream =
- let
- val s = finishSeqRead startOff stream
- val id = ST.getId symtab s
- in
- ((), SOME $ Id id, stream)
- end
- in
- case c of
- NONE => finalize stream
- | SOME c =>
- if isIdBody c then
- ((), NONE, stream)
- else
- finalize (S.ungetc stream)
- end
-
- datatype intMode = ImDec | ImOct | ImInvalidOct | ImHex
- datatype floatMode = FmDot | FmExp
-
- datatype npState = NpInit | IntMode of intMode | FloatMode of floatMode
-
- fun getLongestSeq acc pred stream =
- let
- val (c, stream') = S.getchar stream
- in
- if isSome c andalso pred $ valOf c then
- getLongestSeq (valOf c :: acc) pred stream'
- else
- (implode $ rev acc, stream)
- end
-
- fun skipDigitSeq off stream =
- let
- val (res, stream) = getLongestSeq [] isDigit stream
- in
- if res = "" then
- raise TkError (TkiDx off, "expected digit")
- else
- (String.size res, stream)
- end
-
- fun getSuffixCommon buf off stream =
- let
- val (sfx, stream) = getLongestSeq [] Char.isAlpha stream
- val sfx = String.map (fn c => Char.toLower c) sfx
- val sfx =
- case List.find (fn (_, repr) => repr = sfx) buf of
- NONE => raise TkError (TkiDx off, "unknown suffix")
- | SOME (sfx, _) => sfx
- in
- (sfx, stream)
- end
-
- val getIntSuffix = getSuffixCommon intSuffixRepr 0
- fun getFloatSuffix off = getSuffixCommon floatSuffixRepr off
-
- (*
- * It seems that there is an ambiguity in C:
- * gcc/clang/cparser consider 0xe-3 as a fp constant and reject it.
- * Right now we do not, but it may be reconsidered.
- *)
- fun numParser _ NpInit (stream, _) (SOME c) =
- if c = #"0" then
- let
- val (c, stream') = S.getchar stream
- in
- case c of
- NONE =>
- (NpInit, SOME $ Num $ IntConst (ItOct, "", IsNone), stream')
- | SOME c =>
- if Char.toLower c = #"x" then
- (IntMode ImHex, NONE, stream')
- else
- (IntMode ImOct, NONE, stream)
- end
- else
- (IntMode ImDec, NONE, stream)
- | numParser _ NpInit _ NONE = raise Unreachable
- | numParser _ (IntMode mode) (stream, startOff) c =
- let
- val (pred, res, offset) =
- case mode of
- ImDec => (isDigit, ItDec, 0)
- | ImOct => (isOctal, ItOct, 1)
- | ImInvalidOct => (isDigit, ItOct, 1)
- | ImHex => (isHexDigit, ItHex, 2)
-
- fun checkAndRaise m msg =
- if mode = m then raise TkError (TkiStart, msg) else ()
-
- fun finish () =
- let
- val () = checkAndRaise ImInvalidOct "invalid octal constant"
- val stream = S.ungetc stream
- val str = finishSeqRead (startOff + offset) stream
- val (sfx, stream) = getIntSuffix stream
- in
- (IntMode mode, SOME $ Num $ IntConst (res, str, sfx), stream)
- end
- in
- case c of
- NONE => finish ()
- | SOME c =>
- if pred c then
- (IntMode mode, NONE, stream)
- else if c = #"." then
- (FloatMode FmDot, NONE, stream)
- else if Char.toLower c = #"e" then
- (checkAndRaise ImHex
- "floating constant can not come with 0x prefix";
- (FloatMode FmExp, NONE, stream))
- else
- finish ()
- end
- | numParser _ (FloatMode FmDot) (stream, startOff) _ =
- let
- val (len, stream) = skipDigitSeq 0 $ S.ungetc stream
-
- val (c, stream') = S.getchar stream
-
- fun finish () =
- let
- val str = finishSeqRead startOff stream
- val (sfx, stream) = getFloatSuffix len stream
- in
- (FloatMode FmDot, SOME $ Num $ FloatConst (str, sfx), stream)
- end
- in
- case c of
- NONE => finish ()
- | SOME c =>
- if Char.toLower c = #"e" then
- (FloatMode FmExp, NONE, stream')
- else
- finish ()
- end
- | numParser _ (FloatMode FmExp) (stream, startOff) c =
- let
- val (off, stream) =
- if c = NONE then
- raise TkError (TkiDx 0, "expected digit")
- else if valOf c <> #"+" andalso valOf c <> #"-" then
- (0, S.ungetc stream)
- else
- (~1, stream)
-
- val (len, stream) = skipDigitSeq off stream
- val str = finishSeqRead startOff stream
- val (sfx, stream) = getFloatSuffix len stream
- in
- (FloatMode FmDot, SOME $ Num $ FloatConst (str, sfx), stream)
- end
+ datatype chr = Reg of char | EscSeqed of char | NoChar
fun chrIntVal c =
if isDigit c then
@@ -716,18 +473,17 @@ struct
let
fun follow stream acc count =
if count = 3 then
- (SOME $ chr acc, stream)
+ (EscSeqed $ chr acc, stream)
else
let
val (c, stream) = S.getchar stream
in
- case c of
- NONE => (SOME $ chr acc, stream)
- | SOME c =>
- if isOctal c then
- follow stream (acc * 8 + chrIntVal c) (count + 1)
- else
- (SOME $ chr acc, S.ungetc stream)
+ if c = #"\000" then
+ (EscSeqed $ chr acc, stream)
+ else if isOctal c then
+ follow stream (acc * 8 + chrIntVal c) (count + 1)
+ else
+ (EscSeqed $ chr acc, S.ungetc stream)
end
in
follow stream (chrIntVal c) 1
@@ -739,25 +495,24 @@ struct
let
val (c, stream) = S.getchar stream
- val noHex = TkError (TkiDx 0, "\\x without hex digits")
+ val noHex = TkError (0, "\\x without hex digits")
in
- case c of
- NONE =>
- if count = 0 then
- raise noHex
- else
- (SOME $ chr acc, stream)
- | SOME c =>
- if isHexDigit c then
- if count = 2 then
- raise TkError (TkiDx 2, "hex sequence out of range")
- else
- follow stream (acc * 16 + chrIntVal c) (count + 1)
+ if c = #"\000" then
+ if count = 0 then
+ raise noHex
+ else
+ (EscSeqed $ chr acc, stream)
+ else
+ if isHexDigit c then
+ if count = 2 then
+ raise TkError (2, "hex sequence out of range")
+ else
+ follow stream (acc * 16 + chrIntVal c) (count + 1)
+ else
+ if count = 0 then
+ raise noHex
else
- if count = 0 then
- raise noHex
- else
- (SOME $ chr acc, S.ungetc stream)
+ (EscSeqed $ chr acc, S.ungetc stream)
end
in
follow stream 0 0
@@ -765,15 +520,16 @@ struct
fun eatEscSeq stream =
let
- fun raiseErr0 msg = raise TkError (TkiDx 0, msg)
+ fun raiseErr0 msg = raise TkError (0, msg)
val (c, stream) = S.getchar stream
val c =
- case c of
- NONE => raiseErr0 "unfinished escape sequence"
- | SOME c => c
+ if c = #"\000" then
+ raiseErr0 "unfinished escape sequence"
+ else
+ c
- fun & c = (SOME c, stream)
+ fun & c = (EscSeqed c, stream)
in
case c of
#"'" => & #"'"
@@ -787,87 +543,135 @@ struct
| #"r" => & #"\r"
| #"t" => & #"\t"
| #"v" => & #"\v"
- | #"\n" => (NONE, stream)
+ | #"\n" => (NoChar, stream)
| #"x" => parseHexSeq stream
| c =>
if isOctal c then
parseOctalSeq stream c
else
raiseErr0 "unknown escape sequence"
- end
+ end handle
+ TkError (dx, msg) =>
+ let
+ val offset = S.getOffset stream + dx
+ val (pos, _) = S.getPosRaw offset stream
+ in
+ error pos msg
+ end
+
+ fun getMaybeBackslashed stream =
+ let
+ fun getMaybeBackslashed' stream =
+ let
+ val (c, stream) = S.getchar stream
+ in
+ if c = #"\\" then
+ eatEscSeq stream
+ else
+ (Reg c, stream)
+ end
- datatype SeqParseState = SeqInit | SeqStart | SeqValue of int | SeqTerm
+ val (c, stream) = getMaybeBackslashed' stream
+ in
+ case c of
+ NoChar => getMaybeBackslashed stream
+ | _ => (c, stream)
+ end
- datatype seqParseMode = SpmChr | SpmStr
+ fun parseCharConst symtab stream =
+ let
+ val startOff = S.getOffset stream - 1
+ val (pos, stream) = S.getPosRaw startOff stream
+
+ val (chr, stream1) = getMaybeBackslashed stream
+ val v =
+ case chr of
+ Reg #"\000" => error (S.getPosDisc stream) "expected character"
+ | Reg c | EscSeqed c => ord c
+ | NoChar => raise Unreachable
+
+ val (c, stream2) = getMaybeBackslashed stream1
+ val () =
+ case c of
+ Reg #"'" => ()
+ | _ => error (S.getPosDisc stream1) "expected '"
- fun seqBound SpmChr = #"'"
- | seqBound SpmStr = #"\""
+ val endOff = S.getOffset stream2
+ val repr = S.getSubstr startOff endOff stream2
+ val id = ST.getId symtab repr
+ in
+ (CharConst (id, v), pos, stream2)
+ end
- fun seqExnConv mode (TkError (v, msg)) =
+ fun parseStrlit symtab stream =
let
- val bound = if mode = SpmChr then "'" else "\""
- val msg =
- String.translate (fn c => if c = #"%" then bound else str c) msg
+ val startOff = S.getOffset stream - 1
+ val (pos, stream) = S.getPosRaw startOff stream
+
+ fun collect stream =
+ let
+ val (c, stream) = getMaybeBackslashed stream
+ in
+ case c of
+ Reg #"\000" => error pos "unfinished string literal"
+ | Reg #"\"" => (S.getOffset stream, stream)
+ | _ => collect stream
+ end
+
+ val (endOff, stream) = collect stream
+ val s = S.getSubstr startOff endOff stream
+ val id = ST.getId symtab s
in
- TkError (v, msg)
+ (Strlit id, pos, stream)
end
- | seqExnConv _ _ = raise Unreachable
- fun unfinishedSeq SpmChr = "unfinished character constant"
- | unfinishedSeq SpmStr = "unfinished string literal"
+ fun parseId symtab _ stream =
+ let
+ val startOff = S.getOffset stream - 1
- fun seqParser mode SeqInit (stream, _) (SOME c) =
- if seqBound mode = c then
- (SeqStart, NONE, stream)
- else
- raise Unreachable
- | seqParser mode SeqStart (stream, _) (SOME c) =
- if c <> seqBound mode then
- let
- val (c, stream) =
- if c <> #"\\" then (SOME c, stream) else eatEscSeq stream
- in
- if c = NONE then
- (SeqStart, NONE, stream)
- else
- (SeqValue (ord $ Option.valOf c), NONE, stream)
- end
- else if mode = SpmStr then
- (SeqTerm, SOME $ StringConst "", stream)
+ fun collect stream =
+ let
+ val (c, stream') = S.getchar stream
+ in
+ if isNondigit c orelse isDigit c then
+ collect stream'
else
- raise seqExnConv SpmChr $ TkError (TkiDx 0, "expected value after %")
- | seqParser mode (SeqValue v) (stream, startOff) (SOME c) =
- if seqBound mode = c then
- let
- fun term s v =
- if mode = SpmChr then
- CharConst (s, v)
- else
- StringConst $ String.extract (s, 1, SOME $ String.size s - 2)
- in
- (SeqTerm, SOME $ term (finishSeqRead startOff stream) v, stream)
- end
- else if mode = SpmStr then
- let
- val (_, stream) =
- if c <> #"\\" then (SOME c, stream) else eatEscSeq stream
- in
- (SeqValue v, NONE, stream)
- end
+ (S.getOffset stream, stream)
+ end
+
+ val (endOff, stream) = collect stream
+ val id = S.getSubstr startOff endOff stream
+ val id = ST.getId symtab id
+ in
+ (Id id, stream)
+ end
+
+ fun parseNumber dx stream =
+ let
+ fun collect stream =
+ let
+ val (c, stream') = S.getchar stream
+ val (c1, stream2) = S.getchar stream'
+ in
+ if isDigit c orelse c = #"." then
+ collect stream'
+ else if Char.toLower c = #"e" andalso (c1 = #"+" orelse c1 = #"-")
+ then
+ collect stream2
+ else if isNondigit c then
+ collect stream'
else
- raise seqExnConv SpmChr $ TkError (TkiDx 0, "expected % after value")
- | seqParser _ SeqTerm _ (SOME _) =
- raise Unreachable
- | seqParser mode state (_, _) NONE =
- raise case state of
- SeqInit => Unreachable
- | SeqStart => seqExnConv mode $ TkError (TkiStart, unfinishedSeq mode)
- | SeqValue _ =>
- seqExnConv mode $ TkError (TkiStart, unfinishedSeq mode)
- | SeqTerm => Unreachable
+ (S.getOffset stream, stream)
+ end
- fun charParser _ = seqParser SpmChr
- fun strParser _ = seqParser SpmStr
+ val startOff = S.getOffset stream - dx
+ val (pos, stream) = S.getPosRaw startOff stream
+ val (endOff, stream) = collect stream
+
+ val s = S.getSubstr startOff endOff stream
+ in
+ (Num s, pos, stream)
+ end
fun getDir stream = OS.Path.getParent o S.getFname $ stream
@@ -922,10 +726,8 @@ struct
let
fun skip prevIsAsterisk stream =
let
- val (c, stream) =
- case S.getchar stream of
- (NONE, _) => error pos "unfinished comment"
- | (SOME c, stream) => (c, stream)
+ val (c, stream) = S.getchar stream
+ val () = if c = #"\000" then error pos "unfinished comment" else ()
in
if prevIsAsterisk andalso c = #"/" then
stream
@@ -947,13 +749,13 @@ struct
error pos "expected \\n after backslash"
end
in
- case c of
- SOME c =>
- if c = #"\n" then
- stream
- else
- raiseErr ()
- | NONE => raiseErr ()
+ if c = #"\000" then
+ raiseErr ()
+ else
+ if c = #"\n" then
+ stream
+ else
+ raiseErr ()
end
fun processSymbol symtab stream =
@@ -985,31 +787,39 @@ struct
and getToken symtab stream =
let
val (c, stream) = S.getchar stream
-
+ val (c1, _) = S.getchar stream
fun conv tk (pos, stream) = (tk, pos, stream)
- fun @-> parser acc = parserWrapper stream (parser symtab) acc
+
+ fun @-> parser =
+ let
+ val (pos, stream) = S.getPosAfterChar stream
+ val (tk, stream) = parser pos stream
+ in
+ (tk, pos, stream)
+ end
in
- case c of
- NONE => conv EOS $ S.EOFpos stream
- | SOME c =>
- if c = #"\n" then
- conv NewLine $ S.getPosAfterChar stream
- else if Char.isSpace c then
- getToken symtab stream
- else if isIdStart c then
- @-> idParser ()
- else if isDigit c then
- @-> numParser NpInit
- else if c = #"'" then
- @-> charParser SeqInit
- else if c = #"\"" then
- @-> strParser SeqInit
- else if isStartForFsm c then
- processSymbol symtab stream
- else if c = #"\\" then
- getToken symtab $ handleBackslash stream
- else
- unexpectedCharRaise stream c
+ if c = #"\000" then
+ conv EOS $ S.EOFpos stream
+ else if c = #"\n" then
+ conv NewLine $ S.getPosAfterChar stream
+ else if Char.isSpace c then
+ getToken symtab stream
+ else if isNondigit c then
+ @-> $ parseId symtab
+ else if isDigit c then
+ parseNumber 1 stream
+ else if c = #"." andalso isDigit c1 then
+ parseNumber 2 stream
+ else if c = #"'" then
+ parseCharConst symtab stream
+ else if c = #"\"" then
+ parseStrlit symtab stream
+ else if isStartForFsm c then
+ processSymbol symtab stream
+ else if c = #"\\" then
+ getToken symtab $ handleBackslash stream
+ else
+ unexpectedCharRaise stream c
end
fun debugPrint fname =
diff --git a/tokenizer.sig b/tokenizer.sig
index 67666a4..f221ab3 100644
--- a/tokenizer.sig
+++ b/tokenizer.sig
@@ -2,25 +2,17 @@ signature TOKENIZER = sig
structure ST: SYMTAB
structure S: STREAM
- datatype intType = ItDec | ItOct | ItHex
- datatype intSfx = IsNone | IsU | IsL | IsUL | IsLL | IsULL
- datatype floatSfx = FsNone | FsF | FsL
-
- datatype numConst =
- IntConst of intType * string * intSfx |
- FloatConst of string * floatSfx
-
datatype token =
Invalid |
EOS |
NewLine |
MacroEnd of int |
- Num of numConst |
+ Num of string |
Id of int |
- CharConst of string * int |
- StringConst of string |
+ CharConst of int * int |
+ Strlit of int |
kwBreak |
kwCase |