diff options
Diffstat (limited to 'tokenizer.sml')
-rw-r--r-- | tokenizer.sml | 1018 |
1 files changed, 0 insertions, 1018 deletions
diff --git a/tokenizer.sml b/tokenizer.sml deleted file mode 100644 index 10c2e77..0000000 --- a/tokenizer.sml +++ /dev/null @@ -1,1018 +0,0 @@ -structure Tokenizer:> TOKENIZER = struct - - datatype intType = ItDec | ItOct | ItHex - datatype intSfx = IsNone | IsU | IsL | IsUL | IsLL | IsULL - datatype floatSfx = FsNone | FsF | FsL - - datatype numConst = - IntConst of intType * string * intSfx | - FloatConst of string * floatSfx - - datatype token = - Invalid | - EOS | - NewLine | - - Num of numConst | - - Id of string | - CharConst of string * int | - StringConst of string | - - kwBreak | - kwCase | - kwChar | - kwConst | - kwContinue | - kwDefault | - kwDouble | - kwElse | - kwEnum | - kwExtern | - kwFloat | - kwFor | - kwGoto | - kwInt | - kwLong | - kwRegister | - kwReturn | - kwShort | - kwSigned | - kwSizeof | - kwStruct | - kwSwitch | - kwTypedef | - kwUnion | - kwUnsigned | - kwVoid | - kwVolatile | - - LParen | - RParen | - LBracket | - RBracket | - LBrace | - RBrace | - - QuestionMark | - Colon | - Coma | - Semicolon | - - Arrow | - Plus | - DoublePlus| - Minus | - DoubleMinus | - Ampersand | - Asterisk | - Slash | - Tilde | - ExclMark | - Percent | - DoubleGreater | - DoubleLess | - Greater | - Less | - EqualSign | - LessEqualSign | - GreaterEqualSign | - DoubleEqualSign | - ExclMarkEqualSign | - Cap | - VerticalBar | - DoubleAmpersand | - DoubleVerticalBar | - - AsteriskEqualSign | - SlashEqualSign | - PercentEqualSign | - PlusEqualSign | - MinusEqualSign | - DoubleLessEqualSign | - DoubleGreaterEqualSign | - AmpersandEqualSign | - CapEqualSign | - VerticalBarEqualSign | - - Hash | - DoubleHash | - - Dot | - DoubleDot | - TripleDot | - - CommentStart | - - CppInclude | - CppDefine | - CppUndef | - CppIf | - CppIfdef | - CppIfndef | - CppElse | - CppElif | - CppEndif | - CppWarning | - CppError | - CppPragma - - val kwPrefix = #"@" - val cppPrefix = #"$" - - type fullToken = Stream.pos * token - - datatype tkErrorAuxInfo = TkiEOF | TkiDx of int | TkiStart - - exception TkError of tkErrorAuxInfo * string - exception TkErrorAug of Stream.ppos * string - - exception ExpectedCppDir (* handled in postprocess *) - - exception FsmTableIsTooSmall - - (* Unreachable (should be) *) - exception TokenWithoutRepr - exception SuffixWithoutRepr - - val tokenRepr = - let - fun & repr = str kwPrefix ^ repr - fun % repr = str cppPrefix ^ repr - in - [ - (NewLine, "NewLine"), - - (kwBreak, &"break"), - (kwCase, &"case"), - (kwChar, &"char"), - (kwConst, &"const"), - (kwContinue, &"continue"), - (kwDefault, &"default"), - (kwDouble, &"double"), - (kwElse, &"else"), - (kwEnum, &"enum"), - (kwExtern, &"extern"), - (kwFloat, &"float"), - (kwFor, &"for"), - (kwGoto, &"goto"), - (kwInt, &"int"), - (kwLong, &"long"), - (kwRegister, &"register"), - (kwReturn, &"return"), - (kwShort, &"short"), - (kwSigned, &"signed"), - (kwSizeof, &"sizeof"), - (kwStruct, &"struct"), - (kwSwitch, &"switch"), - (kwTypedef, &"typedef"), - (kwUnion, &"union"), - (kwUnsigned, &"unsigned"), - (kwVoid, &"void"), - (kwVolatile, &"volatile"), - - (LParen, "("), - (RParen, ")"), - (LBracket, "["), - (RBracket, "]"), - (LBrace, "{"), - (RBrace, "}"), - - (QuestionMark, "?"), - (Colon, ":"), - (Coma, ","), - (Semicolon, ";"), - - (Arrow, "->"), - (Plus, "+"), - (DoublePlus, "++"), - (Minus, "-"), - (DoubleMinus, "--"), - (Ampersand, "&"), - (Asterisk, "*"), - (Slash, "/"), - (Tilde, "~"), - (ExclMark, "!"), - (Percent, "%"), - (DoubleLess, "<<"), - (DoubleGreater, ">>"), - (Less, "<"), - (Greater, ">"), - (EqualSign, "="), - (LessEqualSign, "<="), - (GreaterEqualSign, ">="), - (DoubleEqualSign, "=="), - (ExclMarkEqualSign, "!="), - (Cap, "^"), - (VerticalBar, "|"), - (DoubleAmpersand, "&&"), - (DoubleVerticalBar, "||"), - - (AsteriskEqualSign, "*="), - (SlashEqualSign, "/="), - (PercentEqualSign, "%="), - (PlusEqualSign, "+="), - (MinusEqualSign, "-="), - (DoubleLessEqualSign, "<<="), - (DoubleGreaterEqualSign, ">>="), - (AmpersandEqualSign, "&="), - (CapEqualSign, "^="), - (VerticalBarEqualSign, "|="), - - (Hash, "#"), - (DoubleHash, "##"), - - (Dot, "."), - (DoubleDot, ".."), - (TripleDot, "..."), - - (CommentStart, "/*"), - - (CppInclude, %"include"), - (CppDefine, %"define"), - (CppUndef, %"undef"), - (CppIf, %"if"), - (CppIfdef, %"ifdef"), - (CppIfndef, %"ifndef"), - (CppElse, %"else"), - (CppElif, %"elif"), - (CppEndif, %"endif"), - (CppWarning, %"warning"), - (CppError, %"error"), - (CppPragma, %"pragma") - ] - end - - val intSuffixRepr = [ - (IsNone, ""), - (IsU, "u"), - (IsL, "l"), - (IsUL, "ul"), - (IsLL, "ll"), - (IsULL, "ull") - ] - - val floatSuffixRepr = [ - (FsNone, ""), - (FsF, "f"), - (FsL, "l") - ] - - fun getSfxRepr sfx buf onError = - case List.find (fn (sfx', _) => sfx' = sfx) buf of - NONE => onError () - | SOME (_, repr) => repr - - fun getSfxReprSimple sfx buf = - getSfxRepr sfx buf (fn () => raise SuffixWithoutRepr) - - val token2str = fn - Id s => "id:" ^ s - | Num (IntConst (it, str, sfx)) => - let - val intType = - case it of - ItDec => "" - | ItOct => "0" - | ItHex => "0x" - in - intType ^ str ^ "`" ^ getSfxReprSimple sfx intSuffixRepr ^ "`" - end - | Num (FloatConst (str, sfx)) => - str ^ "`" ^ getSfxReprSimple sfx floatSuffixRepr ^ "`" - | CharConst (repr, _) => repr - | StringConst s => - "\"" ^ s ^ "\"" - | v => - case List.find (fn (x, _) => x = v) tokenRepr of - SOME (_, repr) => repr - | NONE => raise TokenWithoutRepr - - fun printToken tk = print $ token2str tk - - fun isIdStart c = Char.isAlpha c orelse c = #"_" - fun isIdBody c = Char.isAlphaNum c orelse c = #"_" - - fun isOctal c = ord c >= ord #"0" andalso ord c < ord #"8" - val isDigit = Char.isDigit - val isHexDigit = Char.isHexDigit - - fun isPrintable c = Char.isPrint c andalso c <> #" " - - (* FSM for parsing symbols *) - - val maxStates = 51 - - fun fsmInsert (nextState, buf) curState tk (c :: cs) = - let - open Array - - val (_, row) = sub (buf, curState) - val potNextState = sub (row, ord c) - in - if potNextState <> ~1 then - fsmInsert (nextState, buf) potNextState tk cs - else ( - update (row, ord c, !nextState); - nextState := !nextState + 1; - fsmInsert (nextState, buf) (!nextState - 1) tk cs - ) - end - | fsmInsert (_, buf) curState tk [] = - let - open Array - val (_, row) = sub (buf, curState) - in - update (buf, curState, (tk, row)) - end - - fun isStartForFsmGen () = - let - open Array - - val lookupTable = array (128, false) - - fun firstChr s = ord $ String.sub (s, 0) - val () = List.app - (fn (_, repr) => update (lookupTable, firstChr repr, true)) - $ List.filter - (fn (tk, repr) => - let - val c = String.sub (repr, 0) - in - c <> kwPrefix andalso c <> cppPrefix - andalso tk <> NewLine - end) - tokenRepr - - in - fn c => sub (lookupTable, ord c) - end - - val isStartForFsm = isStartForFsmGen () - - fun fsmTableCreate () = - let - open Array - - val T as (nextState, buf) = - (ref 1, array (maxStates, (Invalid, array (128, ~1)))) - val r = ref 1 - - fun filterNeeded [] acc = acc - | filterNeeded ((T as (_, repr)) :: tks) acc = - filterNeeded tks - (if isStartForFsm $ String.sub (repr, 0) then - T :: acc - else - acc) - - val tokenRepr = filterNeeded tokenRepr [] - - fun fsmInsert' T curState tk repr = fsmInsert T curState tk repr handle - Subscript => raise FsmTableIsTooSmall - in - while !r < length buf do ( - update (buf, !r, (Invalid, array(128, ~1))); - r := !r + 1 - ); - - List.app (fn (v, p) => fsmInsert' T 0 v $ explode p) tokenRepr; - if !nextState <> maxStates then - printLn $ "note: Fsm table size can be smaller: " - ^ Int.toString (!nextState) ^ " is enough" - else (); - T - end - - (* Unused right now - fun printTable (nextState, buf) = - let - fun printRow i row = - if i = length row then - print "\n" - else - let - val state = sub (row, i) - in - if state = ~1 then - () - else - print ((str (chr i)) ^ ": " ^ (Int.toString state) ^ ", "); - printRow (i + 1) row - end - - fun print' rowNum buf = - if rowNum = !nextState then - () - else - let - val (tk, row) = sub (buf, rowNum) - in - print ((token2string tk) ^ ": "); - printRow 0 row; - print' (rowNum + 1) buf - end - in - print ("NextState: " ^ Int.toString (!nextState) ^ "\n"); - print' 0 buf; - print "\n" - end - *) - - val fsmTable = lazy fsmTableCreate - - fun fsmEat stream = - let - open Array - val pos = Stream.getPos stream - - fun get curState stream = - let - val (c, stream) = Stream.getchar stream - in - case c of - NONE => (#1 $ sub (#2 $ fsmTable (), curState), stream) - | SOME c => - let - val (tk, row) = sub (#2 $ fsmTable (), curState) - val nextState = sub (row, ord c) - in - if nextState = ~1 then - (tk, Stream.ungetc stream) - else - get nextState stream - end - end - in - (fn (tk, stream) => ((pos, tk), stream)) $ get 0 stream - end - - fun tkError2aug stream (dx, msg) = - let - val (id, pos) = Stream.getPosAfterCharRead stream - val pos = Stream.pos2ppos (id, pos + dx) stream - in - TkErrorAug (pos, msg) - end - - fun parseGeneric stream parser acc = - let - val stream = Stream.ungetc stream - val P as (_, startOff) = Stream.getPos stream - - fun parse' stream acc = let - val (c, stream) = Stream.getchar stream - - val (acc, tk, stream) = parser acc (stream, startOff) c handle - TkError (TkiDx dx, msg) => raise tkError2aug stream (dx, msg) - | TkError (TkiStart, msg) => - let - val startPos = Stream.pos2ppos P stream - in - raise TkErrorAug (startPos, msg) - end - | TkError (TkiEOF, msg) => - let - open Stream - val pos = pposWithoutCol $ pos2ppos P stream - in - raise TkErrorAug (pos, msg) - end - in - case tk of - NONE => parse' stream acc - | _ => (valOf tk, stream) - end - - val (tk, stream) = parse' stream acc - in - ((P, tk): fullToken, stream) - end - - fun finishSeqRead startOff stream = - let - val (_, endOff) = Stream.getPos stream - val s = Stream.getSubstr startOff endOff stream - in - s - end - - fun keywordHashtableGen () = - let - open Hashtable - val table = create 128 - val () = - List.app - (fn (tk, repr) => - if String.sub (repr, 0) = kwPrefix then - insert table (String.extract (repr, 1, NONE)) tk - else - ()) - tokenRepr - in - table - end - - val keywordHashtable = lazy keywordHashtableGen - - fun findKeyword str = - case Hashtable.lookup (keywordHashtable ()) str of - NONE => Id str - | SOME tk => tk - - fun idParser () (stream, startOff) c = - let - fun finalize stream = - let - val s = finishSeqRead startOff stream - val tk = findKeyword s - in - ((), SOME tk, stream) - end - in - case c of - NONE => finalize stream - | SOME c => - if isIdBody c then - ((), NONE, stream) - else - finalize (Stream.ungetc stream) - end - - datatype intMode = ImDec | ImOct | ImInvalidOct | ImHex - datatype floatMode = FmDot | FmExp - - datatype npState = NpInit | IntMode of intMode | FloatMode of floatMode - - fun getLongestSeq acc pred stream = - let - val (c, stream') = Stream.getchar stream - in - if isSome c andalso pred $ valOf c then - getLongestSeq (valOf c :: acc) pred stream' - else - (implode $ rev acc, stream) - end - - fun skipDigitSeq off stream = - let - val (res, stream) = getLongestSeq [] isDigit stream - in - if res = "" then - raise TkError (TkiDx off, "expected digit") - else - (String.size res, stream) - end - - fun getSuffixCommon buf off stream = - let - val (sfx, stream) = getLongestSeq [] Char.isAlpha stream - val sfx = String.map (fn c => Char.toLower c) sfx - val sfx = - case List.find (fn (_, repr) => repr = sfx) buf of - NONE => raise TkError (TkiDx off, "unknown suffix") - | SOME (sfx, _) => sfx - in - (sfx, stream) - end - - val getIntSuffix = getSuffixCommon intSuffixRepr 0 - fun getFloatSuffix off = getSuffixCommon floatSuffixRepr off - - (* - * It seems that there is an ambiguity in C: - * gcc/clang/cparser consider 0xe-3 as a fp constant and reject it. - * Right now we do not, but it may be reconsidered. - *) - fun numParser NpInit (stream, _) (SOME c) = - if c = #"0" then - let - val (c, stream') = Stream.getchar stream - in - case c of - NONE => - (NpInit, SOME $ Num $ IntConst (ItOct, "", IsNone), stream') - | SOME c => - if Char.toLower c = #"x" then - (IntMode ImHex, NONE, stream') - else - (IntMode ImOct, NONE, stream) - end - else - (IntMode ImDec, NONE, stream) - | numParser NpInit _ NONE = raise Unreachable - | numParser (IntMode mode) (stream, startOff) c = - let - val (pred, res, offset) = - case mode of - ImDec => (isDigit, ItDec, 0) - | ImOct => (isOctal, ItOct, 1) - | ImInvalidOct => (isDigit, ItOct, 1) - | ImHex => (isHexDigit, ItHex, 2) - - fun checkAndRaise m msg = - if mode = m then raise TkError (TkiStart, msg) else () - - fun finish () = - let - val () = checkAndRaise ImInvalidOct "invalid octal constant" - val stream = Stream.ungetc stream - val str = finishSeqRead (startOff + offset) stream - val (sfx, stream) = getIntSuffix stream - in - (IntMode mode, SOME $ Num $ IntConst (res, str, sfx), stream) - end - in - case c of - NONE => finish () - | SOME c => - if pred c then - (IntMode mode, NONE, stream) - else if c = #"." then - (FloatMode FmDot, NONE, stream) - else if Char.toLower c = #"e" then - (checkAndRaise ImHex - "floating constant can not come with 0x prefix"; - (FloatMode FmExp, NONE, stream)) - else - finish () - end - | numParser (FloatMode FmDot) (stream, startOff) _ = - let - val (len, stream) = skipDigitSeq 0 $ Stream.ungetc stream - - val (c, stream') = Stream.getchar stream - - fun finish () = - let - val str = finishSeqRead startOff stream - val (sfx, stream) = getFloatSuffix len stream - in - (FloatMode FmDot, SOME $ Num $ FloatConst (str, sfx), stream) - end - in - case c of - NONE => finish () - | SOME c => - if Char.toLower c = #"e" then - (FloatMode FmExp, NONE, stream') - else - finish () - end - | numParser (FloatMode FmExp) (stream, startOff) c = - let - val (off, stream) = - if c = NONE then - raise TkError (TkiDx 0, "expected digit") - else if valOf c <> #"+" andalso valOf c <> #"-" then - (0, Stream.ungetc stream) - else - (~1, stream) - - val (len, stream) = skipDigitSeq off stream - val str = finishSeqRead startOff stream - val (sfx, stream) = getFloatSuffix len stream - in - (FloatMode FmDot, SOME $ Num $ FloatConst (str, sfx), stream) - end - - fun chrIntVal c = - if isDigit c then - ord c - ord #"0" - else if ord c >= ord #"a" andalso ord c <= ord #"z" then - ord c - ord #"a" + 10 - else if ord c >= ord #"A" andalso ord c <= ord #"Z" then - ord c - ord #"A" + 10 - else - raise Unreachable - - fun parseOctalSeq stream c = - let - fun follow stream acc count = - if count = 3 then - (SOME $ chr acc, stream) - else - let - val (c, stream) = Stream.getchar stream - in - case c of - NONE => (SOME $ chr acc, stream) - | SOME c => - if isOctal c then - follow stream (acc * 8 + chrIntVal c) (count + 1) - else - (SOME $ chr acc, Stream.ungetc stream) - end - in - follow stream (chrIntVal c) 1 - end - - fun parseHexSeq stream = - let - fun follow stream acc count = - let - val (c, stream) = Stream.getchar stream - - val noHex = TkError (TkiDx 0, "\\x without hex digits") - in - case c of - NONE => - if count = 0 then - raise noHex - else - (SOME $ chr acc, stream) - | SOME c => - if isHexDigit c then - if count = 2 then - raise TkError (TkiDx 2, "hex sequence out of range") - else - follow stream (acc * 16 + chrIntVal c) (count + 1) - else - if count = 0 then - raise noHex - else - (SOME $ chr acc, Stream.ungetc stream) - end - in - follow stream 0 0 - end - - fun eatEscSeq stream = - let - fun raiseErr0 msg = raise TkError (TkiDx 0, msg) - - val (c, stream) = Stream.getchar stream - val c = - case c of - NONE => raiseErr0 "unfinished escape sequence" - | SOME c => c - - fun & c = (SOME c, stream) - in - case c of - #"'" => & #"'" - | #"\"" => & #"\"" - | #"?" => & #"?" - | #"\\" => & #"\\" - | #"a" => & #"\a" - | #"b" => & #"\b" - | #"f" => & #"\f" - | #"n" => & #"\n" - | #"r" => & #"\r" - | #"t" => & #"\t" - | #"v" => & #"\v" - | #"\n" => (NONE, stream) - | #"x" => parseHexSeq stream - | c => - if isOctal c then - parseOctalSeq stream c - else - raiseErr0 "unknown escape sequence" - end - - datatype SeqParseState = SeqInit | SeqStart | SeqValue of int | SeqTerm - - datatype seqParseMode = SpmChr | SpmStr - - fun seqBound SpmChr = #"'" - | seqBound SpmStr = #"\"" - - fun seqExnConv mode (TkError (v, msg)) = - let - val bound = if mode = SpmChr then "'" else "\"" - val msg = - String.translate (fn c => if c = #"%" then bound else str c) msg - in - TkError (v, msg) - end - | seqExnConv _ _ = raise Unreachable - - fun unfinishedSeq SpmChr = "unfinished character constant" - | unfinishedSeq SpmStr = "unfinished string literal" - - fun seqParser mode SeqInit (stream, _) (SOME c) = - if seqBound mode = c then - (SeqStart, NONE, stream) - else - raise Unreachable - | seqParser mode SeqStart (stream, _) (SOME c) = - if c <> seqBound mode then - let - val (c, stream) = - if c <> #"\\" then (SOME c, stream) else eatEscSeq stream - in - if c = NONE then - (SeqStart, NONE, stream) - else - (SeqValue (ord $ Option.valOf c), NONE, stream) - end - else if mode = SpmStr then - (SeqTerm, SOME $ StringConst "", stream) - else - raise seqExnConv SpmChr $ TkError (TkiDx 0, "expected value after %") - | seqParser mode (SeqValue v) (stream, startOff) (SOME c) = - if seqBound mode = c then - let - fun term s v = - if mode = SpmChr then - CharConst (s, v) - else - StringConst $ String.extract (s, 1, SOME $ String.size s - 2) - in - (SeqTerm, SOME $ term (finishSeqRead startOff stream) v, stream) - end - else if mode = SpmStr then - let - val (_, stream) = - if c <> #"\\" then (SOME c, stream) else eatEscSeq stream - in - (SeqValue v, NONE, stream) - end - else - raise seqExnConv SpmChr $ TkError (TkiDx 0, "expected % after value") - | seqParser _ SeqTerm _ (SOME _) = - raise Unreachable - | seqParser mode state (_, _) NONE = - raise case state of - SeqInit => Unreachable - | SeqStart => seqExnConv mode $ TkError (TkiStart, unfinishedSeq mode) - | SeqValue _ => - seqExnConv mode $ TkError (TkiStart, unfinishedSeq mode) - | SeqTerm => Unreachable - - val charParser = seqParser SpmChr - val strParser = seqParser SpmStr - - fun formCppDir (Id s) = - let - open String - in - case List.find - (fn (_, repr) => - sub (repr, 0) = cppPrefix andalso - extract (repr, 1, NONE) = s) - tokenRepr - of - SOME (tk, _) => tk - | NONE => raise ExpectedCppDir - end - | formCppDir kwElse = CppElse - | formCppDir _ = raise ExpectedCppDir - - fun handleCppDir tk prevPos stream = - let - val (pos, tk') = tk - in - (prevPos, formCppDir tk') handle - ExpectedCppDir => - raise TkErrorAug (Stream.pos2ppos pos stream, - "expected preprocessor directive") - end - - fun unexpectedCharRaise stream c = - let - open Stream - val pos = pos2ppos (getPosAfterCharRead stream) stream - val repr = - if isPrintable c then - str c - else - "<" ^ Int.toString (ord c) ^ ">" - in - raise TkErrorAug (pos, "unexpected character " ^ repr) - end - - fun skipComment stream pos = - let - fun skip prevIsAsterisk stream = - let - val (c, stream) = - case Stream.getchar stream of - (NONE, _) => - let - val pos = Stream.pos2ppos pos stream - in - raise TkErrorAug (pos, "unfinished comment") - end - | (SOME c, stream) => (c, stream) - in - if prevIsAsterisk andalso c = #"/" then - stream - else - skip (c = #"*") stream - end - in - skip false stream - end - - fun handleBackslash stream = - let - val (c, stream) = Stream.getchar stream - - val raiseErr = fn () => - let - val pos = Stream.getPosAfterCharRead stream - val pos = Stream.pos2ppos pos stream - in - raise TkErrorAug (pos, "expected \\n after backslash") - end - in - case c of - SOME c => - if c = #"\n" then - stream - else - raiseErr () - | NONE => raiseErr () - end - - fun processSymbol stream = - let - val (T as (p, tk), stream) = fsmEat $ Stream.ungetc stream - in - case tk of - CommentStart => getToken $ skipComment stream p - | DoubleDot => (SOME (p, Dot), Stream.ungetc stream) - | Hash => - if Stream.isFirstOnLine p stream then - let - val (tk, stream) = getToken stream - in - case tk of - NONE => - raise TkErrorAug (Stream.pos2ppos p stream, - "unfinished preprecessor directive") - | SOME tk => - (SOME $ handleCppDir tk p stream, stream) - end - else - (SOME T, stream) - | _ => (SOME T, stream) - end - - and getToken stream = - let - val (c, stream) = Stream.getchar stream - - fun @-> parser acc = - (fn (tk, s) => (SOME tk, s)) $ parseGeneric stream parser acc - in - case c of - NONE => (NONE, stream) - | SOME c => - if c = #"\n" then - (SOME (Stream.getPosAfterCharRead stream, NewLine), stream) - else if Char.isSpace c then - getToken stream - else if isIdStart c then - @-> idParser () - else if isDigit c then - @-> numParser NpInit - else if c = #"'" then - @-> charParser SeqInit - else if c = #"\"" then - @-> strParser SeqInit - else if isStartForFsm c then - processSymbol stream - else if c = #"\\" then - getToken $ handleBackslash stream - else - unexpectedCharRaise stream c - end - - fun tokenize stream = - let - fun aux acc stream = - let - val (tk, stream) = getToken stream - in - case tk of - NONE => rev acc - | SOME tk => aux (tk :: acc) stream - end - in - aux [] stream - end - - fun debugPrint tkl fname = - let - fun print' line _ ((_, NewLine) :: tks) = - print' (line + 1) true tks - | print' line firstOnLine ((_, tk) :: tks) = ( - if firstOnLine then ( - print "\n"; - printLn $ fname ^ ":" ^ Int.toString line; - print "\t") - else - (); - printToken tk; - print " "; - print' line false tks - ) - | print' _ _ [] = () - in - print' 1 true tkl; - print "\n" - end -end |