diff options
author | Vladimir Azarov <avm@intermediate-node.net> | 2025-05-12 01:51:27 +0200 |
---|---|---|
committer | Vladimir Azarov <avm@intermediate-node.net> | 2025-05-12 01:51:27 +0200 |
commit | 52a6f8656e8a600a2c59fa2802fb46fafb30de45 (patch) | |
tree | 72511efdccc742709f40e52ca73b708a0c74c1c6 /tokenizer.fun | |
parent | e99a8dc48ede26696be2ba75a8cb0d5122d94598 (diff) |
Object-like macros
Diffstat (limited to 'tokenizer.fun')
-rw-r--r-- | tokenizer.fun | 133 |
1 files changed, 53 insertions, 80 deletions
diff --git a/tokenizer.fun b/tokenizer.fun index 4cb5d1d..5162308 100644 --- a/tokenizer.fun +++ b/tokenizer.fun @@ -16,6 +16,8 @@ struct Invalid | EOS | NewLine | + MacroStart of string | + MacroEnd | Num of numConst | @@ -125,15 +127,10 @@ struct CppError | CppPragma - val kwPrefix = #"`" - val cppPrefix = #"$" - - type fullToken = S.pos * token - datatype tkErrorAuxInfo = TkiEOF | TkiDx of int | TkiStart exception TkError of tkErrorAuxInfo * string - exception TkErrorAug of S.ppos * string + exception TkErrorAug of S.pos * string exception ExpectedCppDir (* handled in postprocess *) @@ -143,13 +140,19 @@ struct exception TokenWithoutRepr exception SuffixWithoutRepr + val kwPrefix = #"`" + val cppPrefix = #"$" + val otherPrefix = #"@" + val tokenRepr = let fun & repr = str kwPrefix ^ repr fun % repr = str cppPrefix ^ repr in [ - (NewLine, "NewLine"), + (NewLine, "@NewLine"), + (EOS, "@EOS"), + (MacroEnd, "@Mend"), (kwBreak, &"break"), (kwCase, &"case"), @@ -280,6 +283,7 @@ struct val token2str = fn Id s => s + | MacroStart macro => "m(" ^ macro ^ ")" | Num (IntConst (it, str, sfx)) => let val intType = @@ -348,12 +352,12 @@ struct val () = List.app (fn (_, repr) => update (lookupTable, firstChr repr, true)) $ List.filter - (fn (tk, repr) => + (fn (_, repr) => let val c = String.sub (repr, 0) in c <> kwPrefix andalso c <> cppPrefix - andalso tk <> NewLine + andalso c <> otherPrefix end) tokenRepr @@ -437,7 +441,7 @@ struct fun fsmEat stream = let open Array - val pos = S.getPos stream + val (pos, stream) = S.getPos stream fun get curState stream = let @@ -456,14 +460,15 @@ struct get nextState stream end end + val (tk, stream) = get 0 stream in - (fn (tk, stream) => ((pos, tk), stream)) $ get 0 stream + (tk, pos, stream) end fun tkError2aug stream (dx, msg) = let - val (id, pos) = S.getPosAfterCharRead stream - val pos = S.pos2ppos (id, pos + dx) stream + val off = S.getOffset stream - 1 + dx + val (pos, _) = S.getPosRaw off stream in TkErrorAug (pos, msg) end @@ -471,22 +476,20 @@ struct fun parserWrapper stream parser acc = let val stream = S.ungetc stream - val P as (_, startOff) = S.getPos stream - fun parse' stream acc = let + val startOff = S.getOffset stream + val (pos, stream) = S.getPos stream + + fun parse' stream acc = + let val (c, stream) = S.getchar stream val (acc, tk, stream) = parser acc (stream, startOff) c handle TkError (TkiDx dx, msg) => raise tkError2aug stream (dx, msg) - | TkError (TkiStart, msg) => - let - val startPos = S.pos2ppos P stream - in - raise TkErrorAug (startPos, msg) - end + | TkError (TkiStart, msg) => raise TkErrorAug (pos, msg) | TkError (TkiEOF, msg) => let - val pos = S.pposWithoutCol $ S.pos2ppos P stream + val (pos, _) = S.EOFpos stream in raise TkErrorAug (pos, msg) end @@ -498,16 +501,11 @@ struct val (tk, stream) = parse' stream acc in - ((P, tk): fullToken, stream) + (tk, pos, stream) end fun finishSeqRead startOff stream = - let - val (_, endOff) = S.getPos stream - val s = S.getSubstr startOff endOff stream - in - s - end + S.getSubstr startOff (S.getOffset stream) stream fun keywordHashtableGen () = let @@ -871,19 +869,14 @@ struct | formCppDir kwIf = CppIf | formCppDir _ = raise ExpectedCppDir - fun handleCppDir tk prevPos stream = - let - val (pos, tk') = tk - in - (prevPos, formCppDir tk') handle + fun handleCppDir (pos, tk) = + formCppDir tk handle ExpectedCppDir => - raise TkErrorAug (S.pos2ppos pos stream, - "expected preprocessor directive") - end + raise TkErrorAug (pos, "expected preprocessor directive") fun unexpectedCharRaise stream c = let - val pos = S.pos2ppos (S.getPosAfterCharRead stream) stream + val (pos, _) = S.getPosAfterChar stream val repr = if isPrintable c then str c @@ -899,13 +892,8 @@ struct let val (c, stream) = case S.getchar stream of - (NONE, _) => - let - val pos = S.pos2ppos pos stream - in - raise TkErrorAug (pos, "unfinished comment") - end - | (SOME c, stream) => (c, stream) + (NONE, _) => raise TkErrorAug (pos, "unfinished comment") + | (SOME c, stream) => (c, stream) in if prevIsAsterisk andalso c = #"/" then stream @@ -922,8 +910,7 @@ struct val raiseErr = fn () => let - val pos = S.getPosAfterCharRead stream - val pos = S.pos2ppos pos stream + val (pos, _) = S.getPosAfterChar stream in raise TkErrorAug (pos, "expected \\n after backslash") end @@ -939,40 +926,39 @@ struct fun processSymbol stream = let - val (T as (p, tk), stream) = fsmEat $ S.ungetc stream + val (tk, pos, stream) = fsmEat $ S.ungetc stream + val S.Pos (_, _, col) = pos in case tk of - CommentStart => getToken $ skipComment stream p - | DoubleDot => (SOME (p, Dot), S.ungetc stream) + CommentStart => getToken $ skipComment stream pos + | DoubleDot => (Dot, pos, S.ungetc stream) | Hash => - if S.isFirstOnLine p stream then + if col = 1 then let - val (tk, stream) = getToken stream + val (tk, pos', stream) = getToken stream in - case tk of - NONE => - raise TkErrorAug (S.pos2ppos p stream, - "unfinished preprecessor directive") - | SOME tk => - (SOME $ handleCppDir tk p stream, stream) + if tk = EOS then + raise TkErrorAug (pos, "unfinished preprecessor directive") + else + (handleCppDir (pos', tk), pos, stream) end else - (SOME T, stream) - | _ => (SOME T, stream) + (tk, pos, stream) + | _ => (tk, pos, stream) end and getToken stream = let val (c, stream) = S.getchar stream - fun @-> parser acc = - (fn (tk, s) => (SOME tk, s)) $ parserWrapper stream parser acc + fun conv tk (pos, stream) = (tk, pos, stream) + fun @-> parser acc = parserWrapper stream parser acc in case c of - NONE => (NONE, stream) + NONE => conv EOS $ S.EOFpos stream | SOME c => if c = #"\n" then - (SOME (S.getPosAfterCharRead stream, NewLine), stream) + conv NewLine $ S.getPosAfterChar stream else if Char.isSpace c then getToken stream else if isIdStart c then @@ -991,25 +977,12 @@ struct unexpectedCharRaise stream c end - fun tokenize stream = - let - fun aux acc stream = - let - val (tk, stream) = getToken stream - in - case tk of - NONE => rev acc - | SOME tk => aux (tk :: acc) stream - end - in - aux [] stream - end - + (* TODO: remove *) fun debugPrint tkl fname = let - fun print' line _ ((_, NewLine) :: tks) = + fun print' line _ ((NewLine, _) :: tks) = print' (line + 1) true tks - | print' line firstOnLine ((_, tk) :: tks) = ( + | print' line firstOnLine ((tk, _) :: tks) = ( if firstOnLine then ( print "\n"; printLn $ fname ^ ":" ^ Int.toString line; |