summaryrefslogtreecommitdiff
path: root/tokenizer.fun
diff options
context:
space:
mode:
authorVladimir Azarov <avm@intermediate-node.net>2025-05-12 01:51:27 +0200
committerVladimir Azarov <avm@intermediate-node.net>2025-05-12 01:51:27 +0200
commit52a6f8656e8a600a2c59fa2802fb46fafb30de45 (patch)
tree72511efdccc742709f40e52ca73b708a0c74c1c6 /tokenizer.fun
parente99a8dc48ede26696be2ba75a8cb0d5122d94598 (diff)
Object-like macros
Diffstat (limited to 'tokenizer.fun')
-rw-r--r--tokenizer.fun133
1 files changed, 53 insertions, 80 deletions
diff --git a/tokenizer.fun b/tokenizer.fun
index 4cb5d1d..5162308 100644
--- a/tokenizer.fun
+++ b/tokenizer.fun
@@ -16,6 +16,8 @@ struct
Invalid |
EOS |
NewLine |
+ MacroStart of string |
+ MacroEnd |
Num of numConst |
@@ -125,15 +127,10 @@ struct
CppError |
CppPragma
- val kwPrefix = #"`"
- val cppPrefix = #"$"
-
- type fullToken = S.pos * token
-
datatype tkErrorAuxInfo = TkiEOF | TkiDx of int | TkiStart
exception TkError of tkErrorAuxInfo * string
- exception TkErrorAug of S.ppos * string
+ exception TkErrorAug of S.pos * string
exception ExpectedCppDir (* handled in postprocess *)
@@ -143,13 +140,19 @@ struct
exception TokenWithoutRepr
exception SuffixWithoutRepr
+ val kwPrefix = #"`"
+ val cppPrefix = #"$"
+ val otherPrefix = #"@"
+
val tokenRepr =
let
fun & repr = str kwPrefix ^ repr
fun % repr = str cppPrefix ^ repr
in
[
- (NewLine, "NewLine"),
+ (NewLine, "@NewLine"),
+ (EOS, "@EOS"),
+ (MacroEnd, "@Mend"),
(kwBreak, &"break"),
(kwCase, &"case"),
@@ -280,6 +283,7 @@ struct
val token2str = fn
Id s => s
+ | MacroStart macro => "m(" ^ macro ^ ")"
| Num (IntConst (it, str, sfx)) =>
let
val intType =
@@ -348,12 +352,12 @@ struct
val () = List.app
(fn (_, repr) => update (lookupTable, firstChr repr, true))
$ List.filter
- (fn (tk, repr) =>
+ (fn (_, repr) =>
let
val c = String.sub (repr, 0)
in
c <> kwPrefix andalso c <> cppPrefix
- andalso tk <> NewLine
+ andalso c <> otherPrefix
end)
tokenRepr
@@ -437,7 +441,7 @@ struct
fun fsmEat stream =
let
open Array
- val pos = S.getPos stream
+ val (pos, stream) = S.getPos stream
fun get curState stream =
let
@@ -456,14 +460,15 @@ struct
get nextState stream
end
end
+ val (tk, stream) = get 0 stream
in
- (fn (tk, stream) => ((pos, tk), stream)) $ get 0 stream
+ (tk, pos, stream)
end
fun tkError2aug stream (dx, msg) =
let
- val (id, pos) = S.getPosAfterCharRead stream
- val pos = S.pos2ppos (id, pos + dx) stream
+ val off = S.getOffset stream - 1 + dx
+ val (pos, _) = S.getPosRaw off stream
in
TkErrorAug (pos, msg)
end
@@ -471,22 +476,20 @@ struct
fun parserWrapper stream parser acc =
let
val stream = S.ungetc stream
- val P as (_, startOff) = S.getPos stream
- fun parse' stream acc = let
+ val startOff = S.getOffset stream
+ val (pos, stream) = S.getPos stream
+
+ fun parse' stream acc =
+ let
val (c, stream) = S.getchar stream
val (acc, tk, stream) = parser acc (stream, startOff) c handle
TkError (TkiDx dx, msg) => raise tkError2aug stream (dx, msg)
- | TkError (TkiStart, msg) =>
- let
- val startPos = S.pos2ppos P stream
- in
- raise TkErrorAug (startPos, msg)
- end
+ | TkError (TkiStart, msg) => raise TkErrorAug (pos, msg)
| TkError (TkiEOF, msg) =>
let
- val pos = S.pposWithoutCol $ S.pos2ppos P stream
+ val (pos, _) = S.EOFpos stream
in
raise TkErrorAug (pos, msg)
end
@@ -498,16 +501,11 @@ struct
val (tk, stream) = parse' stream acc
in
- ((P, tk): fullToken, stream)
+ (tk, pos, stream)
end
fun finishSeqRead startOff stream =
- let
- val (_, endOff) = S.getPos stream
- val s = S.getSubstr startOff endOff stream
- in
- s
- end
+ S.getSubstr startOff (S.getOffset stream) stream
fun keywordHashtableGen () =
let
@@ -871,19 +869,14 @@ struct
| formCppDir kwIf = CppIf
| formCppDir _ = raise ExpectedCppDir
- fun handleCppDir tk prevPos stream =
- let
- val (pos, tk') = tk
- in
- (prevPos, formCppDir tk') handle
+ fun handleCppDir (pos, tk) =
+ formCppDir tk handle
ExpectedCppDir =>
- raise TkErrorAug (S.pos2ppos pos stream,
- "expected preprocessor directive")
- end
+ raise TkErrorAug (pos, "expected preprocessor directive")
fun unexpectedCharRaise stream c =
let
- val pos = S.pos2ppos (S.getPosAfterCharRead stream) stream
+ val (pos, _) = S.getPosAfterChar stream
val repr =
if isPrintable c then
str c
@@ -899,13 +892,8 @@ struct
let
val (c, stream) =
case S.getchar stream of
- (NONE, _) =>
- let
- val pos = S.pos2ppos pos stream
- in
- raise TkErrorAug (pos, "unfinished comment")
- end
- | (SOME c, stream) => (c, stream)
+ (NONE, _) => raise TkErrorAug (pos, "unfinished comment")
+ | (SOME c, stream) => (c, stream)
in
if prevIsAsterisk andalso c = #"/" then
stream
@@ -922,8 +910,7 @@ struct
val raiseErr = fn () =>
let
- val pos = S.getPosAfterCharRead stream
- val pos = S.pos2ppos pos stream
+ val (pos, _) = S.getPosAfterChar stream
in
raise TkErrorAug (pos, "expected \\n after backslash")
end
@@ -939,40 +926,39 @@ struct
fun processSymbol stream =
let
- val (T as (p, tk), stream) = fsmEat $ S.ungetc stream
+ val (tk, pos, stream) = fsmEat $ S.ungetc stream
+ val S.Pos (_, _, col) = pos
in
case tk of
- CommentStart => getToken $ skipComment stream p
- | DoubleDot => (SOME (p, Dot), S.ungetc stream)
+ CommentStart => getToken $ skipComment stream pos
+ | DoubleDot => (Dot, pos, S.ungetc stream)
| Hash =>
- if S.isFirstOnLine p stream then
+ if col = 1 then
let
- val (tk, stream) = getToken stream
+ val (tk, pos', stream) = getToken stream
in
- case tk of
- NONE =>
- raise TkErrorAug (S.pos2ppos p stream,
- "unfinished preprecessor directive")
- | SOME tk =>
- (SOME $ handleCppDir tk p stream, stream)
+ if tk = EOS then
+ raise TkErrorAug (pos, "unfinished preprecessor directive")
+ else
+ (handleCppDir (pos', tk), pos, stream)
end
else
- (SOME T, stream)
- | _ => (SOME T, stream)
+ (tk, pos, stream)
+ | _ => (tk, pos, stream)
end
and getToken stream =
let
val (c, stream) = S.getchar stream
- fun @-> parser acc =
- (fn (tk, s) => (SOME tk, s)) $ parserWrapper stream parser acc
+ fun conv tk (pos, stream) = (tk, pos, stream)
+ fun @-> parser acc = parserWrapper stream parser acc
in
case c of
- NONE => (NONE, stream)
+ NONE => conv EOS $ S.EOFpos stream
| SOME c =>
if c = #"\n" then
- (SOME (S.getPosAfterCharRead stream, NewLine), stream)
+ conv NewLine $ S.getPosAfterChar stream
else if Char.isSpace c then
getToken stream
else if isIdStart c then
@@ -991,25 +977,12 @@ struct
unexpectedCharRaise stream c
end
- fun tokenize stream =
- let
- fun aux acc stream =
- let
- val (tk, stream) = getToken stream
- in
- case tk of
- NONE => rev acc
- | SOME tk => aux (tk :: acc) stream
- end
- in
- aux [] stream
- end
-
+ (* TODO: remove *)
fun debugPrint tkl fname =
let
- fun print' line _ ((_, NewLine) :: tks) =
+ fun print' line _ ((NewLine, _) :: tks) =
print' (line + 1) true tks
- | print' line firstOnLine ((_, tk) :: tks) = (
+ | print' line firstOnLine ((tk, _) :: tks) = (
if firstOnLine then (
print "\n";
printLn $ fname ^ ":" ^ Int.toString line;