summaryrefslogtreecommitdiff
path: root/tokenizer.fun
diff options
context:
space:
mode:
authorVladimir Azarov <avm@intermediate-node.net>2025-05-26 14:42:35 +0200
committerVladimir Azarov <avm@intermediate-node.net>2025-05-26 14:42:35 +0200
commit6f3fa80b37ca5f8d992f5d6f66aee77ead303bf4 (patch)
tree1d3099280e63fac03d906b24bc6b877840348eab /tokenizer.fun
parentc6b6203f8420f76a47433717eab8026d524ec5c1 (diff)
Symbol table
Diffstat (limited to 'tokenizer.fun')
-rw-r--r--tokenizer.fun132
1 files changed, 68 insertions, 64 deletions
diff --git a/tokenizer.fun b/tokenizer.fun
index 6859a73..93a7099 100644
--- a/tokenizer.fun
+++ b/tokenizer.fun
@@ -1,7 +1,8 @@
-functor Tokenizer(structure H: HASHTABLE; structure S: STREAM)
+functor Tokenizer(structure ST: SYMTAB; structure S: STREAM)
: TOKENIZER =
struct
+ structure ST = ST
structure S = S
datatype intType = ItDec | ItOct | ItHex
@@ -16,11 +17,11 @@ struct
Invalid |
EOS |
NewLine |
- MacroEnd of string |
+ MacroEnd of int |
Num of numConst |
- Id of string |
+ Id of int |
CharConst of string * int |
StringConst of string |
@@ -269,6 +270,28 @@ struct
(FsL, "l")
]
+ fun initSymtab () =
+ let
+ val symtab = ST.init ()
+ fun ?s = String.sub (s, 0)
+ fun rest s = String.extract (s, 1, NONE)
+
+ fun clPpc tk (NONE, kw) = (SOME tk, kw)
+ | clPpc _ _ = raise Unreachable
+
+ fun clKw tk (ppc, NONE) = (ppc, SOME tk)
+ | clKw _ _ = raise Unreachable
+ in
+ app (fn (tk, repr) =>
+ if ?repr = ppcPrefix then
+ ignore $ ST.insert symtab (rest repr) (clPpc tk)
+ else if ?repr = kwPrefix then
+ ignore $ ST.insert symtab (rest repr) (clKw tk)
+ else
+ ()) tokenRepr;
+ symtab
+ end
+
fun getSfxRepr sfx buf onError =
case List.find (fn (sfx', _) => sfx' = sfx) buf of
NONE => onError ()
@@ -277,10 +300,18 @@ struct
fun getSfxReprSimple sfx buf =
getSfxRepr sfx buf (fn () => raise SuffixWithoutRepr)
- fun printToken (out, tk) =
+ fun printToken (out, symtab, tk) =
+ let
+ val ? = fn z =>
+ let
+ fun f (out, id) = Printf out `(ST.getStr symtab id) %
+ in
+ bind A1 f
+ end z
+ in
case tk of
- Id s => Printf out `s %
- | MacroEnd macro => Printf out `"mend(" `macro `")" %
+ Id id => Printf out ?id %
+ | MacroEnd mid => Printf out `"mend(" ?mid `")" %
| NewLine => Printf out `"\\n" %
| PpcInclude (dir, arg) =>
Printf out `"#include(" `dir `", " `arg `")" %
@@ -309,8 +340,9 @@ struct
Printf out C head `tail %
end
| NONE => raise TokenWithoutRepr
+ end
- val Ptk = fn z => bind A1 printToken z
+ val Ptk = fn z => bind A2 printToken z
fun isIdStart c = Char.isAlpha c orelse c = #"_"
fun isIdBody c = Char.isAlphaNum c orelse c = #"_"
@@ -408,7 +440,7 @@ struct
T
end
- fun printTable (nextState, buf) =
+ fun printTable symtab (nextState, buf) =
let
open Array
fun printRow i row =
@@ -432,7 +464,7 @@ struct
let
val (tk, row) = sub (buf, rowNum)
in
- printf `"row " I rowNum `" - " Ptk tk `": \t" %;
+ printf `"row " I rowNum `" - " Ptk symtab tk `": \t" %;
printRow 0 row;
print' (rowNum + 1) buf
end
@@ -514,36 +546,14 @@ struct
fun finishSeqRead startOff stream =
S.getSubstr startOff (S.getOffset stream) stream
- fun keywordHashtableGen () =
- let
- val table = H.createLog 7
- val () =
- List.app
- (fn (tk, repr) =>
- if String.sub (repr, 0) = kwPrefix then
- H.insert table (String.extract (repr, 1, NONE)) tk
- else
- ())
- tokenRepr
- in
- table
- end
-
- val keywordHashtable = lazy keywordHashtableGen
-
- fun findKeyword str =
- case H.lookup (keywordHashtable ()) str of
- NONE => Id str
- | SOME tk => tk
-
- fun idParser () (stream, startOff) c =
+ fun idParser symtab () (stream, startOff) c =
let
fun finalize stream =
let
val s = finishSeqRead startOff stream
- val tk = findKeyword s
+ val id = ST.getId symtab s
in
- ((), SOME tk, stream)
+ ((), SOME $ Id id, stream)
end
in
case c of
@@ -600,7 +610,7 @@ struct
* gcc/clang/cparser consider 0xe-3 as a fp constant and reject it.
* Right now we do not, but it may be reconsidered.
*)
- fun numParser NpInit (stream, _) (SOME c) =
+ fun numParser _ NpInit (stream, _) (SOME c) =
if c = #"0" then
let
val (c, stream') = S.getchar stream
@@ -616,8 +626,8 @@ struct
end
else
(IntMode ImDec, NONE, stream)
- | numParser NpInit _ NONE = raise Unreachable
- | numParser (IntMode mode) (stream, startOff) c =
+ | numParser _ NpInit _ NONE = raise Unreachable
+ | numParser _ (IntMode mode) (stream, startOff) c =
let
val (pred, res, offset) =
case mode of
@@ -653,7 +663,7 @@ struct
else
finish ()
end
- | numParser (FloatMode FmDot) (stream, startOff) _ =
+ | numParser _ (FloatMode FmDot) (stream, startOff) _ =
let
val (len, stream) = skipDigitSeq 0 $ S.ungetc stream
@@ -675,7 +685,7 @@ struct
else
finish ()
end
- | numParser (FloatMode FmExp) (stream, startOff) c =
+ | numParser _ (FloatMode FmExp) (stream, startOff) c =
let
val (off, stream) =
if c = NONE then
@@ -856,8 +866,8 @@ struct
seqExnConv mode $ TkError (TkiStart, unfinishedSeq mode)
| SeqTerm => Unreachable
- val charParser = seqParser SpmChr
- val strParser = seqParser SpmStr
+ fun charParser _ = seqParser SpmChr
+ fun strParser _ = seqParser SpmStr
fun getDir stream = OS.Path.getParent o S.getFname $ stream
@@ -877,29 +887,22 @@ struct
SOME (_, repr) => String.sub (repr, 0) = ppcPrefix
| NONE => false
- fun handlePpcDir (tk, pos) stream =
+ fun handlePpcDir symtab (tk, pos) stream =
let
open String
val error = fn () => error pos "expected preprocessor directive"
fun getById id =
- let
- fun right repr =
- sub (repr, 0) = ppcPrefix andalso extract (repr, 1, NONE) = id
- in
- case List.find (fn (_, repr) => right repr) tokenRepr of
- SOME (tk, _) => (tk, stream)
+ case ST.isPpcDir symtab id of
+ SOME tk => (tk, stream)
| NONE =>
- if id = "include" then
+ if ST.getStr symtab id = "include" then
completePpcInclude pos stream
else
error ()
- end
in
case tk of
Id id => getById id
- | kwElse => (PpcElse, stream)
- | kwIf => (PpcIf, stream)
| _ => error ()
end
@@ -953,23 +956,23 @@ struct
| NONE => raiseErr ()
end
- fun processSymbol stream =
+ fun processSymbol symtab stream =
let
val (tk, pos, stream) = fsmEat $ S.ungetc stream
in
case tk of
- CommentStart => getToken $ skipComment stream pos
+ CommentStart => getToken symtab $ skipComment stream pos
| DoubleDot => (Dot, pos, S.ungetc stream)
| Hash =>
if S.isFirstOnLine stream (S.getOffset stream - 1) then
let
- val (tk, pos', stream) = getToken stream
+ val (tk, pos', stream) = getToken symtab stream
in
if tk = EOS then
error pos "unfinished preprecessor directive"
else
let
- val (tk, stream) = handlePpcDir (tk, pos') stream
+ val (tk, stream) = handlePpcDir symtab (tk, pos') stream
in
(tk, pos, stream)
end
@@ -979,12 +982,12 @@ struct
| _ => (tk, pos, stream)
end
- and getToken stream =
+ and getToken symtab stream =
let
val (c, stream) = S.getchar stream
fun conv tk (pos, stream) = (tk, pos, stream)
- fun @-> parser acc = parserWrapper stream parser acc
+ fun @-> parser acc = parserWrapper stream (parser symtab) acc
in
case c of
NONE => conv EOS $ S.EOFpos stream
@@ -992,7 +995,7 @@ struct
if c = #"\n" then
conv NewLine $ S.getPosAfterChar stream
else if Char.isSpace c then
- getToken stream
+ getToken symtab stream
else if isIdStart c then
@-> idParser ()
else if isDigit c then
@@ -1002,9 +1005,9 @@ struct
else if c = #"\"" then
@-> strParser SeqInit
else if isStartForFsm c then
- processSymbol stream
+ processSymbol symtab stream
else if c = #"\\" then
- getToken $ handleBackslash stream
+ getToken symtab $ handleBackslash stream
else
unexpectedCharRaise stream c
end
@@ -1012,23 +1015,24 @@ struct
fun debugPrint fname =
let
val stream = S.create fname
+ val symtab = initSymtab ()
fun print line stream =
let
- val (tk, S.Pos (_, line', col'), stream) = getToken stream
+ val (tk, S.Pos (_, line', col'), stream) = getToken symtab stream
in
if line <> line' then
printf `"\nline " I line' `": \t" %
else
();
- printf I col' `":" Ptk tk `" ";
+ printf I col' `":" Ptk symtab tk `" ";
if tk = EOS then
()
else
print line' stream
end
in
- printTable $ fsmTable ();
+ printTable symtab $ fsmTable ();
printf `"Tokenizing file: " `fname;
print 0 stream;
printf `"\n" %