diff options
author | Vladimir Azarov <avm@intermediate-node.net> | 2025-05-26 14:42:35 +0200 |
---|---|---|
committer | Vladimir Azarov <avm@intermediate-node.net> | 2025-05-26 14:42:35 +0200 |
commit | 6f3fa80b37ca5f8d992f5d6f66aee77ead303bf4 (patch) | |
tree | 1d3099280e63fac03d906b24bc6b877840348eab /tokenizer.fun | |
parent | c6b6203f8420f76a47433717eab8026d524ec5c1 (diff) |
Symbol table
Diffstat (limited to 'tokenizer.fun')
-rw-r--r-- | tokenizer.fun | 132 |
1 files changed, 68 insertions, 64 deletions
diff --git a/tokenizer.fun b/tokenizer.fun index 6859a73..93a7099 100644 --- a/tokenizer.fun +++ b/tokenizer.fun @@ -1,7 +1,8 @@ -functor Tokenizer(structure H: HASHTABLE; structure S: STREAM) +functor Tokenizer(structure ST: SYMTAB; structure S: STREAM) : TOKENIZER = struct + structure ST = ST structure S = S datatype intType = ItDec | ItOct | ItHex @@ -16,11 +17,11 @@ struct Invalid | EOS | NewLine | - MacroEnd of string | + MacroEnd of int | Num of numConst | - Id of string | + Id of int | CharConst of string * int | StringConst of string | @@ -269,6 +270,28 @@ struct (FsL, "l") ] + fun initSymtab () = + let + val symtab = ST.init () + fun ?s = String.sub (s, 0) + fun rest s = String.extract (s, 1, NONE) + + fun clPpc tk (NONE, kw) = (SOME tk, kw) + | clPpc _ _ = raise Unreachable + + fun clKw tk (ppc, NONE) = (ppc, SOME tk) + | clKw _ _ = raise Unreachable + in + app (fn (tk, repr) => + if ?repr = ppcPrefix then + ignore $ ST.insert symtab (rest repr) (clPpc tk) + else if ?repr = kwPrefix then + ignore $ ST.insert symtab (rest repr) (clKw tk) + else + ()) tokenRepr; + symtab + end + fun getSfxRepr sfx buf onError = case List.find (fn (sfx', _) => sfx' = sfx) buf of NONE => onError () @@ -277,10 +300,18 @@ struct fun getSfxReprSimple sfx buf = getSfxRepr sfx buf (fn () => raise SuffixWithoutRepr) - fun printToken (out, tk) = + fun printToken (out, symtab, tk) = + let + val ? = fn z => + let + fun f (out, id) = Printf out `(ST.getStr symtab id) % + in + bind A1 f + end z + in case tk of - Id s => Printf out `s % - | MacroEnd macro => Printf out `"mend(" `macro `")" % + Id id => Printf out ?id % + | MacroEnd mid => Printf out `"mend(" ?mid `")" % | NewLine => Printf out `"\\n" % | PpcInclude (dir, arg) => Printf out `"#include(" `dir `", " `arg `")" % @@ -309,8 +340,9 @@ struct Printf out C head `tail % end | NONE => raise TokenWithoutRepr + end - val Ptk = fn z => bind A1 printToken z + val Ptk = fn z => bind A2 printToken z fun isIdStart c = Char.isAlpha c orelse c = #"_" fun isIdBody c = Char.isAlphaNum c orelse c = #"_" @@ -408,7 +440,7 @@ struct T end - fun printTable (nextState, buf) = + fun printTable symtab (nextState, buf) = let open Array fun printRow i row = @@ -432,7 +464,7 @@ struct let val (tk, row) = sub (buf, rowNum) in - printf `"row " I rowNum `" - " Ptk tk `": \t" %; + printf `"row " I rowNum `" - " Ptk symtab tk `": \t" %; printRow 0 row; print' (rowNum + 1) buf end @@ -514,36 +546,14 @@ struct fun finishSeqRead startOff stream = S.getSubstr startOff (S.getOffset stream) stream - fun keywordHashtableGen () = - let - val table = H.createLog 7 - val () = - List.app - (fn (tk, repr) => - if String.sub (repr, 0) = kwPrefix then - H.insert table (String.extract (repr, 1, NONE)) tk - else - ()) - tokenRepr - in - table - end - - val keywordHashtable = lazy keywordHashtableGen - - fun findKeyword str = - case H.lookup (keywordHashtable ()) str of - NONE => Id str - | SOME tk => tk - - fun idParser () (stream, startOff) c = + fun idParser symtab () (stream, startOff) c = let fun finalize stream = let val s = finishSeqRead startOff stream - val tk = findKeyword s + val id = ST.getId symtab s in - ((), SOME tk, stream) + ((), SOME $ Id id, stream) end in case c of @@ -600,7 +610,7 @@ struct * gcc/clang/cparser consider 0xe-3 as a fp constant and reject it. * Right now we do not, but it may be reconsidered. *) - fun numParser NpInit (stream, _) (SOME c) = + fun numParser _ NpInit (stream, _) (SOME c) = if c = #"0" then let val (c, stream') = S.getchar stream @@ -616,8 +626,8 @@ struct end else (IntMode ImDec, NONE, stream) - | numParser NpInit _ NONE = raise Unreachable - | numParser (IntMode mode) (stream, startOff) c = + | numParser _ NpInit _ NONE = raise Unreachable + | numParser _ (IntMode mode) (stream, startOff) c = let val (pred, res, offset) = case mode of @@ -653,7 +663,7 @@ struct else finish () end - | numParser (FloatMode FmDot) (stream, startOff) _ = + | numParser _ (FloatMode FmDot) (stream, startOff) _ = let val (len, stream) = skipDigitSeq 0 $ S.ungetc stream @@ -675,7 +685,7 @@ struct else finish () end - | numParser (FloatMode FmExp) (stream, startOff) c = + | numParser _ (FloatMode FmExp) (stream, startOff) c = let val (off, stream) = if c = NONE then @@ -856,8 +866,8 @@ struct seqExnConv mode $ TkError (TkiStart, unfinishedSeq mode) | SeqTerm => Unreachable - val charParser = seqParser SpmChr - val strParser = seqParser SpmStr + fun charParser _ = seqParser SpmChr + fun strParser _ = seqParser SpmStr fun getDir stream = OS.Path.getParent o S.getFname $ stream @@ -877,29 +887,22 @@ struct SOME (_, repr) => String.sub (repr, 0) = ppcPrefix | NONE => false - fun handlePpcDir (tk, pos) stream = + fun handlePpcDir symtab (tk, pos) stream = let open String val error = fn () => error pos "expected preprocessor directive" fun getById id = - let - fun right repr = - sub (repr, 0) = ppcPrefix andalso extract (repr, 1, NONE) = id - in - case List.find (fn (_, repr) => right repr) tokenRepr of - SOME (tk, _) => (tk, stream) + case ST.isPpcDir symtab id of + SOME tk => (tk, stream) | NONE => - if id = "include" then + if ST.getStr symtab id = "include" then completePpcInclude pos stream else error () - end in case tk of Id id => getById id - | kwElse => (PpcElse, stream) - | kwIf => (PpcIf, stream) | _ => error () end @@ -953,23 +956,23 @@ struct | NONE => raiseErr () end - fun processSymbol stream = + fun processSymbol symtab stream = let val (tk, pos, stream) = fsmEat $ S.ungetc stream in case tk of - CommentStart => getToken $ skipComment stream pos + CommentStart => getToken symtab $ skipComment stream pos | DoubleDot => (Dot, pos, S.ungetc stream) | Hash => if S.isFirstOnLine stream (S.getOffset stream - 1) then let - val (tk, pos', stream) = getToken stream + val (tk, pos', stream) = getToken symtab stream in if tk = EOS then error pos "unfinished preprecessor directive" else let - val (tk, stream) = handlePpcDir (tk, pos') stream + val (tk, stream) = handlePpcDir symtab (tk, pos') stream in (tk, pos, stream) end @@ -979,12 +982,12 @@ struct | _ => (tk, pos, stream) end - and getToken stream = + and getToken symtab stream = let val (c, stream) = S.getchar stream fun conv tk (pos, stream) = (tk, pos, stream) - fun @-> parser acc = parserWrapper stream parser acc + fun @-> parser acc = parserWrapper stream (parser symtab) acc in case c of NONE => conv EOS $ S.EOFpos stream @@ -992,7 +995,7 @@ struct if c = #"\n" then conv NewLine $ S.getPosAfterChar stream else if Char.isSpace c then - getToken stream + getToken symtab stream else if isIdStart c then @-> idParser () else if isDigit c then @@ -1002,9 +1005,9 @@ struct else if c = #"\"" then @-> strParser SeqInit else if isStartForFsm c then - processSymbol stream + processSymbol symtab stream else if c = #"\\" then - getToken $ handleBackslash stream + getToken symtab $ handleBackslash stream else unexpectedCharRaise stream c end @@ -1012,23 +1015,24 @@ struct fun debugPrint fname = let val stream = S.create fname + val symtab = initSymtab () fun print line stream = let - val (tk, S.Pos (_, line', col'), stream) = getToken stream + val (tk, S.Pos (_, line', col'), stream) = getToken symtab stream in if line <> line' then printf `"\nline " I line' `": \t" % else (); - printf I col' `":" Ptk tk `" "; + printf I col' `":" Ptk symtab tk `" "; if tk = EOS then () else print line' stream end in - printTable $ fsmTable (); + printTable symtab $ fsmTable (); printf `"Tokenizing file: " `fname; print 0 stream; printf `"\n" % |