summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--ccross.mlb2
-rw-r--r--ccross.sml3
-rw-r--r--common.sml4
-rw-r--r--hashtable.sig10
-rw-r--r--hashtable.sml54
-rw-r--r--parser.fun14
-rw-r--r--ppc.fun75
-rw-r--r--ppc.sig2
-rw-r--r--symtab.fun92
-rw-r--r--symtab.sig16
-rw-r--r--tokenizer.fun132
-rw-r--r--tokenizer.sig11
12 files changed, 287 insertions, 128 deletions
diff --git a/ccross.mlb b/ccross.mlb
index 26be1c1..3bd2ca6 100644
--- a/ccross.mlb
+++ b/ccross.mlb
@@ -10,6 +10,8 @@ in
stream.sig stream.sml
hashtable.sig hashtable.sml
tree.sig tree.sml
+
+ symtab.sig symtab.fun
tokenizer.sig tokenizer.fun
ppc.sig ppc.fun
parser.sig parser.fun
diff --git a/ccross.sml b/ccross.sml
index 92810b8..64be5db 100644
--- a/ccross.sml
+++ b/ccross.sml
@@ -1,6 +1,7 @@
structure ccross:> CCROSS = struct
+ structure ST:> SYMTAB = Symtab(Hashtable)
structure T:> TOKENIZER =
- Tokenizer(structure H = Hashtable; structure S = Stream)
+ Tokenizer(structure ST = ST; structure S = Stream)
structure ppc:> PPC = ppc(structure Tree = Tree; structure T = T)
structure Parser:> PARSER = Parser(ppc)
structure D:> DRIVER = Driver(Parser)
diff --git a/common.sml b/common.sml
index e947e0f..e648203 100644
--- a/common.sml
+++ b/common.sml
@@ -175,6 +175,10 @@ val R = fn z => bind A1 (fn ((output, _), n) => app (fn f => f ())
type ('t, 'a, 'b, 'c) a1printer = (bool * ((string -> unit) * 'a)) * 'b
-> 't -> ((bool * ((string -> unit) * 'a)) * 'b -> 'c) -> 'c
+type ('t1, 't2, 'a, 'b, 'c) a2printer =
+ (bool * ((string -> unit) * 'a)) * 'b -> 't1 -> 't2 ->
+ ((bool * ((string -> unit) * 'a)) * 'b -> 'c) -> 'c
+
fun die code g =
let
fun finish (true, _) = raise Unreachable
diff --git a/hashtable.sig b/hashtable.sig
index b96bfb3..2eac5b8 100644
--- a/hashtable.sig
+++ b/hashtable.sig
@@ -1,11 +1,11 @@
signature HASHTABLE = sig
type 'a t
- exception Full and Exists
+ exception Full
val createLog: int -> 'a t
- val insert: 'a t -> string -> 'a -> unit
- val lookup: 'a t -> string -> 'a option
- val lookup2: 'a t -> string ->
- ('a -> 'a option * 'b) -> (unit -> 'b) -> 'b
+ val insertIfNew: 'a t -> string -> 'a -> 'a option
+
+ val taken: 'a t -> int
+ val size: 'a t -> int
end
diff --git a/hashtable.sml b/hashtable.sml
index 822d1fd..286d241 100644
--- a/hashtable.sml
+++ b/hashtable.sml
@@ -3,10 +3,11 @@ structure Hashtable: HASHTABLE = struct
open Word
(* buf * taken * mask *)
- type 'a t = (string * 'a) option array * int * word
+ type 'a t = ((string * 'a) option array * int * word) ref
+
val ` = Word.fromInt
- val ! = Word.toInt
+ val ~> = Word.toInt
infixr 4 <<
infix 5 andb
@@ -18,10 +19,10 @@ structure Hashtable: HASHTABLE = struct
val size = `1 << `log
val mask = size - `1
in
- (array (Word.toInt size, NONE), 0, mask)
+ ref (array (Word.toInt size, NONE), 0, mask)
end
- exception Full and Exists
+ exception Full
fun hash key =
List.foldl (fn (c, s) => s * `31 + `(ord c)) (`17) (explode key)
@@ -39,37 +40,42 @@ structure Hashtable: HASHTABLE = struct
fun next idx mask = (idx + `1) andb mask
- fun lookup2 (array, _, mask) key f g =
+ fun insertIfNew H key v =
let
+ val (array, _, mask) = !H
+
+ fun inc H =
+ let
+ open Int
+ val (array, taken, mask) = !H
+ in
+ H := (array, taken + 1, mask)
+ end
+
+ val () = checkFreeSpace $ !H
fun find idx =
- case sub (array, !idx) of
- NONE => g ()
- | SOME (key', v) =>
+ case sub (array, ~> idx) of
+ NONE => (update (array, ~> idx, SOME (key, v)); inc H; NONE)
+ | SOME (key', v') =>
if key' = key then
- case f v of
- (NONE, res) => res
- | (SOME v, res) =>
- (update (array, !idx, SOME (key, v)); res)
+ (SOME v')
else
find (next idx mask)
in
find (hash key andb mask)
end
- fun lookup H key = lookup2 H key (fn v => (NONE, SOME v)) (fn () => NONE)
+ fun taken H =
+ let
+ val (_, taken, _) = !H
+ in
+ taken
+ end
- fun insert (H as (array, _, mask)) key v =
+ fun size H =
let
- val () = checkFreeSpace H;
- fun find idx =
- case sub (array, !idx) of
- NONE => update (array, !idx, SOME (key, v))
- | SOME (key', _) =>
- if key' = key then
- raise Exists
- else
- find (next idx mask)
+ val (array, _, _) = !H
in
- find (hash key andb mask)
+ length array
end
end
diff --git a/parser.fun b/parser.fun
index 20c1c76..b571a40 100644
--- a/parser.fun
+++ b/parser.fun
@@ -91,10 +91,10 @@ functor Parser(P: PPC): PARSER = struct
datatype expr =
Enum |
- Eid of string |
+ Eid of int |
Estrlit of string |
- EmemberByV of string * exprAug |
- EmemberByP of string * exprAug |
+ EmemberByV of int * exprAug |
+ EmemberByP of int * exprAug |
EfuncCall of exprAug * exprAug list |
ETernary of exprAug * exprAug * exprAug |
Eunop of unop * exprAug |
@@ -128,7 +128,7 @@ functor Parser(P: PPC): PARSER = struct
val (tk, _) = head
in
case tk of
- Tk tk => Printf out T.Ptk tk `"," A1 PtokenL tail %
+ Tk tk => Printf out P.Ptk tk `"," A1 PtokenL tail %
| TkParens list => printL list "(" ")"
| TkBrackets list => printL list "[" "]"
| TkBraces list => printL list "{" "}"
@@ -230,7 +230,7 @@ functor Parser(P: PPC): PARSER = struct
fun Pbinop (out, binop) =
case List.find (fn (binop', _, _, _) => binop' = binop) binopTable
of
- SOME (_, tk, _, _) => Printf out T.Ptk tk %
+ SOME (_, tk, _, _) => Printf out P.Ptk tk %
| NONE => raise Unreachable
in
bind A1 Pbinop
@@ -248,11 +248,11 @@ functor Parser(P: PPC): PARSER = struct
end z
fun member (member, ea) s = Printf out
- `"(" `s `member P `"\n" A2 printExpr' (off + 1) ea `")" %;
+ `"(" `s P.?member P `"\n" A2 printExpr' (off + 1) ea `")" %;
in
printf R off %;
case e of
- Eid id => Printf out `id P %
+ Eid id => Printf out P.?id P %
| Enum => Printf out `"num" P %
| Estrlit s => Printf out `"\"" `s `"\"" P %
| EmemberByV pair => member pair "."
diff --git a/ppc.fun b/ppc.fun
index c3b7412..b3428c2 100644
--- a/ppc.fun
+++ b/ppc.fun
@@ -3,25 +3,42 @@ struct
structure T = T
- type mLayers = (string * T.S.pos) list
+ val symtab = T.initSymtab ()
+
+ fun ?? id = T.ST.getStr symtab id
+ val ? = fn z =>
+ let
+ fun f (out, id) = Printf out `(??id) %
+ in
+ bind A1 f
+ end z
+
+ val Ptk = fn z =>
+ let
+ fun f (out, tk) = Printf out T.Ptk symtab tk %
+ in
+ bind A1 f
+ end z
+
+ type mLayers = (int * T.S.pos) list
datatype tkPos = TkPos of T.S.pos * mLayers
type macroBody = (T.token * tkPos) list
datatype macro =
ObjMacro of macroBody |
- FuncMacro of string list * macroBody
+ FuncMacro of int list * macroBody
datatype layer = Stream of T.S.t | Tokens of (T.token * tkPos) list
type t = {
buffer: layer list,
- macros: (string, bool * T.S.pos * macro) Tree.t,
+ macros: (int, bool * T.S.pos * macro) Tree.t,
debugMode: bool,
incDirs: string list
}
- val macroCompare = fn s1 => fn s2 => String.compare (s1, s2)
+ val macroCompare = fn m1 => fn m2 => Int.compare (m1, m2)
val insertMacro = Tree.insert macroCompare
val macrosLookup = fn z => Tree.lookup2 macroCompare z
@@ -42,7 +59,7 @@ struct
val PlayersU = fn z =>
let
fun PlayersU (out, ((macroName, pos) :: layers)) =
- Printf out F `"\tfrom " `macroName `" at " T.S.Ppos pos `"\n"
+ Printf out F `"\tfrom " ?macroName `" at " T.S.Ppos pos `"\n"
A1 PlayersU layers %
| PlayersU (_, []) = ()
in
@@ -63,7 +80,7 @@ struct
let
fun Pcl (out, cl) =
case cl of
- Ctk tk => Printf out T.Ptk tk %
+ Ctk tk => Printf out Ptk tk %
| Cid => Printf out `"identifier" %
| Cconst => Printf out `"constant" %
| Cunop => Printf out `"unary operator" %
@@ -96,7 +113,7 @@ struct
{ buffer = [Stream $ T.S.create fname], macros = Tree.empty, debugMode,
incDirs }
- fun printLayer out (macro, pos) = Printf out `macro `" " T.S.Ppos pos %
+ fun printLayer out (macro, pos) = Printf out ?macro `" " T.S.Ppos pos %
fun printLayers _ [] = ()
| printLayers out [layer] = printLayer out layer
@@ -184,7 +201,7 @@ struct
Printf out F R off A0 Ppos %
else
();
- Printf out I col' `":" T.Ptk tk `" ";
+ Printf out I col' `":" Ptk tk `" ";
(off, layers', (fname', line'))
end
@@ -195,7 +212,7 @@ struct
fun Players (out, x, y) = ignore $ PlayersCompact (out, x, y)
in
Printf out F `"expanding (" A2 Players NONE (rev mLayers)
- `") macro " `id %
+ `") macro " ?id %
end
in
bind A2 printMacroHeader
@@ -257,7 +274,7 @@ struct
)
| getTokenNoexpand (P as { buffer = Stream head :: _, ... }: t) =
let
- val (tk, pos, head) = T.getToken head
+ val (tk, pos, head) = T.getToken symtab head
in
(tk, pos2tkPos pos, updatePpc P u#buffer (updateH head) %)
end
@@ -430,7 +447,7 @@ struct
fun parseDefineMacroArgs ppc =
let
- datatype arg = Arg of string * tkPos | LastArg of string * tkPos
+ datatype arg = Arg of int * tkPos | LastArg of int * tkPos
fun parseArg ppc =
let
@@ -475,8 +492,8 @@ struct
fun printParams out =
let
fun printParams' [] = ()
- | printParams' [p] = Printf out `p %
- | printParams' (p :: ps) = (Printf out `p `", "; printParams' ps)
+ | printParams' [p] = Printf out ?p %
+ | printParams' (p :: ps) = (Printf out ?p `", "; printParams' ps)
in
Printf out `"(";
printParams' params;
@@ -495,10 +512,10 @@ struct
val macroName =
case macroName of T.Id id => id | _ => raise Unreachable
- val () = dprintf ppc PDP T.S.Ppos pos `": #define " `macroName %
+ val () = dprintf ppc PDP T.S.Ppos pos `": #define " ?macroName %
val parser =
- if isFuncMacroDefine (size macroName) pos ppc then
+ if isFuncMacroDefine (size $ ??macroName) pos ppc then
parseDefineFuncMacro
else
parseDefineObjMacro
@@ -529,7 +546,7 @@ struct
case prevVal of
SOME (_, pos', macro') =>
if not $ eqMacro (macro, macro') then (
- warning (pos2tkPos pos) `macroName `" macro redefinition" %;
+ warning (pos2tkPos pos) ?macroName `" macro redefinition" %;
printf F `"See " T.S.Ppos pos' %
) else
()
@@ -684,7 +701,7 @@ struct
let
fun print [] = ()
| print ((p, args) :: tail) = (
- Printf out `p `": ";
+ Printf out ?p `": ";
Printf out PtokenL 1 args;
print tail
)
@@ -732,7 +749,7 @@ struct
fun def dl = collect (level + dl) ppc ((tk, pos) :: acc)
fun unexpected tk prevTk =
- error pos `"unexpected " T.Ptk tk `" inside " T.Ptk prevTk %
+ error pos `"unexpected " Ptk tk `" inside " Ptk prevTk %
fun handleIfdef ifTk =
if tk = T.PpcElif then
@@ -837,7 +854,7 @@ struct
(not defined, "ifndef")
in
dprintf ppc PDP T.S.Ppos ifPos `": #" `form `" "
- `id `" -> " B cond `"\n"%;
+ ?id `" -> " B cond `"\n"%;
(cond, ppc)
end
@@ -884,7 +901,7 @@ struct
val (_, _, ppc) = getClassNoexpand ppc [Ctk T.NewLine]
val (prevVal, macros) = Tree.delete macroCompare (#macros ppc) id
in
- dprintf ppc PDP `"#undef " `id `"\n" %;
+ dprintf ppc PDP `"#undef " ?id `"\n" %;
case prevVal of
NONE => warning pos
`"#undef: no macro with provided name was defined" %
@@ -892,7 +909,7 @@ struct
updatePpc ppc s#macros macros %
end
- and handleStray (tk, pos) _ = errorSpos pos `"stray " T.Ptk tk %
+ and handleStray (tk, pos) _ = errorSpos pos `"stray " Ptk tk %
and handlePragma (_, pos) ppc = (
dprintf ppc PDP `"#pragma -> ignored\n";
@@ -962,7 +979,7 @@ struct
| getToken (P as { buffer = [], ... }: t) = (T.EOS, dummyEOSpos, P)
| getToken (P as { buffer = Stream head :: tail, ... }: t) =
let
- val (tk, pos, head) = T.getToken head
+ val (tk, pos, head) = T.getToken symtab head
in
case (tk, tail) of
(T.EOS, []) =>
@@ -1004,5 +1021,17 @@ struct
printf A2 debugPrint' startCache ppc F %
end
- val getToken = getTokenSkipNL
+ fun getToken ppc =
+ let
+ val (tk, pos, ppc) = getTokenSkipNL ppc
+ fun def () = (tk, pos, ppc)
+ in
+ case tk of
+ T.Id id => (
+ case T.ST.isKw symtab id of
+ SOME kw => (kw, pos, ppc)
+ | NONE => def ()
+ )
+ | _ => def ()
+ end
end
diff --git a/ppc.sig b/ppc.sig
index 0bfaca7..f700634 100644
--- a/ppc.sig
+++ b/ppc.sig
@@ -21,5 +21,7 @@ signature PPC = sig
val getToken: t -> T.token * tkPos * t
val debugPrint: string -> string list -> unit
+ val ? : (int, 'a, 'b, 'c) a1printer
+ val Ptk: (T.token, 'a, 'b, 'c) a1printer
val PtkPos: (tkPos, 'a, 'b, 'c) a1printer
end
diff --git a/symtab.fun b/symtab.fun
new file mode 100644
index 0000000..7518640
--- /dev/null
+++ b/symtab.fun
@@ -0,0 +1,92 @@
+functor Symtab(H: HASHTABLE): SYMTAB = struct
+
+ type 'token t =
+ ((string * 'token option * 'token option) option Array.array
+ * int H.t) ref
+
+ type 'token auxInfo = 'token option * 'token option
+
+ exception UnknownId
+
+ fun init () =
+ let
+ val log = 20
+ val h = H.createLog log
+ in
+ ref $ (Array.array (H.size h, NONE), h)
+ end
+
+ fun insert symtab idStr cl =
+ let
+ val (array, table) = !symtab
+
+ val id = H.taken table
+ val prev = H.insertIfNew table idStr id
+
+ fun update id cl arg =
+ let
+ val (ppc, kw) = cl arg
+ in
+ Array.update (array, id, SOME (idStr, ppc, kw))
+ end
+ in
+ case prev of
+ NONE => (update id cl (NONE, NONE); id)
+ | SOME id =>
+ case Array.sub (array, id) of
+ NONE => raise Unreachable
+ | SOME (_, ppc, kw) => (update id cl (ppc, kw); id)
+ end
+
+ fun getId symtab idStr =
+ let
+ val (array, table) = !symtab
+ val id = H.taken table
+ val prev = H.insertIfNew table idStr id
+ in
+ case prev of
+ NONE => (Array.update (array, id, SOME (idStr, NONE, NONE)); id)
+ | SOME id => id
+ end
+
+ fun getAuxInfo symtab id =
+ let
+ val (array, _) = !symtab
+ in
+ case Array.sub (array, id) of
+ NONE => raise UnknownId
+ | SOME (_, ppc, kw) => (ppc, kw)
+ end
+
+ fun getStr symtab id =
+ let
+ val (array, _) = !symtab
+ in
+ case Array.sub (array, id) of
+ NONE => raise UnknownId
+ | SOME (str, _, _) => str
+ end
+
+ fun isPpcDir symtab id =
+ case getAuxInfo symtab id of
+ (SOME ppc, _) => SOME ppc
+ | _ => NONE
+
+ fun isKw symtab id =
+ case getAuxInfo symtab id of
+ (_, SOME kw) => SOME kw
+ | _ => NONE
+
+ fun print symtab =
+ let
+ val (array, _) = !symtab
+ fun print idx =
+ case Array.sub (array, idx) of
+ NONE => printf `"\n" %
+ | SOME (str, _, _) =>
+ (printf I idx `": " `str `"\n" %; print (idx + 1))
+ in
+ printf `"Symbol table:\n" %;
+ print 0
+ end
+end
diff --git a/symtab.sig b/symtab.sig
new file mode 100644
index 0000000..61ed1cb
--- /dev/null
+++ b/symtab.sig
@@ -0,0 +1,16 @@
+signature SYMTAB = sig
+
+ type 'token t
+
+ val init: unit -> 'token t
+
+ type 'token auxInfo = 'token option * 'token option
+ val insert: 'token t -> string -> ('token auxInfo -> 'token auxInfo)
+ -> int
+ val getId: 'token t -> string -> int
+ val getStr: 'token t -> int -> string
+ val isPpcDir: 'token t -> int -> 'token option
+ val isKw: 'token t -> int -> 'token option
+
+ val print: 'token t -> unit
+end
diff --git a/tokenizer.fun b/tokenizer.fun
index 6859a73..93a7099 100644
--- a/tokenizer.fun
+++ b/tokenizer.fun
@@ -1,7 +1,8 @@
-functor Tokenizer(structure H: HASHTABLE; structure S: STREAM)
+functor Tokenizer(structure ST: SYMTAB; structure S: STREAM)
: TOKENIZER =
struct
+ structure ST = ST
structure S = S
datatype intType = ItDec | ItOct | ItHex
@@ -16,11 +17,11 @@ struct
Invalid |
EOS |
NewLine |
- MacroEnd of string |
+ MacroEnd of int |
Num of numConst |
- Id of string |
+ Id of int |
CharConst of string * int |
StringConst of string |
@@ -269,6 +270,28 @@ struct
(FsL, "l")
]
+ fun initSymtab () =
+ let
+ val symtab = ST.init ()
+ fun ?s = String.sub (s, 0)
+ fun rest s = String.extract (s, 1, NONE)
+
+ fun clPpc tk (NONE, kw) = (SOME tk, kw)
+ | clPpc _ _ = raise Unreachable
+
+ fun clKw tk (ppc, NONE) = (ppc, SOME tk)
+ | clKw _ _ = raise Unreachable
+ in
+ app (fn (tk, repr) =>
+ if ?repr = ppcPrefix then
+ ignore $ ST.insert symtab (rest repr) (clPpc tk)
+ else if ?repr = kwPrefix then
+ ignore $ ST.insert symtab (rest repr) (clKw tk)
+ else
+ ()) tokenRepr;
+ symtab
+ end
+
fun getSfxRepr sfx buf onError =
case List.find (fn (sfx', _) => sfx' = sfx) buf of
NONE => onError ()
@@ -277,10 +300,18 @@ struct
fun getSfxReprSimple sfx buf =
getSfxRepr sfx buf (fn () => raise SuffixWithoutRepr)
- fun printToken (out, tk) =
+ fun printToken (out, symtab, tk) =
+ let
+ val ? = fn z =>
+ let
+ fun f (out, id) = Printf out `(ST.getStr symtab id) %
+ in
+ bind A1 f
+ end z
+ in
case tk of
- Id s => Printf out `s %
- | MacroEnd macro => Printf out `"mend(" `macro `")" %
+ Id id => Printf out ?id %
+ | MacroEnd mid => Printf out `"mend(" ?mid `")" %
| NewLine => Printf out `"\\n" %
| PpcInclude (dir, arg) =>
Printf out `"#include(" `dir `", " `arg `")" %
@@ -309,8 +340,9 @@ struct
Printf out C head `tail %
end
| NONE => raise TokenWithoutRepr
+ end
- val Ptk = fn z => bind A1 printToken z
+ val Ptk = fn z => bind A2 printToken z
fun isIdStart c = Char.isAlpha c orelse c = #"_"
fun isIdBody c = Char.isAlphaNum c orelse c = #"_"
@@ -408,7 +440,7 @@ struct
T
end
- fun printTable (nextState, buf) =
+ fun printTable symtab (nextState, buf) =
let
open Array
fun printRow i row =
@@ -432,7 +464,7 @@ struct
let
val (tk, row) = sub (buf, rowNum)
in
- printf `"row " I rowNum `" - " Ptk tk `": \t" %;
+ printf `"row " I rowNum `" - " Ptk symtab tk `": \t" %;
printRow 0 row;
print' (rowNum + 1) buf
end
@@ -514,36 +546,14 @@ struct
fun finishSeqRead startOff stream =
S.getSubstr startOff (S.getOffset stream) stream
- fun keywordHashtableGen () =
- let
- val table = H.createLog 7
- val () =
- List.app
- (fn (tk, repr) =>
- if String.sub (repr, 0) = kwPrefix then
- H.insert table (String.extract (repr, 1, NONE)) tk
- else
- ())
- tokenRepr
- in
- table
- end
-
- val keywordHashtable = lazy keywordHashtableGen
-
- fun findKeyword str =
- case H.lookup (keywordHashtable ()) str of
- NONE => Id str
- | SOME tk => tk
-
- fun idParser () (stream, startOff) c =
+ fun idParser symtab () (stream, startOff) c =
let
fun finalize stream =
let
val s = finishSeqRead startOff stream
- val tk = findKeyword s
+ val id = ST.getId symtab s
in
- ((), SOME tk, stream)
+ ((), SOME $ Id id, stream)
end
in
case c of
@@ -600,7 +610,7 @@ struct
* gcc/clang/cparser consider 0xe-3 as a fp constant and reject it.
* Right now we do not, but it may be reconsidered.
*)
- fun numParser NpInit (stream, _) (SOME c) =
+ fun numParser _ NpInit (stream, _) (SOME c) =
if c = #"0" then
let
val (c, stream') = S.getchar stream
@@ -616,8 +626,8 @@ struct
end
else
(IntMode ImDec, NONE, stream)
- | numParser NpInit _ NONE = raise Unreachable
- | numParser (IntMode mode) (stream, startOff) c =
+ | numParser _ NpInit _ NONE = raise Unreachable
+ | numParser _ (IntMode mode) (stream, startOff) c =
let
val (pred, res, offset) =
case mode of
@@ -653,7 +663,7 @@ struct
else
finish ()
end
- | numParser (FloatMode FmDot) (stream, startOff) _ =
+ | numParser _ (FloatMode FmDot) (stream, startOff) _ =
let
val (len, stream) = skipDigitSeq 0 $ S.ungetc stream
@@ -675,7 +685,7 @@ struct
else
finish ()
end
- | numParser (FloatMode FmExp) (stream, startOff) c =
+ | numParser _ (FloatMode FmExp) (stream, startOff) c =
let
val (off, stream) =
if c = NONE then
@@ -856,8 +866,8 @@ struct
seqExnConv mode $ TkError (TkiStart, unfinishedSeq mode)
| SeqTerm => Unreachable
- val charParser = seqParser SpmChr
- val strParser = seqParser SpmStr
+ fun charParser _ = seqParser SpmChr
+ fun strParser _ = seqParser SpmStr
fun getDir stream = OS.Path.getParent o S.getFname $ stream
@@ -877,29 +887,22 @@ struct
SOME (_, repr) => String.sub (repr, 0) = ppcPrefix
| NONE => false
- fun handlePpcDir (tk, pos) stream =
+ fun handlePpcDir symtab (tk, pos) stream =
let
open String
val error = fn () => error pos "expected preprocessor directive"
fun getById id =
- let
- fun right repr =
- sub (repr, 0) = ppcPrefix andalso extract (repr, 1, NONE) = id
- in
- case List.find (fn (_, repr) => right repr) tokenRepr of
- SOME (tk, _) => (tk, stream)
+ case ST.isPpcDir symtab id of
+ SOME tk => (tk, stream)
| NONE =>
- if id = "include" then
+ if ST.getStr symtab id = "include" then
completePpcInclude pos stream
else
error ()
- end
in
case tk of
Id id => getById id
- | kwElse => (PpcElse, stream)
- | kwIf => (PpcIf, stream)
| _ => error ()
end
@@ -953,23 +956,23 @@ struct
| NONE => raiseErr ()
end
- fun processSymbol stream =
+ fun processSymbol symtab stream =
let
val (tk, pos, stream) = fsmEat $ S.ungetc stream
in
case tk of
- CommentStart => getToken $ skipComment stream pos
+ CommentStart => getToken symtab $ skipComment stream pos
| DoubleDot => (Dot, pos, S.ungetc stream)
| Hash =>
if S.isFirstOnLine stream (S.getOffset stream - 1) then
let
- val (tk, pos', stream) = getToken stream
+ val (tk, pos', stream) = getToken symtab stream
in
if tk = EOS then
error pos "unfinished preprecessor directive"
else
let
- val (tk, stream) = handlePpcDir (tk, pos') stream
+ val (tk, stream) = handlePpcDir symtab (tk, pos') stream
in
(tk, pos, stream)
end
@@ -979,12 +982,12 @@ struct
| _ => (tk, pos, stream)
end
- and getToken stream =
+ and getToken symtab stream =
let
val (c, stream) = S.getchar stream
fun conv tk (pos, stream) = (tk, pos, stream)
- fun @-> parser acc = parserWrapper stream parser acc
+ fun @-> parser acc = parserWrapper stream (parser symtab) acc
in
case c of
NONE => conv EOS $ S.EOFpos stream
@@ -992,7 +995,7 @@ struct
if c = #"\n" then
conv NewLine $ S.getPosAfterChar stream
else if Char.isSpace c then
- getToken stream
+ getToken symtab stream
else if isIdStart c then
@-> idParser ()
else if isDigit c then
@@ -1002,9 +1005,9 @@ struct
else if c = #"\"" then
@-> strParser SeqInit
else if isStartForFsm c then
- processSymbol stream
+ processSymbol symtab stream
else if c = #"\\" then
- getToken $ handleBackslash stream
+ getToken symtab $ handleBackslash stream
else
unexpectedCharRaise stream c
end
@@ -1012,23 +1015,24 @@ struct
fun debugPrint fname =
let
val stream = S.create fname
+ val symtab = initSymtab ()
fun print line stream =
let
- val (tk, S.Pos (_, line', col'), stream) = getToken stream
+ val (tk, S.Pos (_, line', col'), stream) = getToken symtab stream
in
if line <> line' then
printf `"\nline " I line' `": \t" %
else
();
- printf I col' `":" Ptk tk `" ";
+ printf I col' `":" Ptk symtab tk `" ";
if tk = EOS then
()
else
print line' stream
end
in
- printTable $ fsmTable ();
+ printTable symtab $ fsmTable ();
printf `"Tokenizing file: " `fname;
print 0 stream;
printf `"\n" %
diff --git a/tokenizer.sig b/tokenizer.sig
index 3a711a4..67666a4 100644
--- a/tokenizer.sig
+++ b/tokenizer.sig
@@ -1,4 +1,5 @@
signature TOKENIZER = sig
+ structure ST: SYMTAB
structure S: STREAM
datatype intType = ItDec | ItOct | ItHex
@@ -13,11 +14,11 @@ signature TOKENIZER = sig
Invalid |
EOS |
NewLine |
- MacroEnd of string |
+ MacroEnd of int |
Num of numConst |
- Id of string |
+ Id of int |
CharConst of string * int |
StringConst of string |
@@ -123,8 +124,10 @@ signature TOKENIZER = sig
PpcError |
PpcPragma
- val getToken: S.t -> token * S.pos * S.t
- val Ptk: (token, 'a, 'b, 'c) a1printer
+ val initSymtab: unit -> token ST.t
+
+ val getToken: token ST.t -> S.t -> token * S.pos * S.t
+ val Ptk: (token ST.t, token, 'a, 'b, 'c) a2printer
val isPpcDir: token -> bool
val debugPrint: string -> unit