From d7d4830443f1e385af862462f976553c8a9033e1 Mon Sep 17 00:00:00 2001 From: Vladimir Azarov Date: Thu, 27 Mar 2025 23:31:10 +0100 Subject: Hashtable-based findKeyword function --- cpp.mlb | 5 ++++- hashtable.sig | 10 ++++++++++ hashtable.sml | 64 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ tokenizer.sml | 32 ++++++++++++++++++++---------- 4 files changed, 100 insertions(+), 11 deletions(-) create mode 100644 hashtable.sig create mode 100644 hashtable.sml diff --git a/cpp.mlb b/cpp.mlb index dd7fb36..0cc9341 100644 --- a/cpp.mlb +++ b/cpp.mlb @@ -9,9 +9,12 @@ in stream.sig stream.sml + hashtable.sig + hashtable.sml + tokenizer.sig tokenizer.sml exn_handler.sml - cpp.sml + cpp.sml end diff --git a/hashtable.sig b/hashtable.sig new file mode 100644 index 0000000..b714fd6 --- /dev/null +++ b/hashtable.sig @@ -0,0 +1,10 @@ +signature HASHTABLE = sig + type 'a t + + exception HashtableFull + exception Exists + + val create: int -> 'a t + val insert: 'a t -> string -> 'a -> unit + val lookup: 'a t -> string -> 'a option +end diff --git a/hashtable.sml b/hashtable.sml new file mode 100644 index 0000000..24e5bba --- /dev/null +++ b/hashtable.sml @@ -0,0 +1,64 @@ +structure Hashtable :> HASHTABLE = struct + (* (Array, (cells occupied, total size)) *) + type 'a t = ((string * 'a) option) array * (int * int) + + exception HashtableFull + exception Exists + + fun create size = (Array.array (size, NONE), (0, size)) + + val fillLimit = 0.75 + + val ` = Word32.fromInt + + fun hashFunc key = + let + open Word32 + in + List.foldl (fn (x, acc) => acc * `31 + `(ord x)) (`0) $ explode key + end + + fun insert (buf, (taken, total)) key value = + if Real.fromInt (taken + 1) / Real.fromInt total > fillLimit then + raise HashtableFull + else + let + open Array + val hash = hashFunc key + + (* TODO: use bitwise manipulation instead of mod *) + val i = ref $ Word32.toInt $ Word32.mod (hash, `(length buf)) + val flag = ref true + in + while !flag do ( + case sub (buf, !i) of + NONE => (update (buf, !i, SOME (key, value)); flag := false) + | SOME (key', _) => + if key' = key then + raise Exists + else + i := (!i + 1) mod (length buf) + ) + end + + fun lookup (buf, _) key = + let + open Array + val hash = hashFunc key + + val i = ref $ Word32.toInt $ Word32.mod (hash, `(length buf)) + val flag = ref true + val result = ref NONE + in + while !flag do ( + case sub (buf, !i) of + SOME (key', value) => + if key' = key then + (result := SOME value; flag := false) + else + i := (!i + 1) mod (length buf) + | NONE => flag := false + ); + !result + end +end diff --git a/tokenizer.sml b/tokenizer.sml index 41f8d38..104ae61 100644 --- a/tokenizer.sml +++ b/tokenizer.sml @@ -8,7 +8,6 @@ structure Tokenizer:> TOKENIZER = struct IntConst of intType * string * intSfx | FloatConst of string * floatSfx - datatype token = Invalid | NewLine | @@ -500,15 +499,28 @@ structure Tokenizer:> TOKENIZER = struct s end - fun findKeyword s = - case List.find - (fn (_, repr) => - String.sub (repr, 0) = kwPrefix andalso - String.extract (repr, 1, NONE) = s) - tokenRepr - of - SOME (tk, _) => tk - | NONE => Id s + fun keywordHashtableGen () = + let + open Hashtable + val table = create 128 + val () = + List.app + (fn (tk, repr) => + if String.sub (repr, 0) = kwPrefix then + insert table (String.extract (repr, 1, NONE)) tk + else + ()) + tokenRepr + in + table + end + + val keywordHashtable = lazy keywordHashtableGen + + fun findKeyword str = + case Hashtable.lookup (keywordHashtable ()) str of + NONE => Id str + | SOME tk => tk fun idParser () (stream, startOff) c = let -- cgit v1.2.3