diff options
author | Vladimir Azarov <avm@intermediate-node.net> | 2025-03-27 23:31:10 +0100 |
---|---|---|
committer | Vladimir Azarov <avm@intermediate-node.net> | 2025-03-27 23:31:10 +0100 |
commit | d7d4830443f1e385af862462f976553c8a9033e1 (patch) | |
tree | 0fbdef6579a26e962b249c55e6ae407cd5b8dfda | |
parent | 11e14dd4b93154964c87fc97cfcee62c52edf97a (diff) |
Hashtable-based findKeyword function
-rw-r--r-- | cpp.mlb | 5 | ||||
-rw-r--r-- | hashtable.sig | 10 | ||||
-rw-r--r-- | hashtable.sml | 64 | ||||
-rw-r--r-- | tokenizer.sml | 32 |
4 files changed, 100 insertions, 11 deletions
@@ -9,9 +9,12 @@ in stream.sig stream.sml + hashtable.sig + hashtable.sml + tokenizer.sig tokenizer.sml exn_handler.sml - cpp.sml + cpp.sml end diff --git a/hashtable.sig b/hashtable.sig new file mode 100644 index 0000000..b714fd6 --- /dev/null +++ b/hashtable.sig @@ -0,0 +1,10 @@ +signature HASHTABLE = sig + type 'a t + + exception HashtableFull + exception Exists + + val create: int -> 'a t + val insert: 'a t -> string -> 'a -> unit + val lookup: 'a t -> string -> 'a option +end diff --git a/hashtable.sml b/hashtable.sml new file mode 100644 index 0000000..24e5bba --- /dev/null +++ b/hashtable.sml @@ -0,0 +1,64 @@ +structure Hashtable :> HASHTABLE = struct + (* (Array, (cells occupied, total size)) *) + type 'a t = ((string * 'a) option) array * (int * int) + + exception HashtableFull + exception Exists + + fun create size = (Array.array (size, NONE), (0, size)) + + val fillLimit = 0.75 + + val ` = Word32.fromInt + + fun hashFunc key = + let + open Word32 + in + List.foldl (fn (x, acc) => acc * `31 + `(ord x)) (`0) $ explode key + end + + fun insert (buf, (taken, total)) key value = + if Real.fromInt (taken + 1) / Real.fromInt total > fillLimit then + raise HashtableFull + else + let + open Array + val hash = hashFunc key + + (* TODO: use bitwise manipulation instead of mod *) + val i = ref $ Word32.toInt $ Word32.mod (hash, `(length buf)) + val flag = ref true + in + while !flag do ( + case sub (buf, !i) of + NONE => (update (buf, !i, SOME (key, value)); flag := false) + | SOME (key', _) => + if key' = key then + raise Exists + else + i := (!i + 1) mod (length buf) + ) + end + + fun lookup (buf, _) key = + let + open Array + val hash = hashFunc key + + val i = ref $ Word32.toInt $ Word32.mod (hash, `(length buf)) + val flag = ref true + val result = ref NONE + in + while !flag do ( + case sub (buf, !i) of + SOME (key', value) => + if key' = key then + (result := SOME value; flag := false) + else + i := (!i + 1) mod (length buf) + | NONE => flag := false + ); + !result + end +end diff --git a/tokenizer.sml b/tokenizer.sml index 41f8d38..104ae61 100644 --- a/tokenizer.sml +++ b/tokenizer.sml @@ -8,7 +8,6 @@ structure Tokenizer:> TOKENIZER = struct IntConst of intType * string * intSfx | FloatConst of string * floatSfx - datatype token = Invalid | NewLine | @@ -500,15 +499,28 @@ structure Tokenizer:> TOKENIZER = struct s end - fun findKeyword s = - case List.find - (fn (_, repr) => - String.sub (repr, 0) = kwPrefix andalso - String.extract (repr, 1, NONE) = s) - tokenRepr - of - SOME (tk, _) => tk - | NONE => Id s + fun keywordHashtableGen () = + let + open Hashtable + val table = create 128 + val () = + List.app + (fn (tk, repr) => + if String.sub (repr, 0) = kwPrefix then + insert table (String.extract (repr, 1, NONE)) tk + else + ()) + tokenRepr + in + table + end + + val keywordHashtable = lazy keywordHashtableGen + + fun findKeyword str = + case Hashtable.lookup (keywordHashtable ()) str of + NONE => Id str + | SOME tk => tk fun idParser () (stream, startOff) c = let |