summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorVladimir Azarov <avm@intermediate-node.net>2025-03-27 23:31:10 +0100
committerVladimir Azarov <avm@intermediate-node.net>2025-03-27 23:31:10 +0100
commitd7d4830443f1e385af862462f976553c8a9033e1 (patch)
tree0fbdef6579a26e962b249c55e6ae407cd5b8dfda
parent11e14dd4b93154964c87fc97cfcee62c52edf97a (diff)
Hashtable-based findKeyword function
-rw-r--r--cpp.mlb5
-rw-r--r--hashtable.sig10
-rw-r--r--hashtable.sml64
-rw-r--r--tokenizer.sml32
4 files changed, 100 insertions, 11 deletions
diff --git a/cpp.mlb b/cpp.mlb
index dd7fb36..0cc9341 100644
--- a/cpp.mlb
+++ b/cpp.mlb
@@ -9,9 +9,12 @@ in
stream.sig
stream.sml
+ hashtable.sig
+ hashtable.sml
+
tokenizer.sig
tokenizer.sml
exn_handler.sml
- cpp.sml
+ cpp.sml
end
diff --git a/hashtable.sig b/hashtable.sig
new file mode 100644
index 0000000..b714fd6
--- /dev/null
+++ b/hashtable.sig
@@ -0,0 +1,10 @@
+signature HASHTABLE = sig
+ type 'a t
+
+ exception HashtableFull
+ exception Exists
+
+ val create: int -> 'a t
+ val insert: 'a t -> string -> 'a -> unit
+ val lookup: 'a t -> string -> 'a option
+end
diff --git a/hashtable.sml b/hashtable.sml
new file mode 100644
index 0000000..24e5bba
--- /dev/null
+++ b/hashtable.sml
@@ -0,0 +1,64 @@
+structure Hashtable :> HASHTABLE = struct
+ (* (Array, (cells occupied, total size)) *)
+ type 'a t = ((string * 'a) option) array * (int * int)
+
+ exception HashtableFull
+ exception Exists
+
+ fun create size = (Array.array (size, NONE), (0, size))
+
+ val fillLimit = 0.75
+
+ val ` = Word32.fromInt
+
+ fun hashFunc key =
+ let
+ open Word32
+ in
+ List.foldl (fn (x, acc) => acc * `31 + `(ord x)) (`0) $ explode key
+ end
+
+ fun insert (buf, (taken, total)) key value =
+ if Real.fromInt (taken + 1) / Real.fromInt total > fillLimit then
+ raise HashtableFull
+ else
+ let
+ open Array
+ val hash = hashFunc key
+
+ (* TODO: use bitwise manipulation instead of mod *)
+ val i = ref $ Word32.toInt $ Word32.mod (hash, `(length buf))
+ val flag = ref true
+ in
+ while !flag do (
+ case sub (buf, !i) of
+ NONE => (update (buf, !i, SOME (key, value)); flag := false)
+ | SOME (key', _) =>
+ if key' = key then
+ raise Exists
+ else
+ i := (!i + 1) mod (length buf)
+ )
+ end
+
+ fun lookup (buf, _) key =
+ let
+ open Array
+ val hash = hashFunc key
+
+ val i = ref $ Word32.toInt $ Word32.mod (hash, `(length buf))
+ val flag = ref true
+ val result = ref NONE
+ in
+ while !flag do (
+ case sub (buf, !i) of
+ SOME (key', value) =>
+ if key' = key then
+ (result := SOME value; flag := false)
+ else
+ i := (!i + 1) mod (length buf)
+ | NONE => flag := false
+ );
+ !result
+ end
+end
diff --git a/tokenizer.sml b/tokenizer.sml
index 41f8d38..104ae61 100644
--- a/tokenizer.sml
+++ b/tokenizer.sml
@@ -8,7 +8,6 @@ structure Tokenizer:> TOKENIZER = struct
IntConst of intType * string * intSfx |
FloatConst of string * floatSfx
-
datatype token =
Invalid |
NewLine |
@@ -500,15 +499,28 @@ structure Tokenizer:> TOKENIZER = struct
s
end
- fun findKeyword s =
- case List.find
- (fn (_, repr) =>
- String.sub (repr, 0) = kwPrefix andalso
- String.extract (repr, 1, NONE) = s)
- tokenRepr
- of
- SOME (tk, _) => tk
- | NONE => Id s
+ fun keywordHashtableGen () =
+ let
+ open Hashtable
+ val table = create 128
+ val () =
+ List.app
+ (fn (tk, repr) =>
+ if String.sub (repr, 0) = kwPrefix then
+ insert table (String.extract (repr, 1, NONE)) tk
+ else
+ ())
+ tokenRepr
+ in
+ table
+ end
+
+ val keywordHashtable = lazy keywordHashtableGen
+
+ fun findKeyword str =
+ case Hashtable.lookup (keywordHashtable ()) str of
+ NONE => Id str
+ | SOME tk => tk
fun idParser () (stream, startOff) c =
let