summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorVladimir Azarov <avm@intermediate-node.net>2025-05-31 19:30:21 +0200
committerVladimir Azarov <avm@intermediate-node.net>2025-05-31 19:30:38 +0200
commit546a5861526192a908f2aa2bfc3cfe4f3f3baf43 (patch)
treeacfe627e088bdba54a42e786d3b6b7053ec56fca
parent868e6313e3824d68b3121c5c95c7f29bc088c0e9 (diff)
Proper constant parsing
-rw-r--r--.gitignore1
-rw-r--r--Makefile6
-rw-r--r--caux.c35
-rw-r--r--caux.sml16
-rw-r--r--ccross.mlb2
-rw-r--r--parser.fun199
-rw-r--r--ppc.sig1
-rw-r--r--tokenizer.fun15
-rw-r--r--tokenizer.sig3
9 files changed, 262 insertions, 16 deletions
diff --git a/.gitignore b/.gitignore
index 0d3416d..3f026e7 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,4 +1,5 @@
test
+export.h
ccross
doc/todo.txt
mlmon.out*
diff --git a/Makefile b/Makefile
index 92e0285..a832a4f 100644
--- a/Makefile
+++ b/Makefile
@@ -1,4 +1,8 @@
history := -const "Exn.keepHistory true"
+ffi := -default-ann "allowFFI true" -export-header export.h
+caux := caux.c
def:
- mlton $(history) ccross.mlb
+ mlton $(history) $(ffi) ccross.mlb $(caux)
+clean:
+ rm -f ccross export.h
diff --git a/caux.c b/caux.c
new file mode 100644
index 0000000..1602c55
--- /dev/null
+++ b/caux.c
@@ -0,0 +1,35 @@
+#include <errno.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+
+#include <math.h>
+
+#include "export.h"
+
+#define PARSE_FP(fp_type, func, huge_val) \
+ fp_type parse_ ## fp_type (Pointer repr, int *status) {\
+ char *f = (char*)repr;\
+ char *tmp;\
+ fp_type result; \
+ int saved_errno = errno;\
+\
+ errno = 0;\
+ result = func(f, &tmp);\
+\
+ if (errno == ERANGE) {\
+ if (result == huge_val)\
+ *status = 1;\
+ else\
+ *status = -1;\
+ }\
+ errno = saved_errno;\
+\
+ if (f + strlen(f) != tmp)\
+ *status = 2;\
+ return result;\
+ }
+
+PARSE_FP(float, strtof, HUGE_VALF)
+PARSE_FP(double, strtod, HUGE_VAL)
diff --git a/caux.sml b/caux.sml
new file mode 100644
index 0000000..9c5b1c9
--- /dev/null
+++ b/caux.sml
@@ -0,0 +1,16 @@
+local
+ fun parse parse_fp repr =
+ let
+ val status = ref 0
+ val result = parse_fp (repr ^ str #"\000", status)
+ in
+ (!status, result)
+ end
+
+ val parse_float = _import "parse_float": string * int ref -> Real32.real;
+ val parse_double =
+ _import "parse_float": string * int ref -> Real64.real;
+in
+ val parseFloat = parse parse_float
+ val parseDouble = parse parse_double
+end
diff --git a/ccross.mlb b/ccross.mlb
index 3aea79a..fd603fe 100644
--- a/ccross.mlb
+++ b/ccross.mlb
@@ -6,7 +6,7 @@ in
$(SML_LIB)/basis/basis.mlb
$(SML_LIB)/basis/mlton.mlb
- common.sml
+ common.sml caux.sml
exn_handler.sig exn_handler.sml
diff --git a/parser.fun b/parser.fun
index 5e69692..7b41ee1 100644
--- a/parser.fun
+++ b/parser.fun
@@ -54,9 +54,14 @@ functor Parser(P: PPC): PARSER = struct
BrComma
+ and cnum =
+ Ninteger of ctype * Word64.word
+ | Nfloat of Real32.real
+ | Ndouble of Real64.real
+
and expr =
- Enum |
Eid of int |
+ Econst of int * cnum |
Estrlit of int |
EmemberByV of int * exprAug |
EmemberByP of int * exprAug |
@@ -575,7 +580,12 @@ functor Parser(P: PPC): PARSER = struct
printf R off %;
case e of
Eid id => Printf out P.?id P %
- | Enum => Printf out `"num" P %
+ | Econst (id, n) => (
+ case n of
+ Ninteger (t, _) => Printf out P.?id `":" Pctype t `" " P %
+ | Nfloat _ => Printf out P.?id `":float" P %
+ | Ndouble _ => Printf out P.?id `":double" P %
+ )
| Estrlit s => Printf out P.?s P %
| EmemberByV pair => member pair "."
| EmemberByP pair => member pair "->"
@@ -743,6 +753,186 @@ functor Parser(P: PPC): PARSER = struct
| NONE => (eAug, ctx)
end
+ and determineMinNumType candidates acc =
+ let
+ open IntInf
+ fun p n = pow (fromInt 2, n)
+
+ val limits = [
+ (int_t, p 31),
+ (uint_t, p 32),
+ (long_t, p 63),
+ (ulong_t, p 64)
+ ]
+
+ fun findLimit longlong_t = p 63
+ | findLimit ulonglong_t = p 64
+ | findLimit ctype =
+ case List.find (fn (t, _) => t = ctype) limits of
+ NONE => raise Unreachable
+ | SOME (_, limit) => limit
+
+ fun find [] = (ulonglong_t, Word64.fromLargeInt acc)
+ | find (t :: tail) =
+ if acc < (findLimit t) then
+ (t, Word64.fromLargeInt acc)
+ else
+ find tail
+ in
+ find candidates
+ end
+
+ and getSuffix pos repr =
+ let
+
+ fun suffixChar c =
+ let
+ val c = Char.toLower c
+ in
+ c = #"u" orelse c = #"l"
+ end
+
+ fun findBorder idx =
+ if suffixChar $ String.sub (repr, idx) then
+ findBorder (idx - 1)
+ else
+ idx + 1
+
+ val startIdx = findBorder $ String.size repr - 1
+ val suffix = String.extract (repr, startIdx, NONE)
+
+ val suffixCode =
+ case suffix of
+ "" => 0
+ | "u" | "U" => 1
+ | "l" | "L" => 2
+ | "ul" | "uL" | "Ul" | "UL" | "lu" | "lU" | "Lu" | "LU" => 3
+ | "ll" | "LL" => 4
+ | "ull" | "uLL" | "Ull" | "ULL" | "llu" | "llU" | "LLu" | "LLU" => 5
+ | _ => P.error pos `"unknown integer constant suffix" %
+ in
+ (String.substring (repr, 0, startIdx), suffixCode)
+ end
+
+ and determiteIntNumType isDec (acc, suffix) =
+ let
+ val candidates = [
+ ([int_t, long_t, longlong_t], [int_t, uint_t, long_t, ulong_t,
+ longlong_t]),
+ ([uint_t, ulong_t], [uint_t, ulong_t]),
+ ([long_t, longlong_t], [long_t, ulong_t, longlong_t]),
+ ([ulong_t], [ulong_t]),
+ ([longlong_t], [longlong_t]),
+ ([], [])
+ ]
+
+ val candArray = Array.fromList candidates
+ val (dec, other) = Array.sub (candArray, suffix)
+ in
+ determineMinNumType (if isDec then dec else other) acc
+ end
+
+ and parseNumGeneric (pos, conv) (idx, s) acc radix =
+ if idx = String.size s then
+ acc
+ else
+ let
+ val d =
+ case conv $ String.sub (s, idx) of
+ NONE => P.error pos `"invalid integer constant" %
+ | SOME v => IntInf.fromInt v
+ val idx = idx + 1
+ open IntInf
+ in
+ parseNumGeneric (pos, conv) (idx, s)
+ (acc * radix + d) radix
+ end
+
+ and collectNum pos num =
+ let
+ fun hexDigit c =
+ if Char.isDigit c then
+ SOME $ ord c - ord #"0"
+ else if Char.isHexDigit c then
+ SOME $ ord c - ord #"a" + 10
+ else
+ NONE
+
+ fun octDigit c =
+ if ord c >= ord #"0" andalso ord c < ord #"8" then
+ SOME $ ord c - ord #"0"
+ else
+ NONE
+
+ fun decDigit c =
+ if Char.isDigit c then
+ SOME $ ord c - ord #"0"
+ else
+ NONE
+ in
+ if String.sub (num, 0) = #"0" then
+ (if String.size num > 1 andalso
+ Char.toLower (String.sub (num, 1)) = #"x"
+ then
+ parseNumGeneric (pos, hexDigit) (2, num) 0 16
+ else
+ parseNumGeneric (pos, octDigit) (1, num) 0 8, false)
+ else
+ (parseNumGeneric (pos, decDigit) (0, num) 0 10, true)
+ end
+
+ and parseInteger pos s =
+ let
+ val (num, suffix) = getSuffix pos s
+ val (acc, isDec) = collectNum pos num
+ val p = determiteIntNumType isDec (acc, suffix)
+ in
+ Ninteger p
+ end
+
+ and isFPconst s =
+ let
+ open String
+ fun find idx =
+ if idx = size s then
+ false
+ else
+ case sub (s, idx) of
+ #"." | #"e" | #"E" => true
+ | c =>
+ if Char.isDigit c then
+ find (idx + 1)
+ else
+ false
+ in
+ find 0
+ end
+
+ and parseFP pos s =
+ let
+ val lastC = String.sub (s, String.size s - 1)
+ fun handleStatus (status, v) =
+ case status of
+ 0 => v
+ | 1 => P.error pos `"floating-point constant overflow" %
+ | ~1 => P.error pos `"floating-point constant underflow" %
+ | 2 => P.error pos `"invalid floating-point constant" %
+ | _ => raise Unreachable
+ in
+ case Char.toLower lastC of
+ #"f" =>
+ let
+ val repr = String.substring (s, 0, String.size s - 1)
+ in
+ Nfloat o handleStatus o parseFloat $ repr
+ end
+ | #"L" => P.error pos `"long double is not supported" %
+ | _ => Ndouble o handleStatus o parseDouble $ s
+ end
+
+ and parseNumber pos s =
+ (if isFPconst s then parseFP else parseInteger) pos s
+
and parsePrimaryExpr ctx =
let
val (tk, pos, ctx) = getTokenCtx ctx
@@ -751,8 +941,9 @@ functor Parser(P: PPC): PARSER = struct
case tk of
Tk (T.Id id) => wrap $ Eid id
| Tk (T.Strlit id) => wrap $ Estrlit id
- | Tk (T.CharConst _) => raise Unimplemented
- | Tk (T.Num _) => wrap Enum
+ | Tk (T.CharConst (id, v)) =>
+ wrap $ Econst (id, Ninteger (int_t, Word64.fromInt v))
+ | Tk (T.Num id) => wrap $ Econst (id, parseNumber pos $ P.?? id)
| TkParens list =>
let
val ((_, ea), ctx) = ctxWithLayer ctx list (parseExpr [])
diff --git a/ppc.sig b/ppc.sig
index 53d4ed2..210489b 100644
--- a/ppc.sig
+++ b/ppc.sig
@@ -24,6 +24,7 @@ signature PPC = sig
val debugPrint: string -> string list -> unit
val ? : (int, 'a, 'b, 'c) a1printer
+ val ?? : int -> string
val psid: int -> 'a acc -> unit
val Ptk: (T.token, 'a, 'b, 'c) a1printer
val PtkPos: (tkPos, 'a, 'b, 'c) a1printer
diff --git a/tokenizer.fun b/tokenizer.fun
index 2adfd5c..b0df510 100644
--- a/tokenizer.fun
+++ b/tokenizer.fun
@@ -11,10 +11,9 @@ struct
NewLine |
MacroEnd of int |
- Num of string |
-
Id of int |
CharConst of int * int |
+ Num of int |
Strlit of int |
kwBreak |
@@ -275,12 +274,11 @@ struct
end z
in
case tk of
- Id id => Printf out ?id %
+ Id id | Num id => Printf out ?id %
| MacroEnd mid => Printf out `"mend(" ?mid `")" %
| NewLine => Printf out `"\\n" %
| PpcInclude (dir, arg) =>
Printf out `"#include(" `dir `", " `arg `")" %
- | Num s => Printf out `s %
| CharConst (repr, _) => Printf out ?repr %
| Strlit id => Printf out ?id %
| v =>
@@ -646,7 +644,7 @@ struct
(Id id, stream)
end
- fun parseNumber dx stream =
+ fun parseNumber symtab dx stream =
let
fun collect stream =
let
@@ -669,8 +667,9 @@ struct
val (endOff, stream) = collect stream
val s = S.getSubstr startOff endOff stream
+ val id = ST.getId symtab s
in
- (Num s, pos, stream)
+ (Num id, pos, stream)
end
fun getDir stream = OS.Path.getParent o S.getFname $ stream
@@ -807,9 +806,9 @@ struct
else if isNondigit c then
@-> $ parseId symtab
else if isDigit c then
- parseNumber 1 stream
+ parseNumber symtab 1 stream
else if c = #"." andalso isDigit c1 then
- parseNumber 2 stream
+ parseNumber symtab 2 stream
else if c = #"'" then
parseCharConst symtab stream
else if c = #"\"" then
diff --git a/tokenizer.sig b/tokenizer.sig
index f221ab3..eff8e46 100644
--- a/tokenizer.sig
+++ b/tokenizer.sig
@@ -8,10 +8,9 @@ signature TOKENIZER = sig
NewLine |
MacroEnd of int |
- Num of string |
-
Id of int |
CharConst of int * int |
+ Num of int |
Strlit of int |
kwBreak |