diff options
author | Vladimir Azarov <avm@intermediate-node.net> | 2025-05-31 19:30:21 +0200 |
---|---|---|
committer | Vladimir Azarov <avm@intermediate-node.net> | 2025-05-31 19:30:38 +0200 |
commit | 546a5861526192a908f2aa2bfc3cfe4f3f3baf43 (patch) | |
tree | acfe627e088bdba54a42e786d3b6b7053ec56fca | |
parent | 868e6313e3824d68b3121c5c95c7f29bc088c0e9 (diff) |
Proper constant parsing
-rw-r--r-- | .gitignore | 1 | ||||
-rw-r--r-- | Makefile | 6 | ||||
-rw-r--r-- | caux.c | 35 | ||||
-rw-r--r-- | caux.sml | 16 | ||||
-rw-r--r-- | ccross.mlb | 2 | ||||
-rw-r--r-- | parser.fun | 199 | ||||
-rw-r--r-- | ppc.sig | 1 | ||||
-rw-r--r-- | tokenizer.fun | 15 | ||||
-rw-r--r-- | tokenizer.sig | 3 |
9 files changed, 262 insertions, 16 deletions
@@ -1,4 +1,5 @@ test +export.h ccross doc/todo.txt mlmon.out* @@ -1,4 +1,8 @@ history := -const "Exn.keepHistory true" +ffi := -default-ann "allowFFI true" -export-header export.h +caux := caux.c def: - mlton $(history) ccross.mlb + mlton $(history) $(ffi) ccross.mlb $(caux) +clean: + rm -f ccross export.h @@ -0,0 +1,35 @@ +#include <errno.h> +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <assert.h> + +#include <math.h> + +#include "export.h" + +#define PARSE_FP(fp_type, func, huge_val) \ + fp_type parse_ ## fp_type (Pointer repr, int *status) {\ + char *f = (char*)repr;\ + char *tmp;\ + fp_type result; \ + int saved_errno = errno;\ +\ + errno = 0;\ + result = func(f, &tmp);\ +\ + if (errno == ERANGE) {\ + if (result == huge_val)\ + *status = 1;\ + else\ + *status = -1;\ + }\ + errno = saved_errno;\ +\ + if (f + strlen(f) != tmp)\ + *status = 2;\ + return result;\ + } + +PARSE_FP(float, strtof, HUGE_VALF) +PARSE_FP(double, strtod, HUGE_VAL) diff --git a/caux.sml b/caux.sml new file mode 100644 index 0000000..9c5b1c9 --- /dev/null +++ b/caux.sml @@ -0,0 +1,16 @@ +local + fun parse parse_fp repr = + let + val status = ref 0 + val result = parse_fp (repr ^ str #"\000", status) + in + (!status, result) + end + + val parse_float = _import "parse_float": string * int ref -> Real32.real; + val parse_double = + _import "parse_float": string * int ref -> Real64.real; +in + val parseFloat = parse parse_float + val parseDouble = parse parse_double +end @@ -6,7 +6,7 @@ in $(SML_LIB)/basis/basis.mlb $(SML_LIB)/basis/mlton.mlb - common.sml + common.sml caux.sml exn_handler.sig exn_handler.sml @@ -54,9 +54,14 @@ functor Parser(P: PPC): PARSER = struct BrComma + and cnum = + Ninteger of ctype * Word64.word + | Nfloat of Real32.real + | Ndouble of Real64.real + and expr = - Enum | Eid of int | + Econst of int * cnum | Estrlit of int | EmemberByV of int * exprAug | EmemberByP of int * exprAug | @@ -575,7 +580,12 @@ functor Parser(P: PPC): PARSER = struct printf R off %; case e of Eid id => Printf out P.?id P % - | Enum => Printf out `"num" P % + | Econst (id, n) => ( + case n of + Ninteger (t, _) => Printf out P.?id `":" Pctype t `" " P % + | Nfloat _ => Printf out P.?id `":float" P % + | Ndouble _ => Printf out P.?id `":double" P % + ) | Estrlit s => Printf out P.?s P % | EmemberByV pair => member pair "." | EmemberByP pair => member pair "->" @@ -743,6 +753,186 @@ functor Parser(P: PPC): PARSER = struct | NONE => (eAug, ctx) end + and determineMinNumType candidates acc = + let + open IntInf + fun p n = pow (fromInt 2, n) + + val limits = [ + (int_t, p 31), + (uint_t, p 32), + (long_t, p 63), + (ulong_t, p 64) + ] + + fun findLimit longlong_t = p 63 + | findLimit ulonglong_t = p 64 + | findLimit ctype = + case List.find (fn (t, _) => t = ctype) limits of + NONE => raise Unreachable + | SOME (_, limit) => limit + + fun find [] = (ulonglong_t, Word64.fromLargeInt acc) + | find (t :: tail) = + if acc < (findLimit t) then + (t, Word64.fromLargeInt acc) + else + find tail + in + find candidates + end + + and getSuffix pos repr = + let + + fun suffixChar c = + let + val c = Char.toLower c + in + c = #"u" orelse c = #"l" + end + + fun findBorder idx = + if suffixChar $ String.sub (repr, idx) then + findBorder (idx - 1) + else + idx + 1 + + val startIdx = findBorder $ String.size repr - 1 + val suffix = String.extract (repr, startIdx, NONE) + + val suffixCode = + case suffix of + "" => 0 + | "u" | "U" => 1 + | "l" | "L" => 2 + | "ul" | "uL" | "Ul" | "UL" | "lu" | "lU" | "Lu" | "LU" => 3 + | "ll" | "LL" => 4 + | "ull" | "uLL" | "Ull" | "ULL" | "llu" | "llU" | "LLu" | "LLU" => 5 + | _ => P.error pos `"unknown integer constant suffix" % + in + (String.substring (repr, 0, startIdx), suffixCode) + end + + and determiteIntNumType isDec (acc, suffix) = + let + val candidates = [ + ([int_t, long_t, longlong_t], [int_t, uint_t, long_t, ulong_t, + longlong_t]), + ([uint_t, ulong_t], [uint_t, ulong_t]), + ([long_t, longlong_t], [long_t, ulong_t, longlong_t]), + ([ulong_t], [ulong_t]), + ([longlong_t], [longlong_t]), + ([], []) + ] + + val candArray = Array.fromList candidates + val (dec, other) = Array.sub (candArray, suffix) + in + determineMinNumType (if isDec then dec else other) acc + end + + and parseNumGeneric (pos, conv) (idx, s) acc radix = + if idx = String.size s then + acc + else + let + val d = + case conv $ String.sub (s, idx) of + NONE => P.error pos `"invalid integer constant" % + | SOME v => IntInf.fromInt v + val idx = idx + 1 + open IntInf + in + parseNumGeneric (pos, conv) (idx, s) + (acc * radix + d) radix + end + + and collectNum pos num = + let + fun hexDigit c = + if Char.isDigit c then + SOME $ ord c - ord #"0" + else if Char.isHexDigit c then + SOME $ ord c - ord #"a" + 10 + else + NONE + + fun octDigit c = + if ord c >= ord #"0" andalso ord c < ord #"8" then + SOME $ ord c - ord #"0" + else + NONE + + fun decDigit c = + if Char.isDigit c then + SOME $ ord c - ord #"0" + else + NONE + in + if String.sub (num, 0) = #"0" then + (if String.size num > 1 andalso + Char.toLower (String.sub (num, 1)) = #"x" + then + parseNumGeneric (pos, hexDigit) (2, num) 0 16 + else + parseNumGeneric (pos, octDigit) (1, num) 0 8, false) + else + (parseNumGeneric (pos, decDigit) (0, num) 0 10, true) + end + + and parseInteger pos s = + let + val (num, suffix) = getSuffix pos s + val (acc, isDec) = collectNum pos num + val p = determiteIntNumType isDec (acc, suffix) + in + Ninteger p + end + + and isFPconst s = + let + open String + fun find idx = + if idx = size s then + false + else + case sub (s, idx) of + #"." | #"e" | #"E" => true + | c => + if Char.isDigit c then + find (idx + 1) + else + false + in + find 0 + end + + and parseFP pos s = + let + val lastC = String.sub (s, String.size s - 1) + fun handleStatus (status, v) = + case status of + 0 => v + | 1 => P.error pos `"floating-point constant overflow" % + | ~1 => P.error pos `"floating-point constant underflow" % + | 2 => P.error pos `"invalid floating-point constant" % + | _ => raise Unreachable + in + case Char.toLower lastC of + #"f" => + let + val repr = String.substring (s, 0, String.size s - 1) + in + Nfloat o handleStatus o parseFloat $ repr + end + | #"L" => P.error pos `"long double is not supported" % + | _ => Ndouble o handleStatus o parseDouble $ s + end + + and parseNumber pos s = + (if isFPconst s then parseFP else parseInteger) pos s + and parsePrimaryExpr ctx = let val (tk, pos, ctx) = getTokenCtx ctx @@ -751,8 +941,9 @@ functor Parser(P: PPC): PARSER = struct case tk of Tk (T.Id id) => wrap $ Eid id | Tk (T.Strlit id) => wrap $ Estrlit id - | Tk (T.CharConst _) => raise Unimplemented - | Tk (T.Num _) => wrap Enum + | Tk (T.CharConst (id, v)) => + wrap $ Econst (id, Ninteger (int_t, Word64.fromInt v)) + | Tk (T.Num id) => wrap $ Econst (id, parseNumber pos $ P.?? id) | TkParens list => let val ((_, ea), ctx) = ctxWithLayer ctx list (parseExpr []) @@ -24,6 +24,7 @@ signature PPC = sig val debugPrint: string -> string list -> unit val ? : (int, 'a, 'b, 'c) a1printer + val ?? : int -> string val psid: int -> 'a acc -> unit val Ptk: (T.token, 'a, 'b, 'c) a1printer val PtkPos: (tkPos, 'a, 'b, 'c) a1printer diff --git a/tokenizer.fun b/tokenizer.fun index 2adfd5c..b0df510 100644 --- a/tokenizer.fun +++ b/tokenizer.fun @@ -11,10 +11,9 @@ struct NewLine | MacroEnd of int | - Num of string | - Id of int | CharConst of int * int | + Num of int | Strlit of int | kwBreak | @@ -275,12 +274,11 @@ struct end z in case tk of - Id id => Printf out ?id % + Id id | Num id => Printf out ?id % | MacroEnd mid => Printf out `"mend(" ?mid `")" % | NewLine => Printf out `"\\n" % | PpcInclude (dir, arg) => Printf out `"#include(" `dir `", " `arg `")" % - | Num s => Printf out `s % | CharConst (repr, _) => Printf out ?repr % | Strlit id => Printf out ?id % | v => @@ -646,7 +644,7 @@ struct (Id id, stream) end - fun parseNumber dx stream = + fun parseNumber symtab dx stream = let fun collect stream = let @@ -669,8 +667,9 @@ struct val (endOff, stream) = collect stream val s = S.getSubstr startOff endOff stream + val id = ST.getId symtab s in - (Num s, pos, stream) + (Num id, pos, stream) end fun getDir stream = OS.Path.getParent o S.getFname $ stream @@ -807,9 +806,9 @@ struct else if isNondigit c then @-> $ parseId symtab else if isDigit c then - parseNumber 1 stream + parseNumber symtab 1 stream else if c = #"." andalso isDigit c1 then - parseNumber 2 stream + parseNumber symtab 2 stream else if c = #"'" then parseCharConst symtab stream else if c = #"\"" then diff --git a/tokenizer.sig b/tokenizer.sig index f221ab3..eff8e46 100644 --- a/tokenizer.sig +++ b/tokenizer.sig @@ -8,10 +8,9 @@ signature TOKENIZER = sig NewLine | MacroEnd of int | - Num of string | - Id of int | CharConst of int * int | + Num of int | Strlit of int | kwBreak | |