summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorVladimir Azarov <avm@intermediate-node.net>2025-03-23 19:03:21 +0100
committerVladimir Azarov <avm@intermediate-node.net>2025-03-23 19:03:21 +0100
commitc4b48d26658e3359217725e81e8afcda6a6a257e (patch)
treedb72a3a0232a29b9dcd558cbad98bceeb93eda8e
parent912d3013062b5e81e781037ef94e9549f5bb19be (diff)
String literal parsing
-rw-r--r--cpp.sml109
1 files changed, 75 insertions, 34 deletions
diff --git a/cpp.sml b/cpp.sml
index e08a41d..3d3bd9f 100644
--- a/cpp.sml
+++ b/cpp.sml
@@ -10,6 +10,7 @@ datatype token =
Number of string |
Id of string |
CharConst of string * int |
+ StringConst of string |
kwBreak |
kwCase |
@@ -71,7 +72,9 @@ type pos = fileId * fileOffset
type convPos = string * int * int option
type fullToken = pos * token
-exception TkError of int * string
+datatype tkErrorAuxInfo = TkiEOF | TkiDx of int | TkiStart
+
+exception TkError of tkErrorAuxInfo * string
exception TkErrorAug of convPos * string (* main exception *)
exception ExpectedCppDir (* handled in postprocess *)
@@ -209,6 +212,8 @@ val printToken = fn
in
printLn $ (str cppPrefix) ^ "include " ^ start ^ arg ^ end'
end
+ | StringConst s =>
+ printLn $ "\"" ^ s ^ "\""
| v =>
case List.find (fn (x, _) => x = v) tokenRepr of
SOME (_, repr) => printLn repr
@@ -524,26 +529,34 @@ in
end
end
-fun tkError2aug stream (TkError (dx, msg)) =
+fun tkError2aug stream (dx, msg) =
let
val (id, pos) = getPosAfterCharRead stream
val pos = getPposFromPos (id, pos + dx) stream
in
TkErrorAug (pos, msg)
end
- | tkError2aug _ _ = raise Unreachable
fun parseGeneric stream parser acc =
let
val stream = ungetc stream
val P as (_, startOff) = getPos stream
+ val startPos = getPposFromPos P stream
+
fun parse' stream acc = let
val (c, stream) = (fn (c, s) => (SOME c, s)) $ getchar stream handle
_ => (NONE, stream)
val (acc, tk, stream) = parser acc (stream, startOff) c handle
- e as TkError _ => raise tkError2aug stream e
+ TkError (TkiDx dx, msg) => raise tkError2aug stream (dx, msg)
+ | TkError (TkiStart, msg) => raise TkErrorAug (startPos, msg)
+ | TkError (TkiEOF, msg) =>
+ let
+ val (file, line, _) = getPposFromPos P stream
+ in
+ raise TkErrorAug ((file, line, NONE), msg)
+ end
in
case tk of
NONE => parse' stream acc
@@ -624,7 +637,7 @@ end
fun eatEscSeq stream =
let
val (c, stream) = getchar stream handle
- _ => raise TkError (0, "unfinished escape sequence")
+ _ => raise TkError (TkiDx 0, "unfinished escape sequence")
in
(case c of
#"\\" => #"\\"
@@ -634,42 +647,69 @@ in
stream)
end
-datatype CharParseState = ChInit | ChStart | ChValue of int | ChTerm
+fun stringCut s = String.extract (s, 1, SOME $ String.size s - 2)
+
+datatype SeqParseState = SeqInit | SeqStart | SeqValue of int | SeqTerm
+
+datatype seqParseMode = SpmChr | SpmStr
+
+fun seqBound SpmChr = #"'"
+ | seqBound SpmStr = #"\""
-val chExnExpValue = TkError (0, "expected value after '")
-val chExnExpTerm = TkError (0, "expected ' after value")
-val chExnStart = Unreachable
-val chExnAfterTerm = Unreachable
+fun seqExnConv mode (TkError (v, msg)) =
+let
+ val bound = if mode = SpmChr then "'" else "\""
+ val msg =
+ String.translate (fn c => if c = #"%" then bound else str c) msg
+in
+ TkError (v, msg)
+end
+ | seqExnConv _ _ = raise Unreachable
+
+fun unfinishedSeq SpmChr = "unfinished character constant"
+ | unfinishedSeq SpmStr = "unfinished string literal"
-fun charParser ChInit (stream, _) (SOME c) =
- if c = #"'" then
- (ChStart, NONE, stream)
+fun seqParser mode SeqInit (stream, _) (SOME c) =
+ if seqBound mode = c then
+ (SeqStart, NONE, stream)
else
- raise chExnStart
- | charParser ChStart (stream, _) (SOME c) =
- if c <> #"'" then
+ raise Unreachable
+ | seqParser mode SeqStart (stream, _) (SOME c) =
+ if c <> seqBound mode then
let
val (c, stream) =
if c <> #"\\" then (c, stream) else eatEscSeq stream
in
- (ChValue $ ord c, NONE, stream)
+ (SeqValue (ord c), NONE, stream)
end
+ else if mode = SpmStr then
+ (SeqTerm, SOME $ StringConst "", stream)
else
- raise chExnExpValue
- | charParser (ChValue v) (stream, startOff) (SOME c) =
- if c = #"'" then
- (ChTerm,
- SOME $ CharConst (finishSeqRead startOff stream, v), stream)
+ raise seqExnConv SpmChr $ TkError (TkiDx 0, "expected value after %")
+ | seqParser mode (SeqValue v) (stream, startOff) (SOME c) =
+ let
+ fun term s v =
+ if mode = SpmChr then CharConst (s, v) else StringConst $ stringCut s
+ in
+ if seqBound mode = c then
+ (SeqTerm, SOME $ term (finishSeqRead startOff stream) v, stream)
+ else if mode = SpmStr then
+ (SeqValue v, NONE, stream)
else
- raise chExnExpTerm
- | charParser ChTerm _ (SOME _) =
- raise chExnAfterTerm
- | charParser state (_, _) NONE =
+ raise seqExnConv SpmChr $ TkError (TkiDx 0, "expected % after value")
+ end
+ | seqParser _ SeqTerm _ (SOME _) =
+ raise Unreachable
+ | seqParser mode state (_, _) NONE =
raise case state of
- ChInit => chExnStart
- | ChStart => chExnExpValue
- | ChValue _ => chExnExpTerm
- | ChTerm => chExnAfterTerm
+ SeqInit => Unreachable
+ | SeqStart => seqExnConv mode $ TkError (TkiStart, unfinishedSeq mode)
+ | SeqValue _ =>
+ seqExnConv mode $ TkError (TkiStart, unfinishedSeq mode)
+ | SeqTerm => Unreachable
+
+val charParser = seqParser SpmChr
+val strParser = seqParser SpmStr
fun readIncludeArg stream =
let
@@ -704,7 +744,6 @@ let
let
fun --> msg = raise TkErrorAug (getLinePos (), msg)
fun isLast c = sub (s, size s - 1) = c
- fun cut () = extract (s, 1, SOME $ size s - 2)
in
if s = "" then
--> "#include argument is empty"
@@ -712,12 +751,12 @@ let
case sub (s, 0) of
#"<" =>
if isLast #">" then
- IAFromRef $ cut ()
+ IAFromRef $ stringCut s
else
--> "expected > at #include argument end"
| #"\"" =>
if isLast #"\"" then
- IARel $ cut ()
+ IARel $ stringCut s
else
--> "expected \" at #include argument end"
| _ => --> "#include argument should start with \" or <"
@@ -795,7 +834,9 @@ in
else if isDigit c then
@-> numParser ()
else if c = #"'" then
- @-> charParser ChInit
+ @-> charParser SeqInit
+ else if c = #"\"" then
+ @-> strParser SeqInit
else if isStartForFsm c then
cont $ fsmEat stream
else