diff options
author | Vladimir Azarov <avm@intermediate-node.net> | 2025-03-23 19:03:21 +0100 |
---|---|---|
committer | Vladimir Azarov <avm@intermediate-node.net> | 2025-03-23 19:03:21 +0100 |
commit | c4b48d26658e3359217725e81e8afcda6a6a257e (patch) | |
tree | db72a3a0232a29b9dcd558cbad98bceeb93eda8e | |
parent | 912d3013062b5e81e781037ef94e9549f5bb19be (diff) |
String literal parsing
-rw-r--r-- | cpp.sml | 109 |
1 files changed, 75 insertions, 34 deletions
@@ -10,6 +10,7 @@ datatype token = Number of string | Id of string | CharConst of string * int | + StringConst of string | kwBreak | kwCase | @@ -71,7 +72,9 @@ type pos = fileId * fileOffset type convPos = string * int * int option type fullToken = pos * token -exception TkError of int * string +datatype tkErrorAuxInfo = TkiEOF | TkiDx of int | TkiStart + +exception TkError of tkErrorAuxInfo * string exception TkErrorAug of convPos * string (* main exception *) exception ExpectedCppDir (* handled in postprocess *) @@ -209,6 +212,8 @@ val printToken = fn in printLn $ (str cppPrefix) ^ "include " ^ start ^ arg ^ end' end + | StringConst s => + printLn $ "\"" ^ s ^ "\"" | v => case List.find (fn (x, _) => x = v) tokenRepr of SOME (_, repr) => printLn repr @@ -524,26 +529,34 @@ in end end -fun tkError2aug stream (TkError (dx, msg)) = +fun tkError2aug stream (dx, msg) = let val (id, pos) = getPosAfterCharRead stream val pos = getPposFromPos (id, pos + dx) stream in TkErrorAug (pos, msg) end - | tkError2aug _ _ = raise Unreachable fun parseGeneric stream parser acc = let val stream = ungetc stream val P as (_, startOff) = getPos stream + val startPos = getPposFromPos P stream + fun parse' stream acc = let val (c, stream) = (fn (c, s) => (SOME c, s)) $ getchar stream handle _ => (NONE, stream) val (acc, tk, stream) = parser acc (stream, startOff) c handle - e as TkError _ => raise tkError2aug stream e + TkError (TkiDx dx, msg) => raise tkError2aug stream (dx, msg) + | TkError (TkiStart, msg) => raise TkErrorAug (startPos, msg) + | TkError (TkiEOF, msg) => + let + val (file, line, _) = getPposFromPos P stream + in + raise TkErrorAug ((file, line, NONE), msg) + end in case tk of NONE => parse' stream acc @@ -624,7 +637,7 @@ end fun eatEscSeq stream = let val (c, stream) = getchar stream handle - _ => raise TkError (0, "unfinished escape sequence") + _ => raise TkError (TkiDx 0, "unfinished escape sequence") in (case c of #"\\" => #"\\" @@ -634,42 +647,69 @@ in stream) end -datatype CharParseState = ChInit | ChStart | ChValue of int | ChTerm +fun stringCut s = String.extract (s, 1, SOME $ String.size s - 2) + +datatype SeqParseState = SeqInit | SeqStart | SeqValue of int | SeqTerm + +datatype seqParseMode = SpmChr | SpmStr + +fun seqBound SpmChr = #"'" + | seqBound SpmStr = #"\"" -val chExnExpValue = TkError (0, "expected value after '") -val chExnExpTerm = TkError (0, "expected ' after value") -val chExnStart = Unreachable -val chExnAfterTerm = Unreachable +fun seqExnConv mode (TkError (v, msg)) = +let + val bound = if mode = SpmChr then "'" else "\"" + val msg = + String.translate (fn c => if c = #"%" then bound else str c) msg +in + TkError (v, msg) +end + | seqExnConv _ _ = raise Unreachable + +fun unfinishedSeq SpmChr = "unfinished character constant" + | unfinishedSeq SpmStr = "unfinished string literal" -fun charParser ChInit (stream, _) (SOME c) = - if c = #"'" then - (ChStart, NONE, stream) +fun seqParser mode SeqInit (stream, _) (SOME c) = + if seqBound mode = c then + (SeqStart, NONE, stream) else - raise chExnStart - | charParser ChStart (stream, _) (SOME c) = - if c <> #"'" then + raise Unreachable + | seqParser mode SeqStart (stream, _) (SOME c) = + if c <> seqBound mode then let val (c, stream) = if c <> #"\\" then (c, stream) else eatEscSeq stream in - (ChValue $ ord c, NONE, stream) + (SeqValue (ord c), NONE, stream) end + else if mode = SpmStr then + (SeqTerm, SOME $ StringConst "", stream) else - raise chExnExpValue - | charParser (ChValue v) (stream, startOff) (SOME c) = - if c = #"'" then - (ChTerm, - SOME $ CharConst (finishSeqRead startOff stream, v), stream) + raise seqExnConv SpmChr $ TkError (TkiDx 0, "expected value after %") + | seqParser mode (SeqValue v) (stream, startOff) (SOME c) = + let + fun term s v = + if mode = SpmChr then CharConst (s, v) else StringConst $ stringCut s + in + if seqBound mode = c then + (SeqTerm, SOME $ term (finishSeqRead startOff stream) v, stream) + else if mode = SpmStr then + (SeqValue v, NONE, stream) else - raise chExnExpTerm - | charParser ChTerm _ (SOME _) = - raise chExnAfterTerm - | charParser state (_, _) NONE = + raise seqExnConv SpmChr $ TkError (TkiDx 0, "expected % after value") + end + | seqParser _ SeqTerm _ (SOME _) = + raise Unreachable + | seqParser mode state (_, _) NONE = raise case state of - ChInit => chExnStart - | ChStart => chExnExpValue - | ChValue _ => chExnExpTerm - | ChTerm => chExnAfterTerm + SeqInit => Unreachable + | SeqStart => seqExnConv mode $ TkError (TkiStart, unfinishedSeq mode) + | SeqValue _ => + seqExnConv mode $ TkError (TkiStart, unfinishedSeq mode) + | SeqTerm => Unreachable + +val charParser = seqParser SpmChr +val strParser = seqParser SpmStr fun readIncludeArg stream = let @@ -704,7 +744,6 @@ let let fun --> msg = raise TkErrorAug (getLinePos (), msg) fun isLast c = sub (s, size s - 1) = c - fun cut () = extract (s, 1, SOME $ size s - 2) in if s = "" then --> "#include argument is empty" @@ -712,12 +751,12 @@ let case sub (s, 0) of #"<" => if isLast #">" then - IAFromRef $ cut () + IAFromRef $ stringCut s else --> "expected > at #include argument end" | #"\"" => if isLast #"\"" then - IARel $ cut () + IARel $ stringCut s else --> "expected \" at #include argument end" | _ => --> "#include argument should start with \" or <" @@ -795,7 +834,9 @@ in else if isDigit c then @-> numParser () else if c = #"'" then - @-> charParser ChInit + @-> charParser SeqInit + else if c = #"\"" then + @-> strParser SeqInit else if isStartForFsm c then cont $ fsmEat stream else |