diff options
author | Vladimir Azarov <avm@intermediate-node.net> | 2025-03-24 00:49:06 +0100 |
---|---|---|
committer | Vladimir Azarov <avm@intermediate-node.net> | 2025-03-24 00:49:30 +0100 |
commit | e40727b58e357f123256557af50666aa42c2caa4 (patch) | |
tree | daf34c2dd0d5552d2ea74eb5b18307afbbac9ee5 | |
parent | c4b48d26658e3359217725e81e8afcda6a6a257e (diff) |
Rest of symbols
-rw-r--r-- | .gitignore | 3 | ||||
-rw-r--r-- | Makefile | 4 | ||||
-rw-r--r-- | cpp.sml | 253 |
3 files changed, 217 insertions, 43 deletions
@@ -1,3 +1,4 @@ -test +test* cpp todo* +mlmon.out* @@ -1,2 +1,4 @@ +history := -const "Exn.keepHistory true" + def: - mlton -const "Exn.keepHistory true" cpp.mlb + mlton $(history) cpp.mlb @@ -40,28 +40,73 @@ datatype token = kwVoid | kwVolatile | - OParen | - CParen | + LParen | + RParen | + LBracket | + RBracket | + LBrace | + RBrace | + QuestionMark | + Colon | + Coma | + Semicolon | + + Arrow | Plus | DoublePlus| Minus | DoubleMinus | - - Semicolon | + Ampersand | + Asterisk | + Slash | + Tilde | + ExclMark | + Percent | + DoubleGreater | + DoubleLess | + Greater | + Less | EqualSign | + LessEqualSign | + GreaterEqualSign | DoubleEqualSign | - ExclMark | ExclMarkEqualSign | - - QuestionMark | - Colon | + Cap | + VerticalBar | + DoubleAmpersand | + DoubleVerticalBar | + + AsteriskEqualSign | + SlashEqualSign | + PercentEqualSign | + PlusEqualSign | + MinusEqualSign | + DoubleLessEqualSign | + DoubleGreaterEqualSign | + AmpersandEqualSign | + CapEqualSign | + VerticalBarEqualSign | Hash | DoubleHash | + Dot | + DoubleDot | + TripleDot | + + CommentStart | + CppInclude of includeArg | - CppDefine + CppDefine | + CppUndef | + CppIf | + CppIfdef | + CppIfndef | + CppElse | + CppElif | + CppEndif | + CppPragma val kwPrefix = #"@" val cppPrefix = #"$" @@ -147,9 +192,6 @@ let fun % repr = str cppPrefix ^ repr in [ - (OParen, "("), - (CParen, ")"), - (kwBreak, &"break"), (kwCase, &"case"), (kwChar, &"char"), @@ -178,24 +220,72 @@ in (kwVoid, &"void"), (kwVolatile, &"volatile"), + (LParen, "("), + (RParen, ")"), + (LBracket, "["), + (RBracket, "]"), + (LBrace, "{"), + (RBrace, "}"), + + (QuestionMark, "?"), + (Colon, ":"), + (Coma, ","), + (Semicolon, ";"), + + (Arrow, "->"), (Plus, "+"), (DoublePlus, "++"), (Minus, "-"), (DoubleMinus, "--"), - - (Semicolon, ";"), + (Ampersand, "&"), + (Asterisk, "*"), + (Slash, "/"), + (Tilde, "~"), + (ExclMark, "!"), + (Percent, "%"), + (DoubleLess, "<<"), + (DoubleGreater, ">>"), + (Less, "<"), + (Greater, ">"), (EqualSign, "="), + (LessEqualSign, "<="), + (GreaterEqualSign, ">="), (DoubleEqualSign, "=="), - (ExclMark, "!"), (ExclMarkEqualSign, "!="), - - (QuestionMark, "?"), - (Colon, ":"), + (Cap, "^"), + (VerticalBar, "|"), + (DoubleAmpersand, "&&"), + (DoubleVerticalBar, "||"), + + (AsteriskEqualSign, "*="), + (SlashEqualSign, "/="), + (PercentEqualSign, "%="), + (PlusEqualSign, "+="), + (MinusEqualSign, "-="), + (DoubleLessEqualSign, "<<="), + (DoubleGreaterEqualSign, ">>="), + (AmpersandEqualSign, "&="), + (CapEqualSign, "^="), + (VerticalBarEqualSign, "|="), (Hash, "#"), (DoubleHash, "##"), - (CppDefine, % "define") + (Dot, "."), + (DoubleDot, ".."), + (TripleDot, "..."), + + (CommentStart, "/*"), + + (CppDefine, %"define"), + (CppUndef, %"undef"), + (CppIf, %"if"), + (CppIfdef, %"ifdef"), + (CppIfndef, %"ifndef"), + (CppElse, %"else"), + (CppElif, %"elif"), + (CppEndif, %"endif"), + (CppPragma, %"pragma") ] end @@ -285,16 +375,26 @@ fun ungetc fun readline { stack = (fid, off, contents) :: rest, allFiles } = let + val prevIsSlash = + off > 0 andalso String.sub (contents, off - 1) = #"\\" + open String - fun read offset = + fun read prevIsSlash offset acc = + let + val c = sub (contents, offset) + in if offset = size contents then raise LineWithoutNl - else if sub (contents, offset) = #"\n" then - (String.extract (contents, off, SOME $ offset - off), offset + 1) + else if c = #"\n" then + if prevIsSlash then + read (c = #"\\") (offset + 1) (#" " :: tl acc) + else + (implode $ rev acc, offset + 1) else - read (offset + 1) + read (c = #"\\") (offset + 1) (c :: acc) + end - val (arg, newOffset) = read off + val (arg, newOffset) = read prevIsSlash off [] in (arg, { stack = (fid, newOffset, contents) :: rest, allFiles }) end @@ -356,7 +456,7 @@ fun isPrintable c = Char.isPrint c andalso c <> #" " (* FSM for parsing symbols *) -val maxStates = 40 +val maxStates = 51 fun fsmInsert (nextState, buf) curState tk (c :: cs) = let @@ -488,7 +588,7 @@ let _ => (NONE, stream) in case c of - NONE => (#1 $ sub (#2 fsmTable, curState), stream) + NONE => (#1 $ sub (#2 fsmTable, curState), stream) | SOME c => let val (tk, row) = sub (#2 fsmTable, curState) @@ -542,15 +642,18 @@ let val stream = ungetc stream val P as (_, startOff) = getPos stream - val startPos = getPposFromPos P stream - fun parse' stream acc = let val (c, stream) = (fn (c, s) => (SOME c, s)) $ getchar stream handle _ => (NONE, stream) val (acc, tk, stream) = parser acc (stream, startOff) c handle TkError (TkiDx dx, msg) => raise tkError2aug stream (dx, msg) - | TkError (TkiStart, msg) => raise TkErrorAug (startPos, msg) + | TkError (TkiStart, msg) => + let + val startPos = getPposFromPos P stream + in + raise TkErrorAug (startPos, msg) + end | TkError (TkiEOF, msg) => let val (file, line, _) = getPposFromPos P stream @@ -559,7 +662,7 @@ let end in case tk of - NONE => parse' stream acc + NONE => parse' stream acc | _ => (valOf tk, stream) end @@ -618,6 +721,7 @@ in SOME (tk, _) => tk | NONE => raise ExpectedCppDir end + | formCppDir kwElse = CppElse | formCppDir _ = raise ExpectedCppDir fun numParser () (stream, startOff) c = @@ -678,7 +782,7 @@ fun seqParser mode SeqInit (stream, _) (SOME c) = if c <> seqBound mode then let val (c, stream) = - if c <> #"\\" then (c, stream) else eatEscSeq stream + if c <> #"\\" then (c, stream) else eatEscSeq stream in (SeqValue (ord c), NONE, stream) end @@ -687,17 +791,25 @@ fun seqParser mode SeqInit (stream, _) (SOME c) = else raise seqExnConv SpmChr $ TkError (TkiDx 0, "expected value after %") | seqParser mode (SeqValue v) (stream, startOff) (SOME c) = - let - fun term s v = - if mode = SpmChr then CharConst (s, v) else StringConst $ stringCut s - in if seqBound mode = c then - (SeqTerm, SOME $ term (finishSeqRead startOff stream) v, stream) + let + fun term s v = + if mode = SpmChr then + CharConst (s, v) + else + StringConst $ stringCut s + in + (SeqTerm, SOME $ term (finishSeqRead startOff stream) v, stream) + end else if mode = SpmStr then - (SeqValue v, NONE, stream) + let + val (_, stream) = + if c <> #"\\" then (c, stream) else eatEscSeq stream + in + (SeqValue v, NONE, stream) + end else raise seqExnConv SpmChr $ TkError (TkiDx 0, "expected % after value") - end | seqParser _ SeqTerm _ (SOME _) = raise Unreachable | seqParser mode state (_, _) NONE = @@ -772,7 +884,9 @@ end fun postprocessCppDir tk tkl stream = let - val isCppDir = (fn Hash => true | _ => false) (#2 $ hd tkl) + val isCppDir = + (fn Hash => true | _ => false) (#2 $ hd tkl) + andalso isFirstOnLine (hd tkl) stream handle Empty => false val (pos, tk') = tk @@ -807,7 +921,60 @@ in raise TkErrorAug (pos, "unexpected character " ^ repr) end -fun tokenize stream tkl = +fun skipComment stream pos = +let + fun skip prevIsAsterisk stream = + let + val (c, stream) = getchar stream + in + if prevIsAsterisk andalso c = #"/" then + stream + else + skip (c = #"*") stream + end +in + skip false stream handle + EndOfFile => + let + val pos = getPposFromPos pos stream + in + raise TkErrorAug (pos, "unfinished comment") + end +end + +fun handleBackslash stream = +let + val (c, stream) = (fn (c, s) => (SOME c, s)) $ getchar stream handle + _ => (NONE, stream) + + val raiseErr = fn () => + let + val pos = getPosAfterCharRead stream + val pos = getPposFromPos pos stream + in + raise TkErrorAug (pos, "expected \\n after backslash") + end +in + case c of + SOME c => + if c = #"\n" then + stream + else + raiseErr () + | NONE => raiseErr () +end + +fun processSymbol stream tkl = +let + val (T as (p as (fid, off), tk), stream) = fsmEat stream +in + case tk of + CommentStart => tokenize (skipComment stream p) tkl + | DoubleDot => tokenize stream (((fid, off + 1), Dot) :: (p, Dot) :: tkl) + | _ => tokenize stream (T :: tkl) +end + +and tokenize stream tkl = let fun getcharSkipEof stream = getchar stream handle EndOfFile => getcharSkipEof (advanceToNewFile stream) @@ -838,7 +1005,11 @@ in else if c = #"\"" then @-> strParser SeqInit else if isStartForFsm c then - cont $ fsmEat stream + processSymbol stream tkl + else if c = #"\\" then + tokenize (handleBackslash stream) tkl + else if c = #"\f" then + tokenize stream tkl else unexpectedCharRaise stream c end |