summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--.gitignore3
-rw-r--r--Makefile4
-rw-r--r--cpp.sml253
3 files changed, 217 insertions, 43 deletions
diff --git a/.gitignore b/.gitignore
index 57e20f1..678840a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,4 @@
-test
+test*
cpp
todo*
+mlmon.out*
diff --git a/Makefile b/Makefile
index 1db9e55..e18503d 100644
--- a/Makefile
+++ b/Makefile
@@ -1,2 +1,4 @@
+history := -const "Exn.keepHistory true"
+
def:
- mlton -const "Exn.keepHistory true" cpp.mlb
+ mlton $(history) cpp.mlb
diff --git a/cpp.sml b/cpp.sml
index 3d3bd9f..cb04406 100644
--- a/cpp.sml
+++ b/cpp.sml
@@ -40,28 +40,73 @@ datatype token =
kwVoid |
kwVolatile |
- OParen |
- CParen |
+ LParen |
+ RParen |
+ LBracket |
+ RBracket |
+ LBrace |
+ RBrace |
+ QuestionMark |
+ Colon |
+ Coma |
+ Semicolon |
+
+ Arrow |
Plus |
DoublePlus|
Minus |
DoubleMinus |
-
- Semicolon |
+ Ampersand |
+ Asterisk |
+ Slash |
+ Tilde |
+ ExclMark |
+ Percent |
+ DoubleGreater |
+ DoubleLess |
+ Greater |
+ Less |
EqualSign |
+ LessEqualSign |
+ GreaterEqualSign |
DoubleEqualSign |
- ExclMark |
ExclMarkEqualSign |
-
- QuestionMark |
- Colon |
+ Cap |
+ VerticalBar |
+ DoubleAmpersand |
+ DoubleVerticalBar |
+
+ AsteriskEqualSign |
+ SlashEqualSign |
+ PercentEqualSign |
+ PlusEqualSign |
+ MinusEqualSign |
+ DoubleLessEqualSign |
+ DoubleGreaterEqualSign |
+ AmpersandEqualSign |
+ CapEqualSign |
+ VerticalBarEqualSign |
Hash |
DoubleHash |
+ Dot |
+ DoubleDot |
+ TripleDot |
+
+ CommentStart |
+
CppInclude of includeArg |
- CppDefine
+ CppDefine |
+ CppUndef |
+ CppIf |
+ CppIfdef |
+ CppIfndef |
+ CppElse |
+ CppElif |
+ CppEndif |
+ CppPragma
val kwPrefix = #"@"
val cppPrefix = #"$"
@@ -147,9 +192,6 @@ let
fun % repr = str cppPrefix ^ repr
in
[
- (OParen, "("),
- (CParen, ")"),
-
(kwBreak, &"break"),
(kwCase, &"case"),
(kwChar, &"char"),
@@ -178,24 +220,72 @@ in
(kwVoid, &"void"),
(kwVolatile, &"volatile"),
+ (LParen, "("),
+ (RParen, ")"),
+ (LBracket, "["),
+ (RBracket, "]"),
+ (LBrace, "{"),
+ (RBrace, "}"),
+
+ (QuestionMark, "?"),
+ (Colon, ":"),
+ (Coma, ","),
+ (Semicolon, ";"),
+
+ (Arrow, "->"),
(Plus, "+"),
(DoublePlus, "++"),
(Minus, "-"),
(DoubleMinus, "--"),
-
- (Semicolon, ";"),
+ (Ampersand, "&"),
+ (Asterisk, "*"),
+ (Slash, "/"),
+ (Tilde, "~"),
+ (ExclMark, "!"),
+ (Percent, "%"),
+ (DoubleLess, "<<"),
+ (DoubleGreater, ">>"),
+ (Less, "<"),
+ (Greater, ">"),
(EqualSign, "="),
+ (LessEqualSign, "<="),
+ (GreaterEqualSign, ">="),
(DoubleEqualSign, "=="),
- (ExclMark, "!"),
(ExclMarkEqualSign, "!="),
-
- (QuestionMark, "?"),
- (Colon, ":"),
+ (Cap, "^"),
+ (VerticalBar, "|"),
+ (DoubleAmpersand, "&&"),
+ (DoubleVerticalBar, "||"),
+
+ (AsteriskEqualSign, "*="),
+ (SlashEqualSign, "/="),
+ (PercentEqualSign, "%="),
+ (PlusEqualSign, "+="),
+ (MinusEqualSign, "-="),
+ (DoubleLessEqualSign, "<<="),
+ (DoubleGreaterEqualSign, ">>="),
+ (AmpersandEqualSign, "&="),
+ (CapEqualSign, "^="),
+ (VerticalBarEqualSign, "|="),
(Hash, "#"),
(DoubleHash, "##"),
- (CppDefine, % "define")
+ (Dot, "."),
+ (DoubleDot, ".."),
+ (TripleDot, "..."),
+
+ (CommentStart, "/*"),
+
+ (CppDefine, %"define"),
+ (CppUndef, %"undef"),
+ (CppIf, %"if"),
+ (CppIfdef, %"ifdef"),
+ (CppIfndef, %"ifndef"),
+ (CppElse, %"else"),
+ (CppElif, %"elif"),
+ (CppEndif, %"endif"),
+ (CppPragma, %"pragma")
]
end
@@ -285,16 +375,26 @@ fun ungetc
fun readline { stack = (fid, off, contents) :: rest, allFiles } =
let
+ val prevIsSlash =
+ off > 0 andalso String.sub (contents, off - 1) = #"\\"
+
open String
- fun read offset =
+ fun read prevIsSlash offset acc =
+ let
+ val c = sub (contents, offset)
+ in
if offset = size contents then
raise LineWithoutNl
- else if sub (contents, offset) = #"\n" then
- (String.extract (contents, off, SOME $ offset - off), offset + 1)
+ else if c = #"\n" then
+ if prevIsSlash then
+ read (c = #"\\") (offset + 1) (#" " :: tl acc)
+ else
+ (implode $ rev acc, offset + 1)
else
- read (offset + 1)
+ read (c = #"\\") (offset + 1) (c :: acc)
+ end
- val (arg, newOffset) = read off
+ val (arg, newOffset) = read prevIsSlash off []
in
(arg, { stack = (fid, newOffset, contents) :: rest, allFiles })
end
@@ -356,7 +456,7 @@ fun isPrintable c = Char.isPrint c andalso c <> #" "
(* FSM for parsing symbols *)
-val maxStates = 40
+val maxStates = 51
fun fsmInsert (nextState, buf) curState tk (c :: cs) =
let
@@ -488,7 +588,7 @@ let
_ => (NONE, stream)
in
case c of
- NONE => (#1 $ sub (#2 fsmTable, curState), stream)
+ NONE => (#1 $ sub (#2 fsmTable, curState), stream)
| SOME c =>
let
val (tk, row) = sub (#2 fsmTable, curState)
@@ -542,15 +642,18 @@ let
val stream = ungetc stream
val P as (_, startOff) = getPos stream
- val startPos = getPposFromPos P stream
-
fun parse' stream acc = let
val (c, stream) = (fn (c, s) => (SOME c, s)) $ getchar stream handle
_ => (NONE, stream)
val (acc, tk, stream) = parser acc (stream, startOff) c handle
TkError (TkiDx dx, msg) => raise tkError2aug stream (dx, msg)
- | TkError (TkiStart, msg) => raise TkErrorAug (startPos, msg)
+ | TkError (TkiStart, msg) =>
+ let
+ val startPos = getPposFromPos P stream
+ in
+ raise TkErrorAug (startPos, msg)
+ end
| TkError (TkiEOF, msg) =>
let
val (file, line, _) = getPposFromPos P stream
@@ -559,7 +662,7 @@ let
end
in
case tk of
- NONE => parse' stream acc
+ NONE => parse' stream acc
| _ => (valOf tk, stream)
end
@@ -618,6 +721,7 @@ in
SOME (tk, _) => tk
| NONE => raise ExpectedCppDir
end
+ | formCppDir kwElse = CppElse
| formCppDir _ = raise ExpectedCppDir
fun numParser () (stream, startOff) c =
@@ -678,7 +782,7 @@ fun seqParser mode SeqInit (stream, _) (SOME c) =
if c <> seqBound mode then
let
val (c, stream) =
- if c <> #"\\" then (c, stream) else eatEscSeq stream
+ if c <> #"\\" then (c, stream) else eatEscSeq stream
in
(SeqValue (ord c), NONE, stream)
end
@@ -687,17 +791,25 @@ fun seqParser mode SeqInit (stream, _) (SOME c) =
else
raise seqExnConv SpmChr $ TkError (TkiDx 0, "expected value after %")
| seqParser mode (SeqValue v) (stream, startOff) (SOME c) =
- let
- fun term s v =
- if mode = SpmChr then CharConst (s, v) else StringConst $ stringCut s
- in
if seqBound mode = c then
- (SeqTerm, SOME $ term (finishSeqRead startOff stream) v, stream)
+ let
+ fun term s v =
+ if mode = SpmChr then
+ CharConst (s, v)
+ else
+ StringConst $ stringCut s
+ in
+ (SeqTerm, SOME $ term (finishSeqRead startOff stream) v, stream)
+ end
else if mode = SpmStr then
- (SeqValue v, NONE, stream)
+ let
+ val (_, stream) =
+ if c <> #"\\" then (c, stream) else eatEscSeq stream
+ in
+ (SeqValue v, NONE, stream)
+ end
else
raise seqExnConv SpmChr $ TkError (TkiDx 0, "expected % after value")
- end
| seqParser _ SeqTerm _ (SOME _) =
raise Unreachable
| seqParser mode state (_, _) NONE =
@@ -772,7 +884,9 @@ end
fun postprocessCppDir tk tkl stream =
let
- val isCppDir = (fn Hash => true | _ => false) (#2 $ hd tkl)
+ val isCppDir =
+ (fn Hash => true | _ => false) (#2 $ hd tkl)
+ andalso isFirstOnLine (hd tkl) stream
handle Empty => false
val (pos, tk') = tk
@@ -807,7 +921,60 @@ in
raise TkErrorAug (pos, "unexpected character " ^ repr)
end
-fun tokenize stream tkl =
+fun skipComment stream pos =
+let
+ fun skip prevIsAsterisk stream =
+ let
+ val (c, stream) = getchar stream
+ in
+ if prevIsAsterisk andalso c = #"/" then
+ stream
+ else
+ skip (c = #"*") stream
+ end
+in
+ skip false stream handle
+ EndOfFile =>
+ let
+ val pos = getPposFromPos pos stream
+ in
+ raise TkErrorAug (pos, "unfinished comment")
+ end
+end
+
+fun handleBackslash stream =
+let
+ val (c, stream) = (fn (c, s) => (SOME c, s)) $ getchar stream handle
+ _ => (NONE, stream)
+
+ val raiseErr = fn () =>
+ let
+ val pos = getPosAfterCharRead stream
+ val pos = getPposFromPos pos stream
+ in
+ raise TkErrorAug (pos, "expected \\n after backslash")
+ end
+in
+ case c of
+ SOME c =>
+ if c = #"\n" then
+ stream
+ else
+ raiseErr ()
+ | NONE => raiseErr ()
+end
+
+fun processSymbol stream tkl =
+let
+ val (T as (p as (fid, off), tk), stream) = fsmEat stream
+in
+ case tk of
+ CommentStart => tokenize (skipComment stream p) tkl
+ | DoubleDot => tokenize stream (((fid, off + 1), Dot) :: (p, Dot) :: tkl)
+ | _ => tokenize stream (T :: tkl)
+end
+
+and tokenize stream tkl =
let
fun getcharSkipEof stream = getchar stream handle
EndOfFile => getcharSkipEof (advanceToNewFile stream)
@@ -838,7 +1005,11 @@ in
else if c = #"\"" then
@-> strParser SeqInit
else if isStartForFsm c then
- cont $ fsmEat stream
+ processSymbol stream tkl
+ else if c = #"\\" then
+ tokenize (handleBackslash stream) tkl
+ else if c = #"\f" then
+ tokenize stream tkl
else
unexpectedCharRaise stream c
end