summaryrefslogtreecommitdiff
path: root/cpp.sml
diff options
context:
space:
mode:
Diffstat (limited to 'cpp.sml')
-rw-r--r--cpp.sml330
1 files changed, 282 insertions, 48 deletions
diff --git a/cpp.sml b/cpp.sml
index 1050c03..e08a41d 100644
--- a/cpp.sml
+++ b/cpp.sml
@@ -3,6 +3,8 @@ infixr 0 $
fun printLn s = (print s; print "\n")
+datatype includeArg = IARel of string | IAFromRef of string
+
datatype token =
Invalid |
Number of string |
@@ -57,12 +59,85 @@ datatype token =
Hash |
DoubleHash |
- CppInclude |
+ CppInclude of includeArg |
CppDefine
val kwPrefix = #"@"
val cppPrefix = #"$"
+type fileId = int
+type fileOffset = int
+type pos = fileId * fileOffset
+type convPos = string * int * int option
+type fullToken = pos * token
+
+exception TkError of int * string
+exception TkErrorAug of convPos * string (* main exception *)
+
+exception ExpectedCppDir (* handled in postprocess *)
+
+(* handled in tokenizer *)
+exception EndOfStream
+exception EndOfFile
+
+(* Unreachable (should be) *)
+exception Unreachable
+exception FsmTableIsTooSmall
+exception InvalidStream
+exception InvalidStreamAdvance
+exception TokenWithoutRepr
+
+(* handled in readIncludeArg *)
+exception LineWithoutNl
+
+fun pos2str (pos, line, col) =
+let
+ val % = Int.toString
+in
+ case col of
+ SOME col => pos ^ ":" ^ %line ^ ":" ^ %col
+ | NONE => pos ^ ":" ^ %line
+end
+fun eprint s = printLn $ "error: " ^ s
+
+fun otherExn e =
+let
+ val hist = MLton.Exn.history e
+in
+ eprint $ "exception " ^ exnMessage e ^ " was raised";
+ if hist = [] then
+ (printLn "No stack trace is avaliable";
+ printLn "Recompile with -const \"Exn.keepHistory true\"")
+ else
+ List.app (fn x => printLn $ "\t" ^ x) hist
+end
+
+fun exit code = Posix.Process.exit $ Word8.fromInt code
+
+fun ioExn (IO.Io { name, function = _, cause }) =
+let
+ open OS
+ val prefix = name ^ ": "
+ val reason =
+ case cause of
+ SysErr (str, _) => str
+ | _ => exnMessage cause
+in
+ printLn $ prefix ^ reason
+end
+ | ioExn _ = (printLn "ioExn: unreachable"; exit 254)
+
+fun globalExnHandler e =
+ (case e of
+ FsmTableIsTooSmall =>
+ eprint "fsm table is too small. Increate 'maxState' value"
+ | IO.Io _ => ioExn e
+ | TkErrorAug (pos, msg) => eprint $ pos2str pos ^ ": " ^ msg
+ | _ => otherExn e;
+ exit 255)
+
+val () = MLton.Exn.setTopLevelHandler globalExnHandler
+
val tokenRepr =
let
fun & repr = str kwPrefix ^ repr
@@ -117,27 +192,28 @@ in
(Hash, "#"),
(DoubleHash, "##"),
- (CppInclude, % "include"),
(CppDefine, % "define")
]
end
-exception TokenWithoutRepr
-
val printToken = fn
- (Number s) => printLn $ "Num: " ^ s
- | (Id s) => printLn $ "Id: " ^ s
- | (CharConst (repr, _)) => printLn repr
+ Number s => printLn $ "Num: " ^ s
+ | Id s => printLn $ "Id: " ^ s
+ | CharConst (repr, _) => printLn repr
+ | CppInclude arg =>
+ let
+ val (start, end', arg) =
+ case arg of
+ IARel v => ("\"", "\"", v)
+ | IAFromRef v => ("<", ">", v)
+ in
+ printLn $ (str cppPrefix) ^ "include " ^ start ^ arg ^ end'
+ end
| v =>
case List.find (fn (x, _) => x = v) tokenRepr of
SOME (_, repr) => printLn repr
| NONE => raise TokenWithoutRepr
-type fileId = int
-type fileOffset = int
-type pos = fileId * fileOffset
-type fullToken = pos * token
-
type stream = {
(* list of file ids, file names and file contents *)
allFiles: (fileId * string * string) list,
@@ -145,11 +221,6 @@ type stream = {
stack: (fileId * fileOffset * string) list
}
-exception EndOfStream
-exception EndOfFile
-exception InvalidStream
-exception InvalidStreamAdvance
-
fun calcFilePos s offset =
let
fun calc s cur offset (line, col) =
@@ -207,6 +278,23 @@ fun ungetc
{ stack = (id, off - 1, contents) :: rest, allFiles }
| ungetc _ = raise InvalidStream
+fun readline { stack = (fid, off, contents) :: rest, allFiles } =
+ let
+ open String
+ fun read offset =
+ if offset = size contents then
+ raise LineWithoutNl
+ else if sub (contents, offset) = #"\n" then
+ (String.extract (contents, off, SOME $ offset - off), offset + 1)
+ else
+ read (offset + 1)
+
+ val (arg, newOffset) = read off
+ in
+ (arg, { stack = (fid, newOffset, contents) :: rest, allFiles })
+ end
+ | readline _ = raise InvalidStream
+
fun getOffset ({ stack = (_, off, _) :: _, ... }: stream) = off
| getOffset _ = raise InvalidStream
@@ -214,6 +302,20 @@ fun getPosAfterCharRead
({ stack = (id, off, _) :: _, ... }: stream) = (id, off - 1)
| getPosAfterCharRead _ = raise InvalidStream
+fun getPposFromPos (id, pos)
+ { stack = (_, _, _) :: _, allFiles} =
+let
+ val (fname, contents) =
+ case List.find (fn (fid, _, _) => fid = id) allFiles of
+ NONE => raise InvalidStream
+ | SOME (_, fname, contents) => (fname, contents)
+
+ val (line, col) = calcFilePos contents pos
+in
+ (fname, line, SOME col)
+end
+ | getPposFromPos _ _ = raise InvalidStream
+
fun getPos ({ stack = (id, off, _) :: _, ... }: stream) = (id, off)
| getPos _ = raise InvalidStream
@@ -245,12 +347,11 @@ fun isIdStart c = c = #"_" orelse isLetter c
fun isDigit c = ord c >= ord #"0" andalso ord c <= ord #"9"
fun isIdBody c = isIdStart c orelse isDigit c
-(* FSM for parsing symbols *)
+fun isPrintable c = Char.isPrint c andalso c <> #" "
-val maxStates = 16
+(* FSM for parsing symbols *)
-exception FsmTableIsTooSmall
-exception FsmTableIsTooLarge of int
+val maxStates = 40
fun fsmInsert (nextState, buf) curState tk (c :: cs) =
let
@@ -308,10 +409,10 @@ let
val r = ref 1
fun filterNeeded [] acc = acc
- | filterNeeded ((T as (tk, repr)) :: tks) acc =
+ | filterNeeded ((T as (_, repr)) :: tks) acc =
filterNeeded tks
(if isStartForFsm $ String.sub (repr, 0) then
- (printToken tk; T :: acc)
+ T :: acc
else
acc)
@@ -327,11 +428,47 @@ in
List.app (fn (v, p) => fsmInsert' T 0 v $ explode p) tokenRepr;
if !nextState <> maxStates then
- raise FsmTableIsTooLarge $ !nextState
+ printLn $ "note: Fsm table size can be smaller: "
+ ^ Int.toString (!nextState) ^ " is enough"
else ();
T
end
+(* Unused right now
+fun printTable (nextState, buf) =
+let
+ fun printRow i row =
+ if i = length row then
+ print "\n"
+ else
+ let
+ val state = sub (row, i)
+ in
+ if state = ~1 then
+ ()
+ else
+ print ((str (chr i)) ^ ": " ^ (Int.toString state) ^ ", ");
+ printRow (i + 1) row
+ end
+
+ fun print' rowNum buf =
+ if rowNum = !nextState then
+ ()
+ else
+ let
+ val (tk, row) = sub (buf, rowNum)
+ in
+ print ((token2string tk) ^ ": ");
+ printRow 0 row;
+ print' (rowNum + 1) buf
+ end
+in
+ print ("NextState: " ^ Int.toString (!nextState) ^ "\n");
+ print' 0 buf;
+ print "\n"
+end
+*)
+
val fsmTable = fsmTableCreate ()
fun fsmEat stream =
@@ -362,8 +499,6 @@ in
(fn (tk, stream) => ((pos, tk), stream)) $ get 0 stream
end
-exception UnexpectedCharacter of char
-
fun isFirstOnLine (tk: fullToken) (stream: stream) =
let
val (id, offset) = #1 tk
@@ -389,6 +524,15 @@ in
end
end
+fun tkError2aug stream (TkError (dx, msg)) =
+let
+ val (id, pos) = getPosAfterCharRead stream
+ val pos = getPposFromPos (id, pos + dx) stream
+in
+ TkErrorAug (pos, msg)
+end
+ | tkError2aug _ _ = raise Unreachable
+
fun parseGeneric stream parser acc =
let
val stream = ungetc stream
@@ -398,7 +542,8 @@ let
val (c, stream) = (fn (c, s) => (SOME c, s)) $ getchar stream handle
_ => (NONE, stream)
- val (acc, tk, stream) = parser acc (stream, startOff) c
+ val (acc, tk, stream) = parser acc (stream, startOff) c handle
+ e as TkError _ => raise tkError2aug stream e
in
case tk of
NONE => parse' stream acc
@@ -447,12 +592,9 @@ in
finalize (ungetc stream)
end
-val formCppDirExn = Fail $ "Expected cpp direcive\n"
-
-fun formCppDir (Id s) tkl =
+fun formCppDir (Id s) =
let
open String
- fun conv tk = ((#1 $ hd tkl, tk) :: tl tkl)
in
case List.find
(fn (_, repr) =>
@@ -460,10 +602,10 @@ in
extract (repr, 1, NONE) = s)
tokenRepr
of
- SOME (tk, _) => conv tk
- | NONE => raise formCppDirExn
+ SOME (tk, _) => tk
+ | NONE => raise ExpectedCppDir
end
- | formCppDir _ _ = raise formCppDirExn
+ | formCppDir _ = raise ExpectedCppDir
fun numParser () (stream, startOff) c =
let
@@ -479,12 +621,10 @@ in
finalize (ungetc stream)
end
-exception IncompleteEsqSeq
-
fun eatEscSeq stream =
let
val (c, stream) = getchar stream handle
- _ => raise IncompleteEsqSeq
+ _ => raise TkError (0, "unfinished escape sequence")
in
(case c of
#"\\" => #"\\"
@@ -495,12 +635,11 @@ in
end
datatype CharParseState = ChInit | ChStart | ChValue of int | ChTerm
-exception CharConstExn of string
-val chExnExpValue = CharConstExn "expected value after '"
-val chExnExpTerm = CharConstExn "expected ' after value"
-val chExnStart = CharConstExn "can not start parsing (unreachable)"
-val chExnAfterTerm = CharConstExn "can not finish parsing (unreachable)"
+val chExnExpValue = TkError (0, "expected value after '")
+val chExnExpTerm = TkError (0, "expected ' after value")
+val chExnStart = Unreachable
+val chExnAfterTerm = Unreachable
fun charParser ChInit (stream, _) (SOME c) =
if c = #"'" then
@@ -532,6 +671,103 @@ fun charParser ChInit (stream, _) (SOME c) =
| ChValue _ => chExnExpTerm
| ChTerm => chExnAfterTerm
+fun readIncludeArg stream =
+let
+ open String
+
+ fun triml s idx =
+ if idx = size s then
+ ""
+ else if isSpace $ sub (s, idx) then
+ triml s (idx + 1)
+ else
+ extract (s, idx, NONE)
+
+ fun trimr s idx =
+ if idx = 0 then
+ ""
+ else if isSpace $ sub (s, idx) then
+ trimr s (idx - 1)
+ else
+ extract (s, 0, SOME $ idx + 1)
+
+ fun trim s = triml (trimr s (size s - 1)) 0
+
+ fun getLinePos () =
+ let
+ val (fname, line, _) = getPposFromPos (getPos stream) stream
+ in
+ (fname, line, NONE)
+ end
+
+ fun determineType s =
+ let
+ fun --> msg = raise TkErrorAug (getLinePos (), msg)
+ fun isLast c = sub (s, size s - 1) = c
+ fun cut () = extract (s, 1, SOME $ size s - 2)
+ in
+ if s = "" then
+ --> "#include argument is empty"
+ else
+ case sub (s, 0) of
+ #"<" =>
+ if isLast #">" then
+ IAFromRef $ cut ()
+ else
+ --> "expected > at #include argument end"
+ | #"\"" =>
+ if isLast #"\"" then
+ IARel $ cut ()
+ else
+ --> "expected \" at #include argument end"
+ | _ => --> "#include argument should start with \" or <"
+ end
+
+ val (arg, stream) = readline stream handle
+ LineWithoutNl =>
+ raise TkErrorAug (getLinePos (),
+ "#include line does not end with \\n")
+in
+ (determineType $ trim arg, stream)
+end
+
+fun postprocessCppDir tk tkl stream =
+let
+ val isCppDir = (fn Hash => true | _ => false) (#2 $ hd tkl)
+ handle Empty => false
+ val (pos, tk') = tk
+
+ fun conv tk = ((#1 $ hd tkl, tk) :: tl tkl)
+in
+ if isCppDir andalso tk' = Id "include" then
+ let
+ val (arg, stream) = readIncludeArg stream
+ in
+ (conv $ CppInclude arg, stream)
+ end
+ else if isCppDir then
+ (conv $ formCppDir tk', stream) handle
+ ExpectedCppDir =>
+ raise TkErrorAug (getPposFromPos pos stream,
+ "expected preprocessor directive")
+
+ else
+ (tk :: tkl, stream)
+end
+
+fun unexpectedCharRaise stream c =
+let
+ val (id, pos) = getPosAfterCharRead stream
+ val pos = getPposFromPos (id, pos) stream
+ val repr =
+ if isPrintable c then
+ str c
+ else
+ "<" ^ Int.toString (ord c) ^ ">"
+in
+ raise TkErrorAug (pos, "unexpected character " ^ repr)
+end
+
fun tokenize stream tkl =
let
fun getcharSkipEof stream = getchar stream handle
@@ -545,18 +781,16 @@ let
fun @-> parser acc = cont $ parseGeneric stream parser acc
in
case c of
- NONE => (rev tkl, #allFiles stream)
+ NONE => (rev tkl, #allFiles stream)
| SOME c =>
if isSpace c then
tokenize stream tkl
else if isIdStart c then
let
- val isCppDir = (fn Hash => true | _ => false) (#2 $ hd tkl)
- handle Empty => false
val (tk, stream) = parseGeneric stream idParser ()
+ val (tkl, stream) = postprocessCppDir tk tkl stream
in
- tokenize stream
- (if isCppDir then (formCppDir (#2 tk) tkl) else (tk :: tkl))
+ tokenize stream tkl
end
else if isDigit c then
@-> numParser ()
@@ -565,7 +799,7 @@ in
else if isStartForFsm c then
cont $ fsmEat stream
else
- raise UnexpectedCharacter c
+ unexpectedCharRaise stream c
end
fun main [fname] =