diff options
Diffstat (limited to 'cpp.sml')
-rw-r--r-- | cpp.sml | 330 |
1 files changed, 282 insertions, 48 deletions
@@ -3,6 +3,8 @@ infixr 0 $ fun printLn s = (print s; print "\n") +datatype includeArg = IARel of string | IAFromRef of string + datatype token = Invalid | Number of string | @@ -57,12 +59,85 @@ datatype token = Hash | DoubleHash | - CppInclude | + CppInclude of includeArg | CppDefine val kwPrefix = #"@" val cppPrefix = #"$" +type fileId = int +type fileOffset = int +type pos = fileId * fileOffset +type convPos = string * int * int option +type fullToken = pos * token + +exception TkError of int * string +exception TkErrorAug of convPos * string (* main exception *) + +exception ExpectedCppDir (* handled in postprocess *) + +(* handled in tokenizer *) +exception EndOfStream +exception EndOfFile + +(* Unreachable (should be) *) +exception Unreachable +exception FsmTableIsTooSmall +exception InvalidStream +exception InvalidStreamAdvance +exception TokenWithoutRepr + +(* handled in readIncludeArg *) +exception LineWithoutNl + +fun pos2str (pos, line, col) = +let + val % = Int.toString +in + case col of + SOME col => pos ^ ":" ^ %line ^ ":" ^ %col + | NONE => pos ^ ":" ^ %line +end +fun eprint s = printLn $ "error: " ^ s + +fun otherExn e = +let + val hist = MLton.Exn.history e +in + eprint $ "exception " ^ exnMessage e ^ " was raised"; + if hist = [] then + (printLn "No stack trace is avaliable"; + printLn "Recompile with -const \"Exn.keepHistory true\"") + else + List.app (fn x => printLn $ "\t" ^ x) hist +end + +fun exit code = Posix.Process.exit $ Word8.fromInt code + +fun ioExn (IO.Io { name, function = _, cause }) = +let + open OS + val prefix = name ^ ": " + val reason = + case cause of + SysErr (str, _) => str + | _ => exnMessage cause +in + printLn $ prefix ^ reason +end + | ioExn _ = (printLn "ioExn: unreachable"; exit 254) + +fun globalExnHandler e = + (case e of + FsmTableIsTooSmall => + eprint "fsm table is too small. Increate 'maxState' value" + | IO.Io _ => ioExn e + | TkErrorAug (pos, msg) => eprint $ pos2str pos ^ ": " ^ msg + | _ => otherExn e; + exit 255) + +val () = MLton.Exn.setTopLevelHandler globalExnHandler + val tokenRepr = let fun & repr = str kwPrefix ^ repr @@ -117,27 +192,28 @@ in (Hash, "#"), (DoubleHash, "##"), - (CppInclude, % "include"), (CppDefine, % "define") ] end -exception TokenWithoutRepr - val printToken = fn - (Number s) => printLn $ "Num: " ^ s - | (Id s) => printLn $ "Id: " ^ s - | (CharConst (repr, _)) => printLn repr + Number s => printLn $ "Num: " ^ s + | Id s => printLn $ "Id: " ^ s + | CharConst (repr, _) => printLn repr + | CppInclude arg => + let + val (start, end', arg) = + case arg of + IARel v => ("\"", "\"", v) + | IAFromRef v => ("<", ">", v) + in + printLn $ (str cppPrefix) ^ "include " ^ start ^ arg ^ end' + end | v => case List.find (fn (x, _) => x = v) tokenRepr of SOME (_, repr) => printLn repr | NONE => raise TokenWithoutRepr -type fileId = int -type fileOffset = int -type pos = fileId * fileOffset -type fullToken = pos * token - type stream = { (* list of file ids, file names and file contents *) allFiles: (fileId * string * string) list, @@ -145,11 +221,6 @@ type stream = { stack: (fileId * fileOffset * string) list } -exception EndOfStream -exception EndOfFile -exception InvalidStream -exception InvalidStreamAdvance - fun calcFilePos s offset = let fun calc s cur offset (line, col) = @@ -207,6 +278,23 @@ fun ungetc { stack = (id, off - 1, contents) :: rest, allFiles } | ungetc _ = raise InvalidStream +fun readline { stack = (fid, off, contents) :: rest, allFiles } = + let + open String + fun read offset = + if offset = size contents then + raise LineWithoutNl + else if sub (contents, offset) = #"\n" then + (String.extract (contents, off, SOME $ offset - off), offset + 1) + else + read (offset + 1) + + val (arg, newOffset) = read off + in + (arg, { stack = (fid, newOffset, contents) :: rest, allFiles }) + end + | readline _ = raise InvalidStream + fun getOffset ({ stack = (_, off, _) :: _, ... }: stream) = off | getOffset _ = raise InvalidStream @@ -214,6 +302,20 @@ fun getPosAfterCharRead ({ stack = (id, off, _) :: _, ... }: stream) = (id, off - 1) | getPosAfterCharRead _ = raise InvalidStream +fun getPposFromPos (id, pos) + { stack = (_, _, _) :: _, allFiles} = +let + val (fname, contents) = + case List.find (fn (fid, _, _) => fid = id) allFiles of + NONE => raise InvalidStream + | SOME (_, fname, contents) => (fname, contents) + + val (line, col) = calcFilePos contents pos +in + (fname, line, SOME col) +end + | getPposFromPos _ _ = raise InvalidStream + fun getPos ({ stack = (id, off, _) :: _, ... }: stream) = (id, off) | getPos _ = raise InvalidStream @@ -245,12 +347,11 @@ fun isIdStart c = c = #"_" orelse isLetter c fun isDigit c = ord c >= ord #"0" andalso ord c <= ord #"9" fun isIdBody c = isIdStart c orelse isDigit c -(* FSM for parsing symbols *) +fun isPrintable c = Char.isPrint c andalso c <> #" " -val maxStates = 16 +(* FSM for parsing symbols *) -exception FsmTableIsTooSmall -exception FsmTableIsTooLarge of int +val maxStates = 40 fun fsmInsert (nextState, buf) curState tk (c :: cs) = let @@ -308,10 +409,10 @@ let val r = ref 1 fun filterNeeded [] acc = acc - | filterNeeded ((T as (tk, repr)) :: tks) acc = + | filterNeeded ((T as (_, repr)) :: tks) acc = filterNeeded tks (if isStartForFsm $ String.sub (repr, 0) then - (printToken tk; T :: acc) + T :: acc else acc) @@ -327,11 +428,47 @@ in List.app (fn (v, p) => fsmInsert' T 0 v $ explode p) tokenRepr; if !nextState <> maxStates then - raise FsmTableIsTooLarge $ !nextState + printLn $ "note: Fsm table size can be smaller: " + ^ Int.toString (!nextState) ^ " is enough" else (); T end +(* Unused right now +fun printTable (nextState, buf) = +let + fun printRow i row = + if i = length row then + print "\n" + else + let + val state = sub (row, i) + in + if state = ~1 then + () + else + print ((str (chr i)) ^ ": " ^ (Int.toString state) ^ ", "); + printRow (i + 1) row + end + + fun print' rowNum buf = + if rowNum = !nextState then + () + else + let + val (tk, row) = sub (buf, rowNum) + in + print ((token2string tk) ^ ": "); + printRow 0 row; + print' (rowNum + 1) buf + end +in + print ("NextState: " ^ Int.toString (!nextState) ^ "\n"); + print' 0 buf; + print "\n" +end +*) + val fsmTable = fsmTableCreate () fun fsmEat stream = @@ -362,8 +499,6 @@ in (fn (tk, stream) => ((pos, tk), stream)) $ get 0 stream end -exception UnexpectedCharacter of char - fun isFirstOnLine (tk: fullToken) (stream: stream) = let val (id, offset) = #1 tk @@ -389,6 +524,15 @@ in end end +fun tkError2aug stream (TkError (dx, msg)) = +let + val (id, pos) = getPosAfterCharRead stream + val pos = getPposFromPos (id, pos + dx) stream +in + TkErrorAug (pos, msg) +end + | tkError2aug _ _ = raise Unreachable + fun parseGeneric stream parser acc = let val stream = ungetc stream @@ -398,7 +542,8 @@ let val (c, stream) = (fn (c, s) => (SOME c, s)) $ getchar stream handle _ => (NONE, stream) - val (acc, tk, stream) = parser acc (stream, startOff) c + val (acc, tk, stream) = parser acc (stream, startOff) c handle + e as TkError _ => raise tkError2aug stream e in case tk of NONE => parse' stream acc @@ -447,12 +592,9 @@ in finalize (ungetc stream) end -val formCppDirExn = Fail $ "Expected cpp direcive\n" - -fun formCppDir (Id s) tkl = +fun formCppDir (Id s) = let open String - fun conv tk = ((#1 $ hd tkl, tk) :: tl tkl) in case List.find (fn (_, repr) => @@ -460,10 +602,10 @@ in extract (repr, 1, NONE) = s) tokenRepr of - SOME (tk, _) => conv tk - | NONE => raise formCppDirExn + SOME (tk, _) => tk + | NONE => raise ExpectedCppDir end - | formCppDir _ _ = raise formCppDirExn + | formCppDir _ = raise ExpectedCppDir fun numParser () (stream, startOff) c = let @@ -479,12 +621,10 @@ in finalize (ungetc stream) end -exception IncompleteEsqSeq - fun eatEscSeq stream = let val (c, stream) = getchar stream handle - _ => raise IncompleteEsqSeq + _ => raise TkError (0, "unfinished escape sequence") in (case c of #"\\" => #"\\" @@ -495,12 +635,11 @@ in end datatype CharParseState = ChInit | ChStart | ChValue of int | ChTerm -exception CharConstExn of string -val chExnExpValue = CharConstExn "expected value after '" -val chExnExpTerm = CharConstExn "expected ' after value" -val chExnStart = CharConstExn "can not start parsing (unreachable)" -val chExnAfterTerm = CharConstExn "can not finish parsing (unreachable)" +val chExnExpValue = TkError (0, "expected value after '") +val chExnExpTerm = TkError (0, "expected ' after value") +val chExnStart = Unreachable +val chExnAfterTerm = Unreachable fun charParser ChInit (stream, _) (SOME c) = if c = #"'" then @@ -532,6 +671,103 @@ fun charParser ChInit (stream, _) (SOME c) = | ChValue _ => chExnExpTerm | ChTerm => chExnAfterTerm +fun readIncludeArg stream = +let + open String + + fun triml s idx = + if idx = size s then + "" + else if isSpace $ sub (s, idx) then + triml s (idx + 1) + else + extract (s, idx, NONE) + + fun trimr s idx = + if idx = 0 then + "" + else if isSpace $ sub (s, idx) then + trimr s (idx - 1) + else + extract (s, 0, SOME $ idx + 1) + + fun trim s = triml (trimr s (size s - 1)) 0 + + fun getLinePos () = + let + val (fname, line, _) = getPposFromPos (getPos stream) stream + in + (fname, line, NONE) + end + + fun determineType s = + let + fun --> msg = raise TkErrorAug (getLinePos (), msg) + fun isLast c = sub (s, size s - 1) = c + fun cut () = extract (s, 1, SOME $ size s - 2) + in + if s = "" then + --> "#include argument is empty" + else + case sub (s, 0) of + #"<" => + if isLast #">" then + IAFromRef $ cut () + else + --> "expected > at #include argument end" + | #"\"" => + if isLast #"\"" then + IARel $ cut () + else + --> "expected \" at #include argument end" + | _ => --> "#include argument should start with \" or <" + end + + val (arg, stream) = readline stream handle + LineWithoutNl => + raise TkErrorAug (getLinePos (), + "#include line does not end with \\n") +in + (determineType $ trim arg, stream) +end + +fun postprocessCppDir tk tkl stream = +let + val isCppDir = (fn Hash => true | _ => false) (#2 $ hd tkl) + handle Empty => false + val (pos, tk') = tk + + fun conv tk = ((#1 $ hd tkl, tk) :: tl tkl) +in + if isCppDir andalso tk' = Id "include" then + let + val (arg, stream) = readIncludeArg stream + in + (conv $ CppInclude arg, stream) + end + else if isCppDir then + (conv $ formCppDir tk', stream) handle + ExpectedCppDir => + raise TkErrorAug (getPposFromPos pos stream, + "expected preprocessor directive") + + else + (tk :: tkl, stream) +end + +fun unexpectedCharRaise stream c = +let + val (id, pos) = getPosAfterCharRead stream + val pos = getPposFromPos (id, pos) stream + val repr = + if isPrintable c then + str c + else + "<" ^ Int.toString (ord c) ^ ">" +in + raise TkErrorAug (pos, "unexpected character " ^ repr) +end + fun tokenize stream tkl = let fun getcharSkipEof stream = getchar stream handle @@ -545,18 +781,16 @@ let fun @-> parser acc = cont $ parseGeneric stream parser acc in case c of - NONE => (rev tkl, #allFiles stream) + NONE => (rev tkl, #allFiles stream) | SOME c => if isSpace c then tokenize stream tkl else if isIdStart c then let - val isCppDir = (fn Hash => true | _ => false) (#2 $ hd tkl) - handle Empty => false val (tk, stream) = parseGeneric stream idParser () + val (tkl, stream) = postprocessCppDir tk tkl stream in - tokenize stream - (if isCppDir then (formCppDir (#2 tk) tkl) else (tk :: tkl)) + tokenize stream tkl end else if isDigit c then @-> numParser () @@ -565,7 +799,7 @@ in else if isStartForFsm c then cont $ fsmEat stream else - raise UnexpectedCharacter c + unexpectedCharRaise stream c end fun main [fname] = |