summaryrefslogtreecommitdiff
path: root/cpp.sml
diff options
context:
space:
mode:
Diffstat (limited to 'cpp.sml')
-rw-r--r--cpp.sml226
1 files changed, 226 insertions, 0 deletions
diff --git a/cpp.sml b/cpp.sml
new file mode 100644
index 0000000..ea3b35c
--- /dev/null
+++ b/cpp.sml
@@ -0,0 +1,226 @@
+fun $ (x, y) = x y
+infixr 0 $
+
+fun printLn s = (print s; print "\n")
+
+datatype token =
+ Number of substring |
+ Id of substring |
+ Plus |
+ Semicolon |
+ Hash |
+ CppInclude
+
+fun printToken (Number s) = printLn $ "Number: " ^ Substring.string s
+ | printToken (Id s) = printLn $ "Id: " ^ Substring.string s
+ | printToken Plus = printLn "+"
+ | printToken Semicolon = printLn ";"
+ | printToken Hash = printLn "#"
+ | printToken CppInclude = printLn "$include"
+
+type fileId = int
+type fileOffset = int
+type pos = fileId * fileOffset
+type fullToken = pos * token
+
+type stream = {
+ (* list of file ids, file names and file contents *)
+ allFiles: (fileId * string * string) list,
+ (* stack of file ids, file offsets and file contents *)
+ stack: (fileId * fileOffset * string) list
+}
+
+exception EndOfStream
+exception EndOfFile
+exception InvalidStream
+exception InvalidStreamAdvance
+
+fun calcFilePos s offset =
+let
+ fun calc s cur offset (line, col) =
+ if cur = offset then
+ (line, col)
+ else
+ if String.sub (s, cur) = #"\n" then
+ calc s (cur + 1) offset (line + 1, 1)
+ else
+ calc s (cur + 1) offset (line, col + 1)
+in
+ calc s 0 offset (1, 1)
+end
+
+fun printPos fileList (id, pos) =
+let
+ val triple = List.find (fn (fid, _, _) => fid = id) fileList
+in
+ case triple of
+ NONE => raise InvalidStream
+ | SOME (_, fname, contents) =>
+ let
+ val (line, col) = calcFilePos contents pos
+ val line = Int.toString line
+ val col = Int.toString col
+ in
+ print $ fname ^ ":" ^ line ^ ":" ^ col ^ ": "
+ end
+end
+
+fun readFile fname =
+let
+ open TextIO
+ val h = openIn fname
+ val s = inputAll h
+ val () = closeIn h
+in
+ s
+end
+
+fun getchar
+ (S as { stack = (id, off, contents) :: rest, allFiles }: stream)
+ : (char * stream) =
+ if off < String.size contents then
+ (String.sub (contents, off),
+ { stack = (id, off + 1, contents) :: rest, allFiles })
+ else
+ raise EndOfFile
+ | getchar _ = raise EndOfStream
+
+fun ungetc
+ (S as { stack = (id, off, contents) :: rest, allFiles }) =
+ if off = 0 then
+ raise InvalidStream
+ else
+ { stack = (id, off - 1, contents) :: rest, allFiles }
+ | ungetc _ = raise InvalidStream
+
+fun getOffset ({ stack = (id, off, contents) :: _, ... }: stream) = off
+ | getOffset _ = raise InvalidStream
+
+fun getPosAfterCharRead
+ ({ stack = (id, off, contents) :: _, ... }: stream) =
+ (id, off - 1)
+ | getPosAfterCharRead _ = raise InvalidStream
+
+fun getPos ({ stack = (id, off, contents) :: _, ... }: stream) = (id, off)
+ | getPos _ = raise InvalidStream
+
+fun getSubstr startOff endOff
+ ({ stack = (_, _, contents) :: _, ... }: stream) =
+ Substring.substring (contents, startOff, endOff - startOff)
+ | getSubstr _ _ _ = raise InvalidStream
+
+fun advanceToNewFile
+ (S as { stack = (id, off, contents) :: rest, allFiles }: stream) =
+ if off = String.size contents then
+ { stack = rest, allFiles }
+ else
+ raise InvalidStreamAdvance
+ | advanceToNewFile _ = raise InvalidStreamAdvance
+
+fun streamInit fname =
+let
+ val contents = readFile fname
+in
+ { allFiles = [(0, fname, contents)], stack = [(0, 0, contents)] }
+end
+
+exception UnexpectedCharacter of char
+
+fun isSpace c = c = #" " orelse c = #"\t" orelse c = #"\n"
+fun isLetter c =
+ ord c >= ord #"a" andalso ord c <= ord #"z" orelse
+ ord c >= ord #"A" andalso ord c <= ord #"Z"
+fun isIdStart c = c = #"_" orelse isLetter c
+fun isDigit c = ord c >= ord #"0" andalso ord c <= ord #"9"
+fun isIdBody c = isIdStart c orelse isDigit c
+
+fun isFirstOnLine (tk: fullToken) (stream: stream) =
+let
+ val (id, offset) = #1 tk
+in
+ case List.find (fn (fid, _, _) => id = fid) (#allFiles stream) of
+ NONE => raise InvalidStream
+ | SOME (_, _, contents) =>
+ let
+ fun returnToNL ~1 = true
+ | returnToNL offset =
+ let
+ val chr = String.sub (contents, offset)
+ in
+ if chr = #"\n" then
+ true
+ else if isSpace chr then
+ returnToNL (offset - 1)
+ else
+ false
+ end
+ in
+ returnToNL (offset - 1)
+ end
+end
+
+fun tokenize stream tkl =
+let
+ fun getcharSkipEof stream = getchar stream handle
+ EndOfFile => getcharSkipEof (advanceToNewFile stream)
+
+ val (c, stream) = (fn (c, s) => (SOME c, s)) $ getcharSkipEof stream
+ handle
+ EndOfStream => (NONE, stream)
+
+ fun % tk = (getPosAfterCharRead stream, tk)
+in
+ case c of
+ NONE => (rev tkl, #allFiles stream)
+ | SOME c =>
+ if isSpace c then
+ tokenize stream tkl
+ else if isIdStart c then
+ parseId c stream tkl
+ else case c of
+ #"+" => tokenize stream (% Plus :: tkl)
+ | #";" => tokenize stream (% Semicolon :: tkl)
+ | #"#" => tokenize stream (% Hash :: tkl)
+ | _ => raise UnexpectedCharacter c
+end and
+parseId c stream (tkl: fullToken list) =
+let
+ val P as (_, pos) = getPosAfterCharRead stream
+
+ fun eatId curId stream =
+ let
+ val (c, stream) = (fn (c, s) => (SOME c, s)) $ getchar stream handle
+ _ => (NONE, stream)
+ in
+ case c of
+ NONE => (curId, stream)
+ | SOME c =>
+ if isIdBody c then
+ eatId (curId + 1) stream
+ else
+ (curId, ungetc stream)
+ end
+
+ val (endOffset, stream) = eatId (pos + 1) stream
+ val id = getSubstr pos endOffset stream
+
+in
+ if ((fn Hash => true | _ => false) (#2 $ hd tkl) handle Empty => false)
+ andalso isFirstOnLine (hd tkl) stream
+ andalso Substring.string id = "include"
+ then
+ tokenize stream (((#1 $ hd tkl), CppInclude) :: tl tkl)
+ else
+ tokenize stream ((P, Id id) :: tkl)
+end
+
+fun main [fname] =
+let
+ val stream = streamInit fname
+ val (tkl, fileList) = tokenize stream []
+in
+ List.app (fn (p, x) => (printPos fileList p; printToken x)) tkl
+end
+ | main _ = printLn "Expected a signle argument: file name"
+
+val () = main $ CommandLine.arguments ()