fun $ (x, y) = x y infixr 0 $ fun printLn s = (print s; print "\n") datatype token = Number of substring | Id of substring | Plus | Semicolon | Hash | CppInclude fun printToken (Number s) = printLn $ "Number: " ^ Substring.string s | printToken (Id s) = printLn $ "Id: " ^ Substring.string s | printToken Plus = printLn "+" | printToken Semicolon = printLn ";" | printToken Hash = printLn "#" | printToken CppInclude = printLn "$include" type fileId = int type fileOffset = int type pos = fileId * fileOffset type fullToken = pos * token type stream = { (* list of file ids, file names and file contents *) allFiles: (fileId * string * string) list, (* stack of file ids, file offsets and file contents *) stack: (fileId * fileOffset * string) list } exception EndOfStream exception EndOfFile exception InvalidStream exception InvalidStreamAdvance fun calcFilePos s offset = let fun calc s cur offset (line, col) = if cur = offset then (line, col) else if String.sub (s, cur) = #"\n" then calc s (cur + 1) offset (line + 1, 1) else calc s (cur + 1) offset (line, col + 1) in calc s 0 offset (1, 1) end fun printPos fileList (id, pos) = let val triple = List.find (fn (fid, _, _) => fid = id) fileList in case triple of NONE => raise InvalidStream | SOME (_, fname, contents) => let val (line, col) = calcFilePos contents pos val line = Int.toString line val col = Int.toString col in print $ fname ^ ":" ^ line ^ ":" ^ col ^ ": " end end fun readFile fname = let open TextIO val h = openIn fname val s = inputAll h val () = closeIn h in s end fun getchar (S as { stack = (id, off, contents) :: rest, allFiles }: stream) : (char * stream) = if off < String.size contents then (String.sub (contents, off), { stack = (id, off + 1, contents) :: rest, allFiles }) else raise EndOfFile | getchar _ = raise EndOfStream fun ungetc (S as { stack = (id, off, contents) :: rest, allFiles }) = if off = 0 then raise InvalidStream else { stack = (id, off - 1, contents) :: rest, allFiles } | ungetc _ = raise InvalidStream fun getOffset ({ stack = (id, off, contents) :: _, ... }: stream) = off | getOffset _ = raise InvalidStream fun getPosAfterCharRead ({ stack = (id, off, contents) :: _, ... }: stream) = (id, off - 1) | getPosAfterCharRead _ = raise InvalidStream fun getPos ({ stack = (id, off, contents) :: _, ... }: stream) = (id, off) | getPos _ = raise InvalidStream fun getSubstr startOff endOff ({ stack = (_, _, contents) :: _, ... }: stream) = Substring.substring (contents, startOff, endOff - startOff) | getSubstr _ _ _ = raise InvalidStream fun advanceToNewFile (S as { stack = (id, off, contents) :: rest, allFiles }: stream) = if off = String.size contents then { stack = rest, allFiles } else raise InvalidStreamAdvance | advanceToNewFile _ = raise InvalidStreamAdvance fun streamInit fname = let val contents = readFile fname in { allFiles = [(0, fname, contents)], stack = [(0, 0, contents)] } end exception UnexpectedCharacter of char fun isSpace c = c = #" " orelse c = #"\t" orelse c = #"\n" fun isLetter c = ord c >= ord #"a" andalso ord c <= ord #"z" orelse ord c >= ord #"A" andalso ord c <= ord #"Z" fun isIdStart c = c = #"_" orelse isLetter c fun isDigit c = ord c >= ord #"0" andalso ord c <= ord #"9" fun isIdBody c = isIdStart c orelse isDigit c fun isFirstOnLine (tk: fullToken) (stream: stream) = let val (id, offset) = #1 tk in case List.find (fn (fid, _, _) => id = fid) (#allFiles stream) of NONE => raise InvalidStream | SOME (_, _, contents) => let fun returnToNL ~1 = true | returnToNL offset = let val chr = String.sub (contents, offset) in if chr = #"\n" then true else if isSpace chr then returnToNL (offset - 1) else false end in returnToNL (offset - 1) end end fun tokenize stream tkl = let fun getcharSkipEof stream = getchar stream handle EndOfFile => getcharSkipEof (advanceToNewFile stream) val (c, stream) = (fn (c, s) => (SOME c, s)) $ getcharSkipEof stream handle EndOfStream => (NONE, stream) fun % tk = (getPosAfterCharRead stream, tk) in case c of NONE => (rev tkl, #allFiles stream) | SOME c => if isSpace c then tokenize stream tkl else if isIdStart c then parseId c stream tkl else case c of #"+" => tokenize stream (% Plus :: tkl) | #";" => tokenize stream (% Semicolon :: tkl) | #"#" => tokenize stream (% Hash :: tkl) | _ => raise UnexpectedCharacter c end and parseId c stream (tkl: fullToken list) = let val P as (_, pos) = getPosAfterCharRead stream fun eatId curId stream = let val (c, stream) = (fn (c, s) => (SOME c, s)) $ getchar stream handle _ => (NONE, stream) in case c of NONE => (curId, stream) | SOME c => if isIdBody c then eatId (curId + 1) stream else (curId, ungetc stream) end val (endOffset, stream) = eatId (pos + 1) stream val id = getSubstr pos endOffset stream in if ((fn Hash => true | _ => false) (#2 $ hd tkl) handle Empty => false) andalso isFirstOnLine (hd tkl) stream andalso Substring.string id = "include" then tokenize stream (((#1 $ hd tkl), CppInclude) :: tl tkl) else tokenize stream ((P, Id id) :: tkl) end fun main [fname] = let val stream = streamInit fname val (tkl, fileList) = tokenize stream [] in List.app (fn (p, x) => (printPos fileList p; printToken x)) tkl end | main _ = printLn "Expected a signle argument: file name" val () = main $ CommandLine.arguments ()