fun $ (x, y) = x y infixr 0 $ fun printLn s = (print s; print "\n") datatype token = Invalid | Number of string | Id of string | Plus | DoublePlus| Minus | DoubleMinus | Semicolon | EqualSign | DoubleEqualSign | Hash | DoubleHash | CppInclude | CppDefine val tokenRepr = [ (Plus, "+"), (DoublePlus, "++"), (Minus, "-"), (DoubleMinus, "--"), (Semicolon, ";"), (EqualSign, "="), (DoubleEqualSign, "=="), (Hash, "#"), (DoubleHash, "##"), (CppInclude, "$include"), (CppDefine, "$define") ] fun isCppDir CppInclude = true | isCppDir _ = false exception TokenWithoutRepr fun printToken (Number s) = printLn $ "Num: " ^ s | printToken (Id s) = printLn $ "Id: " ^ s | printToken v = case List.find (fn (x, _) => x = v) tokenRepr of SOME (_, repr) => printLn repr | NONE => raise TokenWithoutRepr type fileId = int type fileOffset = int type pos = fileId * fileOffset type fullToken = pos * token type stream = { (* list of file ids, file names and file contents *) allFiles: (fileId * string * string) list, (* stack of file ids, file offsets and file contents *) stack: (fileId * fileOffset * string) list } exception EndOfStream exception EndOfFile exception InvalidStream exception InvalidStreamAdvance fun calcFilePos s offset = let fun calc s cur offset (line, col) = if cur = offset then (line, col) else calc s (cur + 1) offset (if String.sub (s, cur) = #"\n" then (line + 1, 1) else (line, col + 1)) in calc s 0 offset (1, 1) end fun printPos fileList (id, pos) = let val triple = List.find (fn (fid, _, _) => fid = id) fileList in case triple of NONE => raise InvalidStream | SOME (_, fname, contents) => let val (line, col) = calcFilePos contents pos val line = Int.toString line val col = Int.toString col in print $ fname ^ ":" ^ line ^ ":" ^ col ^ ": " end end fun readFile fname = let open TextIO val h = openIn fname val s = inputAll h val () = closeIn h in s end fun getchar (S as { stack = (id, off, contents) :: rest, allFiles }: stream) : (char * stream) = if off < String.size contents then (String.sub (contents, off), { stack = (id, off + 1, contents) :: rest, allFiles }) else raise EndOfFile | getchar _ = raise EndOfStream fun ungetc (S as { stack = (id, off, contents) :: rest, allFiles }) = if off = 0 then raise InvalidStream else { stack = (id, off - 1, contents) :: rest, allFiles } | ungetc _ = raise InvalidStream fun getOffset ({ stack = (id, off, contents) :: _, ... }: stream) = off | getOffset _ = raise InvalidStream fun getPosAfterCharRead ({ stack = (id, off, contents) :: _, ... }: stream) = (id, off - 1) | getPosAfterCharRead _ = raise InvalidStream fun getPos ({ stack = (id, off, contents) :: _, ... }: stream) = (id, off) | getPos _ = raise InvalidStream fun getSubstr startOff endOff ({ stack = (_, _, contents) :: _, ... }: stream) = String.substring (contents, startOff, endOff - startOff) | getSubstr _ _ _ = raise InvalidStream fun advanceToNewFile (S as { stack = (id, off, contents) :: rest, allFiles }: stream) = if off = String.size contents then { stack = rest, allFiles } else raise InvalidStreamAdvance | advanceToNewFile _ = raise InvalidStreamAdvance fun streamInit fname = let val contents = readFile fname in { allFiles = [(0, fname, contents)], stack = [(0, 0, contents)] } end (* FSM for parsing symbols *) val maxStates = 40 fun fsmInsert (nextState, buf) curState tk (c :: cs) = let open Array val (_, row) = sub (buf, curState) val potNextState = sub (row, ord c) in if potNextState <> ~1 then fsmInsert (nextState, buf) potNextState tk cs else ( update (row, ord c, !nextState); nextState := !nextState + 1; fsmInsert (nextState, buf) (!nextState - 1) tk cs ) end | fsmInsert (nextState, buf) curState tk [] = let open Array val (_, row) = sub (buf, curState) in update (buf, curState, (tk, row)) end fun fsmTableCreate () = let open Array val T as (_, buf) = (ref 1, array (maxStates, (Invalid, array (128, ~1)))) val r = ref 1 fun filterNeeded [] acc = acc | filterNeeded ((T as (tk, _)) :: tks) acc = filterNeeded tks (if isCppDir tk then acc else T :: acc) val tokenRepr = filterNeeded tokenRepr [] in while !r < length buf do ( update (buf, !r, (Invalid, array(128, ~1))); r := !r + 1 ); List.app (fn (v, p) => fsmInsert T 0 v (explode p)) tokenRepr; T end fun isStartForFsmGen () = let open Array val lookupTable = array (128, false) fun firstChr s = ord $ String.sub (s, 0) val () = List.app (fn (_, repr) => update (lookupTable, firstChr repr, true)) tokenRepr in fn c => sub (lookupTable, ord c) end val isStartForFsm = isStartForFsmGen () val fsmTable = fsmTableCreate () fun fsmEat stream = let open Array val stream = ungetc stream val pos = getPos stream fun get curState stream = let val (c, stream) = (fn (c, s) => (SOME c, s)) $ getchar stream handle _ => (NONE, stream) in case c of NONE => (#1 $ sub (#2 fsmTable, curState), stream) | SOME c => let val (tk, row) = sub (#2 fsmTable, curState) val nextState = sub (row, ord c) in if nextState = ~1 then (tk, ungetc stream) else get nextState stream end end in (fn (tk, stream) => ((pos, tk), stream)) $ get 0 stream end exception UnexpectedCharacter of char fun isSpace c = c = #" " orelse c = #"\t" orelse c = #"\n" fun isLetter c = ord c >= ord #"a" andalso ord c <= ord #"z" orelse ord c >= ord #"A" andalso ord c <= ord #"Z" fun isIdStart c = c = #"_" orelse isLetter c fun isDigit c = ord c >= ord #"0" andalso ord c <= ord #"9" fun isIdBody c = isIdStart c orelse isDigit c fun isFirstOnLine (tk: fullToken) (stream: stream) = let val (id, offset) = #1 tk in case List.find (fn (fid, _, _) => id = fid) (#allFiles stream) of NONE => raise InvalidStream | SOME (_, _, contents) => let fun returnToNL ~1 = true | returnToNL offset = let val chr = String.sub (contents, offset) in if chr = #"\n" then true else if isSpace chr then returnToNL (offset - 1) else false end in returnToNL (offset - 1) end end fun parseGeneric stream parser acc = let val stream = ungetc stream val P as (fid, startOff) = getPos stream fun parse' stream acc = let val (c, stream) = (fn (c, s) => (SOME c, s)) $ getchar stream handle _ => (NONE, stream) val (acc, tk, stream) = parser acc (stream, startOff) c in case tk of NONE => parse' stream acc | _ => (valOf tk, stream) end val (tk, stream) = parse' stream acc in ((P, tk): fullToken, stream) end fun finishSeqRead startOff stream = let val (_, endOff) = getPos stream val s = getSubstr startOff endOff stream in s end fun idParser () (stream, startOff) c = let fun finalize stream = ((), SOME $ Id (finishSeqRead startOff stream), stream) in case c of NONE => finalize stream | SOME c => if isIdBody c then ((), NONE, stream) else finalize (ungetc stream) end fun formCppDir tk tkl = let fun conv tk = ((#1 $ hd tkl, tk) :: tl tkl) in case tk of Id "include" => conv CppInclude | Id "define" => conv CppDefine | _ => raise Fail $ "Expected cpp direcive\n" end fun numParser () (stream, startOff) c = let fun finalize stream = ((), SOME $ Number (finishSeqRead startOff stream), stream) in case c of NONE => finalize stream | SOME c => if isDigit c then ((), NONE, stream) else finalize (ungetc stream) end fun tokenize stream (tkl: fullToken list) = let fun getcharSkipEof stream = getchar stream handle EndOfFile => getcharSkipEof (advanceToNewFile stream) val (c, stream) = (fn (c, s) => (SOME c, s)) $ getcharSkipEof stream handle EndOfStream => (NONE, stream) fun % tk = (getPosAfterCharRead stream, tk) fun @-> (tk, stream) = tokenize stream (tk :: tkl) in case c of NONE => (rev tkl, #allFiles stream) | SOME c => if isSpace c then tokenize stream tkl else if isIdStart c then let val isCppDir = (fn Hash => true | _ => false) (#2 $ hd tkl) handle Empty => false val (tk, stream) = parseGeneric stream idParser () in tokenize stream (if isCppDir then (formCppDir (#2 tk) tkl) else (tk :: tkl)) end else if isDigit c then @-> $ parseGeneric stream numParser () else if isStartForFsm c then @-> $ fsmEat stream else raise UnexpectedCharacter c end fun main [fname] = let val stream = streamInit fname val (tkl, fileList) = tokenize stream [] in List.app (fn (p, x) => (printPos fileList p; printToken x)) tkl end | main _ = printLn "Expected a signle argument: file name" val () = main $ CommandLine.arguments ()