From 9ed6d5386bcc9ad8a8e73b481d41fcaea80adaad Mon Sep 17 00:00:00 2001 From: Vladimir Azarov Date: Sat, 22 Mar 2025 01:45:46 +0100 Subject: Initial version --- .gitignore | 2 + Makefile | 2 + cpp.mlb | 6 ++ cpp.sml | 226 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 236 insertions(+) create mode 100644 .gitignore create mode 100644 Makefile create mode 100644 cpp.mlb create mode 100644 cpp.sml diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..d088a4b --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +test +cpp diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..0efff47 --- /dev/null +++ b/Makefile @@ -0,0 +1,2 @@ +def: + mlton cpp.mlb diff --git a/cpp.mlb b/cpp.mlb new file mode 100644 index 0000000..ef33548 --- /dev/null +++ b/cpp.mlb @@ -0,0 +1,6 @@ +ann + "allowRecordPunExps true" +in + $(SML_LIB)/basis/basis.mlb + cpp.sml +end diff --git a/cpp.sml b/cpp.sml new file mode 100644 index 0000000..ea3b35c --- /dev/null +++ b/cpp.sml @@ -0,0 +1,226 @@ +fun $ (x, y) = x y +infixr 0 $ + +fun printLn s = (print s; print "\n") + +datatype token = + Number of substring | + Id of substring | + Plus | + Semicolon | + Hash | + CppInclude + +fun printToken (Number s) = printLn $ "Number: " ^ Substring.string s + | printToken (Id s) = printLn $ "Id: " ^ Substring.string s + | printToken Plus = printLn "+" + | printToken Semicolon = printLn ";" + | printToken Hash = printLn "#" + | printToken CppInclude = printLn "$include" + +type fileId = int +type fileOffset = int +type pos = fileId * fileOffset +type fullToken = pos * token + +type stream = { + (* list of file ids, file names and file contents *) + allFiles: (fileId * string * string) list, + (* stack of file ids, file offsets and file contents *) + stack: (fileId * fileOffset * string) list +} + +exception EndOfStream +exception EndOfFile +exception InvalidStream +exception InvalidStreamAdvance + +fun calcFilePos s offset = +let + fun calc s cur offset (line, col) = + if cur = offset then + (line, col) + else + if String.sub (s, cur) = #"\n" then + calc s (cur + 1) offset (line + 1, 1) + else + calc s (cur + 1) offset (line, col + 1) +in + calc s 0 offset (1, 1) +end + +fun printPos fileList (id, pos) = +let + val triple = List.find (fn (fid, _, _) => fid = id) fileList +in + case triple of + NONE => raise InvalidStream + | SOME (_, fname, contents) => + let + val (line, col) = calcFilePos contents pos + val line = Int.toString line + val col = Int.toString col + in + print $ fname ^ ":" ^ line ^ ":" ^ col ^ ": " + end +end + +fun readFile fname = +let + open TextIO + val h = openIn fname + val s = inputAll h + val () = closeIn h +in + s +end + +fun getchar + (S as { stack = (id, off, contents) :: rest, allFiles }: stream) + : (char * stream) = + if off < String.size contents then + (String.sub (contents, off), + { stack = (id, off + 1, contents) :: rest, allFiles }) + else + raise EndOfFile + | getchar _ = raise EndOfStream + +fun ungetc + (S as { stack = (id, off, contents) :: rest, allFiles }) = + if off = 0 then + raise InvalidStream + else + { stack = (id, off - 1, contents) :: rest, allFiles } + | ungetc _ = raise InvalidStream + +fun getOffset ({ stack = (id, off, contents) :: _, ... }: stream) = off + | getOffset _ = raise InvalidStream + +fun getPosAfterCharRead + ({ stack = (id, off, contents) :: _, ... }: stream) = + (id, off - 1) + | getPosAfterCharRead _ = raise InvalidStream + +fun getPos ({ stack = (id, off, contents) :: _, ... }: stream) = (id, off) + | getPos _ = raise InvalidStream + +fun getSubstr startOff endOff + ({ stack = (_, _, contents) :: _, ... }: stream) = + Substring.substring (contents, startOff, endOff - startOff) + | getSubstr _ _ _ = raise InvalidStream + +fun advanceToNewFile + (S as { stack = (id, off, contents) :: rest, allFiles }: stream) = + if off = String.size contents then + { stack = rest, allFiles } + else + raise InvalidStreamAdvance + | advanceToNewFile _ = raise InvalidStreamAdvance + +fun streamInit fname = +let + val contents = readFile fname +in + { allFiles = [(0, fname, contents)], stack = [(0, 0, contents)] } +end + +exception UnexpectedCharacter of char + +fun isSpace c = c = #" " orelse c = #"\t" orelse c = #"\n" +fun isLetter c = + ord c >= ord #"a" andalso ord c <= ord #"z" orelse + ord c >= ord #"A" andalso ord c <= ord #"Z" +fun isIdStart c = c = #"_" orelse isLetter c +fun isDigit c = ord c >= ord #"0" andalso ord c <= ord #"9" +fun isIdBody c = isIdStart c orelse isDigit c + +fun isFirstOnLine (tk: fullToken) (stream: stream) = +let + val (id, offset) = #1 tk +in + case List.find (fn (fid, _, _) => id = fid) (#allFiles stream) of + NONE => raise InvalidStream + | SOME (_, _, contents) => + let + fun returnToNL ~1 = true + | returnToNL offset = + let + val chr = String.sub (contents, offset) + in + if chr = #"\n" then + true + else if isSpace chr then + returnToNL (offset - 1) + else + false + end + in + returnToNL (offset - 1) + end +end + +fun tokenize stream tkl = +let + fun getcharSkipEof stream = getchar stream handle + EndOfFile => getcharSkipEof (advanceToNewFile stream) + + val (c, stream) = (fn (c, s) => (SOME c, s)) $ getcharSkipEof stream + handle + EndOfStream => (NONE, stream) + + fun % tk = (getPosAfterCharRead stream, tk) +in + case c of + NONE => (rev tkl, #allFiles stream) + | SOME c => + if isSpace c then + tokenize stream tkl + else if isIdStart c then + parseId c stream tkl + else case c of + #"+" => tokenize stream (% Plus :: tkl) + | #";" => tokenize stream (% Semicolon :: tkl) + | #"#" => tokenize stream (% Hash :: tkl) + | _ => raise UnexpectedCharacter c +end and +parseId c stream (tkl: fullToken list) = +let + val P as (_, pos) = getPosAfterCharRead stream + + fun eatId curId stream = + let + val (c, stream) = (fn (c, s) => (SOME c, s)) $ getchar stream handle + _ => (NONE, stream) + in + case c of + NONE => (curId, stream) + | SOME c => + if isIdBody c then + eatId (curId + 1) stream + else + (curId, ungetc stream) + end + + val (endOffset, stream) = eatId (pos + 1) stream + val id = getSubstr pos endOffset stream + +in + if ((fn Hash => true | _ => false) (#2 $ hd tkl) handle Empty => false) + andalso isFirstOnLine (hd tkl) stream + andalso Substring.string id = "include" + then + tokenize stream (((#1 $ hd tkl), CppInclude) :: tl tkl) + else + tokenize stream ((P, Id id) :: tkl) +end + +fun main [fname] = +let + val stream = streamInit fname + val (tkl, fileList) = tokenize stream [] +in + List.app (fn (p, x) => (printPos fileList p; printToken x)) tkl +end + | main _ = printLn "Expected a signle argument: file name" + +val () = main $ CommandLine.arguments () -- cgit v1.2.3