diff options
author | Vladimir Azarov <avm@intermediate-node.net> | 2025-04-11 21:54:16 +0200 |
---|---|---|
committer | Vladimir Azarov <avm@intermediate-node.net> | 2025-04-11 21:54:16 +0200 |
commit | e99a8dc48ede26696be2ba75a8cb0d5122d94598 (patch) | |
tree | c3dcd1d6a9b96aaedd081f13b9dc7e7d6c07e2bd | |
parent | 8e2dc7712de206b87e1c46df9383c3fa1e18a43a (diff) |
#include directive
-rw-r--r-- | .gitignore | 1 | ||||
-rw-r--r-- | ccross.mlb | 25 | ||||
-rw-r--r-- | ccross.sig | 2 | ||||
-rw-r--r-- | ccross.sml | 8 | ||||
-rw-r--r-- | common.sml (renamed from general.sml) | 0 | ||||
-rw-r--r-- | cpp.fun | 260 | ||||
-rw-r--r-- | cpp.sig | 15 | ||||
-rw-r--r-- | driver.fun | 39 | ||||
-rw-r--r-- | driver.sig | 3 | ||||
-rw-r--r-- | exn_handler.fun | 1 | ||||
-rw-r--r-- | stream.sig | 9 | ||||
-rw-r--r-- | stream.sml | 18 |
12 files changed, 294 insertions, 87 deletions
@@ -3,3 +3,4 @@ ccross doc/todo.txt mlmon.out* *dot +log* @@ -4,23 +4,14 @@ ann in $(SML_LIB)/basis/basis.mlb $(SML_LIB)/basis/mlton.mlb - general.sml + common.sml - stream.sig - stream.sml + stream.sig stream.sml + hashtable.sig hashtable.sml + tokenizer.sig tokenizer.fun + cpp.sig cpp.fun + exn_handler.sig exn_handler.fun + driver.sig driver.fun - hashtable.sig - hashtable.sml - - tokenizer.sig - tokenizer.fun - - cpp.sig - cpp.fun - - exn_handler.sig - exn_handler.fun - - ccross.sig - ccross.sml + ccross.sig ccross.sml end @@ -1,4 +1,4 @@ signature CCROSS = sig - structure P: CPP + structure D: DRIVER structure ExnHandler: EXN_HANDLER end @@ -4,13 +4,11 @@ structure ccross:> CCROSS = struct structure P:> CPP = Cpp(T) + structure D:> DRIVER = Driver(P) + structure ExnHandler:> EXN_HANDLER = ExnHandler(structure T = T; structure P = P) end val () = MLton.Exn.setTopLevelHandler ccross.ExnHandler.handler - -fun main [fname] = ccross.P.debugPrint fname - | main _ = printLn "Expected a single argument: file name" - -val () = main $ CommandLine.arguments () +val () = ccross.D.exec () @@ -5,7 +5,8 @@ functor Cpp(T: TOKENIZER): CPP = struct type tkPos = T.S.pos type t = { streams: T.S.t list, fileInfo: T.S.fileInfo list, - lastPos: tkPos option, firstId: T.S.fileId }; + lastPos: tkPos option, firstId: T.S.fileId, + incDirs: string list } datatype tkExp = Tk of T.token | @@ -16,44 +17,29 @@ functor Cpp(T: TOKENIZER): CPP = struct BinOp | Op - type tkExpectedValue = string * tkExp list + type tkExpectedVal = string * tkExp list exception StreamTooOld - exception TkExpected of tkExpectedValue + exception TkExpected of tkExpectedVal - fun create fname = + type tkErrorVal = string * string + exception TkError of tkErrorVal + + fun create fname incDirs = let val stream = T.S.create fname val info = T.S.convert stream in - { streams = [stream] , fileInfo = [info], lastPos = NONE, firstId = #1 info } - end - - fun getToken - ({ streams = stream :: tail, fileInfo, lastPos, firstId }: t) = - let - val (tk, stream) = T.getToken stream - in - case tk of - NONE => getToken { streams = tail, fileInfo, lastPos, firstId } - | SOME (pos, tk) => - ( tk, { streams = stream :: tail, fileInfo, - lastPos = SOME pos, firstId }) + { streams = [stream] , fileInfo = [info], lastPos = NONE, + firstId = #1 info, incDirs } end - | getToken - { streams = [], fileInfo, lastPos = SOME lastPos, firstId } = - let - val pos = SOME (#1 lastPos, ~1) (* EOF *) - in - (T.EOS, {streams = [], fileInfo, lastPos = pos, firstId }) - end - | getToken { streams = [], fileInfo, lastPos = NONE, firstId } = - (T.EOS, { streams = [], fileInfo, - lastPos = SOME (firstId, ~1), firstId }) fun getLastPos ({ lastPos = NONE, ... }: t) = raise Unreachable | getLastPos { lastPos = SOME p, ... } = p + fun getTopFileInfo ({ streams = top :: _, ... }: t) = T.S.convert top + | getTopFileInfo _ = raise Unreachable + val tkExp2str = fn (Tk tk) => T.token2str tk | Id: tkExp => "identifier" @@ -63,17 +49,24 @@ functor Cpp(T: TOKENIZER): CPP = struct | BinOp => "binary operator" | Op => "operator" - fun prepAndRaise (stream: t) (id, pos) expList = + fun tkPos2str stream (id, pos) = let val fileInfo = case List.find (fn (id', _, _) => id' = id) $ #fileInfo stream of NONE => raise StreamTooOld | SOME fileInfo => fileInfo - val pos = T.S.ppos2str $ T.S.pos2pposWithFI (id, pos) fileInfo in - raise TkExpected (pos, expList) + T.S.ppos2str $ T.S.pos2pposWithFI (id, pos) fileInfo end + fun prepTkError stream pos msg = raise TkError (tkPos2str stream pos, msg) + fun prepLastTkError stream = prepTkError stream (getLastPos stream) + + fun prepTkExpected (stream: t) pos expList = + raise TkExpected (tkPos2str stream pos, expList) + fun prepLastTkExpected stream = prepTkExpected stream + (getLastPos stream) + fun tkExpectedPrint (pos, expList) = let fun tkExps2str [e] [] = tkExp2str e @@ -88,39 +81,206 @@ functor Cpp(T: TOKENIZER): CPP = struct printLn $ tkExps2str expList [] end - fun debugPrint fname = + fun tkErrorPrint (pos, msg) = printLn $ pos ^ ": " ^ msg + + exception EmptyPath + + fun getDirOfCurFile ({ streams = top :: _, ... }: t) = + OS.Path.getParent $ T.S.getFname top + | getDirOfCurFile _ = raise Unreachable + + + (* TODO: properly handle Size and Path exceptions from concat *) + fun findPath _ "" _ = raise EmptyPath + | findPath stream fname true = + let + val path = OS.Path.concat (getDirOfCurFile stream, fname) + in + (path, TextIO.openIn path) + end + | findPath stream fname false = + let + fun try (dir :: tail) = + let + val fname = OS.Path.concat (dir, fname) + in + SOME (fname, TextIO.openIn fname) handle _ => try tail + end + | try [] = NONE + + val instream = try (#incDirs stream) + in + case instream of + NONE => findPath stream fname true + | SOME pair => pair + end + + fun predSkip stream pred = + let + val (c, stream) = T.S.getchar stream + in + case c of + NONE => (NONE, stream) + | SOME c => + if c = #"\n" then + (NONE, stream) + else if pred c then + (SOME $ #2 $ T.S.getPosAfterCharRead stream, stream) + else + predSkip stream pred + end + + fun tryCollect stream = let - val stream = create fname - val cache = T.S.pposCacheInit $ hd $ #fileInfo stream + val top = hd $ #streams stream - fun print' cache stream first = + fun x f = f stream + + fun returnBack top = { + streams = top :: tl (#streams stream), + fileInfo = x#fileInfo, + lastPos = x#lastPos, + firstId = x#firstId, + incDirs = x#incDirs + } + + val (start, s) = + case predSkip top (fn c => c = #"<") of + (NONE, _) => raise Unreachable + | (SOME p, s) => (p, s) + + val (res, s) = predSkip s (fn c => c = #">") + in + case res of + NONE => (NONE, stream) + | SOME endOff => + (SOME $ T.S.getSubstr (start + 1) endOff s, returnBack s) + end + + fun getToken + ({ streams = stream :: tail, fileInfo, lastPos, firstId, incDirs }: t) = + let + val (tk, stream) = T.getToken stream + in + case tk of + NONE => getToken { streams = tail, fileInfo, lastPos, firstId, incDirs } + | SOME (pos, tk) => + if tk = T.CppInclude then + handleInclude { streams = stream :: tail, fileInfo, + lastPos, firstId, incDirs } + else + (tk, { streams = stream :: tail, fileInfo, + lastPos = SOME pos, firstId, incDirs }) + end + | getToken + { streams = [], fileInfo, lastPos = SOME lastPos, firstId, incDirs } = + let + val pos = SOME (#1 lastPos, ~1) (* EOF *) + in + (T.EOS, {streams = [], fileInfo, lastPos = pos, firstId, incDirs }) + end + | getToken { streams = [], fileInfo, lastPos = NONE, firstId, incDirs } = + (T.EOS, { streams = [], fileInfo, + lastPos = SOME (firstId, ~1), firstId, incDirs }) + + and handleInclude (stream: t) = + let + val (tk, streamNew) = getToken stream + + fun die () = + prepLastTkError streamNew + "#include with macro argument is not implemented" + in + case tk of + T.StringConst path => includeFile streamNew path true + | T.EOS => die () + | tk => + if String.sub (T.token2str tk, 0) = #"<" then + let + val (path, stream) = tryCollect stream + in + case path of + SOME path => includeFile stream path false + | NONE => die () + end + else + die () + end + + and includeFile stream fname localhdr = + let + val (fname, instream) = findPath stream fname localhdr + handle EmptyPath => + prepLastTkError stream "#include path can not be empty" + val newStream = T.S.createFromInstream fname instream + val newFileInfo = T.S.convert newStream + in + getToken { + streams = newStream :: #streams stream, + fileInfo = newFileInfo :: #fileInfo stream, + lastPos = #lastPos stream, + firstId = #firstId stream, + incDirs = #incDirs stream + } + end + + + fun debugPrintToken cache tk (line, col) printLineRegardless = + let + val ` = Int.toString + in + if printLineRegardless orelse T.S.pposCacheGetLine cache <> line then + print $ "\n" ^ T.S.pposCacheGetFname cache ^ ":" ^ `line ^ "\n\t" + else + (); + print $ `col ^ ":" ^ T.token2str tk ^ " " + end + + fun adjustCacheStack (s as (top :: rest)) pos stream = + let + fun idMatches cache = T.S.pposCacheGetId cache = #1 pos + fun metBefore [] = NONE + | metBefore (c :: cs) = + if idMatches c then + SOME (c :: cs) + else + metBefore cs + in + if idMatches top then + (false, s) + else + (true, case metBefore rest of + NONE => T.S.pposCacheInit (getTopFileInfo stream) :: s + | SOME stack => stack) + end + | adjustCacheStack _ _ _ = raise Unreachable + + fun debugPrint' cacheStack stream first = let val (tk, stream) = getToken stream - val ` = Int.toString in case tk of - T.NewLine => print' cache stream first + T.NewLine => debugPrint' cacheStack stream first | T.EOS => () | tk => let - val ((line, col), cache') = T.S.pposCacheAdvance - (getLastPos stream) cache - fun printTk () = - print $ `col ^ ":" ^ T.token2str tk ^ " " + val pos = getLastPos stream + val (stackChanged, cacheStack) = + adjustCacheStack cacheStack pos stream + + val oldTop = hd cacheStack + val (pair, top) = T.S.pposCacheAdvance pos oldTop in - if T.S.pposCacheGetLine cache = line andalso not first then - printTk () - else - (if not first then print "\n" else (); - printLn $ T.S.pposCacheGetFname cache' ^ ":" ^ `line; - print "\t"; - printTk ()); - print' cache' stream false + debugPrintToken oldTop tk pair (first orelse stackChanged); + debugPrint' (top :: tl cacheStack) stream false end end + + fun debugPrint stream = + let + val cache = T.S.pposCacheInit $ hd $ #fileInfo stream in - print' cache stream true; + debugPrint' [cache] stream true; print "\n" end - end @@ -3,9 +3,12 @@ signature CPP = sig type t type tkPos - type tkExpectedValue - exception TkExpected of tkExpectedValue + type tkExpectedVal + exception TkExpected of tkExpectedVal + + type tkErrorVal + exception TkError of tkErrorVal datatype tkExp = Tk of T.token | @@ -16,12 +19,12 @@ signature CPP = sig BinOp | Op - val create: string -> t + val create: string -> string list -> t val getToken: t -> T.token * t val getLastPos: t -> tkPos - val prepAndRaise: t -> tkPos -> tkExp list -> 'a - val tkExpectedPrint: tkExpectedValue -> unit + val tkExpectedPrint: tkExpectedVal -> unit + val tkErrorPrint: tkErrorVal -> unit - val debugPrint: string -> unit + val debugPrint: t -> unit end diff --git a/driver.fun b/driver.fun new file mode 100644 index 0000000..78ca877 --- /dev/null +++ b/driver.fun @@ -0,0 +1,39 @@ +functor Driver(P: CPP): DRIVER = struct + structure P = P + + type config = { + file: string option, + includeDirs: string list + } + + val initConfig: config = { file = NONE, includeDirs = [] } + + fun die msg = (printLn msg; Posix.Process.exit $ Word8.fromInt 1) + + fun parseCmdArgs { file, includeDirs } [] = + if file = NONE then + die "missing input file" + else + { file, includeDirs = rev includeDirs } + | parseCmdArgs _ ("-I" :: []) = + die "-I: expected directory path after flag" + | parseCmdArgs { file, includeDirs } ("-I" :: path :: tail) = + parseCmdArgs { file, includeDirs = path :: includeDirs } tail + | parseCmdArgs { file, includeDirs } (arg :: tail) = + if String.sub (arg, 0) = #"-" then + die $ arg ^ ": unknown flag" + else + case file of + NONE => parseCmdArgs { file = SOME arg, includeDirs } tail + | SOME _ => die $ arg ^ ": file already specified" + + fun exec () = + let + val config = parseCmdArgs initConfig (CommandLine.arguments ()) + + val cpp = P.create (valOf $ #file config) (#includeDirs config) + in + P.debugPrint cpp + end +end + diff --git a/driver.sig b/driver.sig new file mode 100644 index 0000000..341b090 --- /dev/null +++ b/driver.sig @@ -0,0 +1,3 @@ +signature DRIVER = sig + val exec: unit -> unit +end diff --git a/exn_handler.fun b/exn_handler.fun index 6e069d6..7e0aa4d 100644 --- a/exn_handler.fun +++ b/exn_handler.fun @@ -37,6 +37,7 @@ struct | IO.Io _ => ioExn e | T.TkErrorAug (pos, msg) => eprint $ T.S.ppos2str pos ^ ": " ^ msg | P.TkExpected v => P.tkExpectedPrint v + | P.TkError v => P.tkErrorPrint v | _ => otherExn e; exit 255) end @@ -32,13 +32,18 @@ signature STREAM = sig val pposWithoutCol: ppos -> ppos val getSubstr: fileOffset -> fileOffset -> t -> string + val getFname: t -> string val isFirstOnLine: pos -> t -> bool - (* throws IO.Io *) + (* both throw IO.Io *) val create: string -> t + val createFromInstream: string -> TextIO.instream -> t val pposCacheInit: fileInfo -> pposCache - val pposCacheAdvance: pos -> pposCache -> (int * int) * pposCache + + val pposCacheGetId: pposCache -> fileId val pposCacheGetLine: pposCache -> int val pposCacheGetFname: pposCache -> string + + val pposCacheAdvance: pos -> pposCache -> (int * int) * pposCache end @@ -74,17 +74,22 @@ structure Stream :> STREAM = struct fun getSubstr startOff endOff (_, _, _, contents) = String.substring (contents, startOff, endOff - startOff) - fun create fname = + fun getFname (stream: t) = #2 stream + + val lastUsedId = ref ~1 + + fun createFromInstream fname instream = let open TextIO - - val h = openIn fname - val contents = inputAll h - val () = closeIn h + val contents = inputAll instream + val () = closeIn instream in - (0, fname, 0, contents) + lastUsedId := !lastUsedId + 1; + (!lastUsedId, fname, 0, contents) end + fun create fname = createFromInstream fname (TextIO.openIn fname) + fun isFirstOnLine (_, offset) ((_, _, _, contents) : t) = let fun returnToNL ~1 = true @@ -119,6 +124,7 @@ structure Stream :> STREAM = struct offset = pos, line, col }) end + fun pposCacheGetId (cache: pposCache) = #id cache fun pposCacheGetLine (cache: pposCache) = #line cache fun pposCacheGetFname (cache: pposCache) = #fname cache end |