-rw-r--r--   .gitignore                                       |  4
-rw-r--r--   ccross.mlb                                       |  9
-rw-r--r--   ccross.sig                                       |  4
-rw-r--r--   ccross.sml                                       | 14
-rw-r--r--   cpp.fun (renamed from cpp.sml)                   | 26
-rw-r--r--   cpp.sig                                          |  6
-rw-r--r--   exn_handler.fun (renamed from exn_handler.sml)   | 16
-rw-r--r--   exn_handler.sig                                  |  3
-rw-r--r--   tokenizer.fun (renamed from tokenizer.sml)       | 97
-rw-r--r--   tokenizer.sig                                    | 10
10 files changed, 107 insertions(+), 82 deletions(-)
diff --git a/.gitignore b/.gitignore
index 28bb688..c495a68 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,5 +1,5 @@
-test*
+test
ccross
-todo*
+doc/todo.txt
mlmon.out*
*dot
diff --git a/ccross.mlb b/ccross.mlb
index 76eeb47..6e36d72 100644
--- a/ccross.mlb
+++ b/ccross.mlb
@@ -13,11 +13,14 @@ in
hashtable.sml
tokenizer.sig
- tokenizer.sml
+ tokenizer.fun
cpp.sig
- cpp.sml
+ cpp.fun
- exn_handler.sml
+ exn_handler.sig
+ exn_handler.fun
+
+ ccross.sig
ccross.sml
end
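An MLB basis file is elaborated in order, so each signature must precede the functor ascribed to it, and the functors must precede ccross.sml, which applies them. A sketch of the resulting ccross.mlb; the elided prefix is assumed to import the Basis Library and the helper sources (Stream, the $ and printLn utilities) that the rest of the patch relies on:

    local
      $(SML_LIB)/basis/basis.mlb
      (* ... stream sources and other helpers, not shown in this patch ... *)
    in
      hashtable.sml

      tokenizer.sig
      tokenizer.fun     (* functor Tokenizer *)

      cpp.sig
      cpp.fun           (* functor Cpp *)

      exn_handler.sig
      exn_handler.fun   (* functor ExnHandler *)

      ccross.sig
      ccross.sml        (* applies the functors and defines main *)
    end
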
diff --git a/ccross.sig b/ccross.sig
new file mode 100644
index 0000000..3bfaf9e
--- /dev/null
+++ b/ccross.sig
@@ -0,0 +1,4 @@
+signature CCROSS = sig
+ structure P: CPP
+ structure ExnHandler: EXN_HANDLER
+end
diff --git a/ccross.sml b/ccross.sml
index 58f9c32..749687b 100644
--- a/ccross.sml
+++ b/ccross.sml
@@ -1,4 +1,16 @@
-fun main [fname] = Cpp.debugPrint fname
+structure ccross:> CCROSS = struct
+ structure T:> TOKENIZER =
+ Tokenizer(structure H = Hashtable; structure S = Stream)
+
+ structure P:> CPP = Cpp(T)
+
+ structure ExnHandler:> EXN_HANDLER =
+ ExnHandler(structure T = T; structure P = P)
+end
+
+val () = MLton.Exn.setTopLevelHandler ccross.ExnHandler.handler
+
+fun main [fname] = ccross.P.debugPrint fname
| main _ = printLn "Expected a single argument: file name"
val () = main $ CommandLine.arguments ()
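MLton.Exn.setTopLevelHandler : (exn -> unit) -> unit replaces the runtime's default handler for uncaught exceptions, so registering ccross.ExnHandler.handler here keeps the only MLton-specific call in the main file rather than inside the functor ($ and printLn are project-local helpers defined elsewhere in the repository). A minimal standalone sketch of the idiom; the Fatal exception and the messages are illustrative, not taken from exn_handler.fun:

    (* Illustrative top-level handler, independent of this repository. *)
    exception Fatal of string

    fun handler (Fatal msg) =
          (TextIO.output (TextIO.stdErr, "error: " ^ msg ^ "\n");
           OS.Process.exit OS.Process.failure)
      | handler e =
          (TextIO.output (TextIO.stdErr, "uncaught: " ^ exnMessage e ^ "\n");
           OS.Process.exit OS.Process.failure)

    val () = MLton.Exn.setTopLevelHandler handler
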
diff --git a/cpp.sml b/cpp.fun
index 91f7f82..424d3ca 100644
--- a/cpp.sml
+++ b/cpp.fun
@@ -1,12 +1,14 @@
-structure Cpp:> CPP = struct
- type tkPos = Stream.pos
+functor Cpp(T: TOKENIZER): CPP = struct
+ structure T = T
+
+ type tkPos = T.S.pos
type t =
- { streams: Stream.t list, fileInfo: Stream.fileInfo list,
- lastPos: tkPos option, firstId: Stream.fileId };
+ { streams: T.S.t list, fileInfo: T.S.fileInfo list,
+ lastPos: tkPos option, firstId: T.S.fileId };
datatype tkExp =
- Tk of Tokenizer.token |
+ Tk of T.token |
Id |
NumConst |
StrLiteral |
@@ -21,8 +23,8 @@ structure Cpp:> CPP = struct
fun create fname =
let
- val stream = Stream.create fname
- val info = Stream.convert stream
+ val stream = T.S.create fname
+ val info = T.S.convert stream
in
{ streams = [stream] , fileInfo = [info], lastPos = NONE, firstId = #1 info }
end
@@ -30,7 +32,7 @@ structure Cpp:> CPP = struct
fun getToken
({ streams = stream :: tail, fileInfo, lastPos, firstId }: t) =
let
- val (tk, stream) = Tokenizer.getToken stream
+ val (tk, stream) = T.getToken stream
in
case tk of
NONE => getToken { streams = tail, fileInfo, lastPos, firstId }
@@ -43,17 +45,17 @@ structure Cpp:> CPP = struct
let
val pos = SOME (#1 lastPos, ~1) (* EOF *)
in
- (Tokenizer.EOS, {streams = [], fileInfo, lastPos = pos, firstId })
+ (T.EOS, {streams = [], fileInfo, lastPos = pos, firstId })
end
| getToken { streams = [], fileInfo, lastPos = NONE, firstId } =
- (Tokenizer.EOS, { streams = [], fileInfo,
+ (T.EOS, { streams = [], fileInfo,
lastPos = SOME (firstId, ~1), firstId })
fun getLastPos ({ lastPos = NONE, ... }: t) = raise Unreachable
| getLastPos { lastPos = SOME p, ... } = p
val tkExp2str = fn
- (Tk tk) => Tokenizer.token2str tk
+ (Tk tk) => T.token2str tk
| Id: tkExp => "identifier"
| NumConst => "numeric constant"
| StrLiteral => "string literal"
@@ -67,7 +69,7 @@ structure Cpp:> CPP = struct
case List.find (fn (id', _, _) => id' = id) $ #fileInfo stream of
NONE => raise StreamTooOld
| SOME fileInfo => fileInfo
- val pos = Stream.ppos2str $ Stream.pos2pposWithFI (id, pos) fileInfo
+ val pos = T.S.ppos2str $ T.S.pos2pposWithFI (id, pos) fileInfo
in
raise TkExpected (pos, expList)
end
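The functor header replaces the hard dependency on a fixed Tokenizer structure with a parameter, and the first thing the body does is re-export that parameter (structure T = T) so that the CPP interface can mention T.token and T.S.pos in its own specifications. A minimal sketch of this "re-export the argument" pattern with reduced signatures; TOK, PRE and Pre are illustrative names, not from this repository:

    signature TOK = sig
      type token
      val next : unit -> token option
    end

    signature PRE = sig
      structure T : TOK                  (* the argument, made visible to clients *)
      val nextInteresting : unit -> T.token option
    end

    functor Pre (T : TOK) : PRE = struct
      structure T = T                    (* rebind so the ascription can refer to T.token *)
      fun nextInteresting () = T.next ()
    end
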
diff --git a/cpp.sig b/cpp.sig
index 378a5d4..beed8c2 100644
--- a/cpp.sig
+++ b/cpp.sig
@@ -1,4 +1,6 @@
signature CPP = sig
+ structure T: TOKENIZER
+
type t
type tkPos
type tkExpectedValue
@@ -6,7 +8,7 @@ signature CPP = sig
exception TkExpected of tkExpectedValue
datatype tkExp =
- Tk of Tokenizer.token |
+ Tk of T.token |
Id |
NumConst |
StrLiteral |
@@ -15,7 +17,7 @@ signature CPP = sig
Op
val create: string -> t
- val getToken: t -> Tokenizer.token * t
+ val getToken: t -> T.token * t
val getLastPos: t -> tkPos
val prepAndRaise: t -> tkPos -> tkExp list -> 'a
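Since the tokenizer is now reachable through the preprocessor as P.T, code written against CPP can pattern-match on token constructors along the same path it reads tokens from. A hedged sketch of a consumer loop, assuming the instantiation from ccross.sml above; EOS and token2str are the tokenizer names already used in this patch, and the file name is illustrative:

    (* Drain a preprocessor stream, printing every token until EOS. *)
    fun drain st =
      let
        val (tk, st) = ccross.P.getToken st
      in
        case tk of
            ccross.P.T.EOS => ()
          | _ => (print (ccross.P.T.token2str tk ^ "\n"); drain st)
      end

    val () = drain (ccross.P.create "input.c")
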
diff --git a/exn_handler.sml b/exn_handler.fun
index 50fb8dc..6e069d6 100644
--- a/exn_handler.sml
+++ b/exn_handler.fun
@@ -1,4 +1,6 @@
-structure GlobalExnHandler: sig val handler: exn -> unit end = struct
+functor ExnHandler(structure T: TOKENIZER; structure P: CPP):
+ EXN_HANDLER =
+struct
fun eprint s = printLn $ "error: " ^ s
@@ -29,18 +31,12 @@ structure GlobalExnHandler: sig val handler: exn -> unit end = struct
| ioExn _ = (printLn "ioExn: unreachable"; exit 254)
fun handler e =
- let
- open Tokenizer Cpp
- in
(case e of
- FsmTableIsTooSmall =>
+ T.FsmTableIsTooSmall =>
            eprint "fsm table is too small. Increase 'maxState' value"
| IO.Io _ => ioExn e
- | TkErrorAug (pos, msg) => eprint $ Stream.ppos2str pos ^ ": " ^ msg
- | TkExpected v => tkExpectedPrint v
+ | T.TkErrorAug (pos, msg) => eprint $ T.S.ppos2str pos ^ ": " ^ msg
+ | P.TkExpected v => P.tkExpectedPrint v
| _ => otherExn e;
exit 255)
- end
end
-
-val () = MLton.Exn.setTopLevelHandler GlobalExnHandler.handler
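Both new functors use the specification form of functor arguments, functor F (structure X : A; structure Y : B), which bundles several named parameters into a single argument and is applied with the matching structure X = ...; structure Y = ... syntax seen in ccross.sml. A small self-contained sketch of the two forms; every name here is illustrative:

    signature A = sig val a : int end
    signature B = sig val b : int end

    (* Plain form: exactly one structure parameter. *)
    functor F1 (X : A) = struct val result = X.a end

    (* Specification form: several named parameters in one argument. *)
    functor F2 (structure X : A; structure Y : B) = struct
      val result = X.a + Y.b
    end

    structure SA = struct val a = 1 end
    structure SB = struct val b = 2 end

    structure R1 = F1 (SA)
    structure R2 = F2 (structure X = SA; structure Y = SB)
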
diff --git a/exn_handler.sig b/exn_handler.sig
new file mode 100644
index 0000000..ac5c574
--- /dev/null
+++ b/exn_handler.sig
@@ -0,0 +1,3 @@
+signature EXN_HANDLER = sig
+ val handler: exn -> unit
+end
diff --git a/tokenizer.sml b/tokenizer.fun
index 10c2e77..5cca203 100644
--- a/tokenizer.sml
+++ b/tokenizer.fun
@@ -1,4 +1,8 @@
-structure Tokenizer:> TOKENIZER = struct
+functor Tokenizer(structure H: HASHTABLE; structure S: STREAM)
+ : TOKENIZER =
+struct
+
+ structure S = S
datatype intType = ItDec | ItOct | ItHex
datatype intSfx = IsNone | IsU | IsL | IsUL | IsLL | IsULL
@@ -120,12 +124,12 @@ structure Tokenizer:> TOKENIZER = struct
val kwPrefix = #"@"
val cppPrefix = #"$"
- type fullToken = Stream.pos * token
+ type fullToken = S.pos * token
datatype tkErrorAuxInfo = TkiEOF | TkiDx of int | TkiStart
exception TkError of tkErrorAuxInfo * string
- exception TkErrorAug of Stream.ppos * string
+ exception TkErrorAug of S.ppos * string
exception ExpectedCppDir (* handled in postprocess *)
@@ -425,11 +429,11 @@ structure Tokenizer:> TOKENIZER = struct
fun fsmEat stream =
let
open Array
- val pos = Stream.getPos stream
+ val pos = S.getPos stream
fun get curState stream =
let
- val (c, stream) = Stream.getchar stream
+ val (c, stream) = S.getchar stream
in
case c of
NONE => (#1 $ sub (#2 $ fsmTable (), curState), stream)
@@ -439,7 +443,7 @@ structure Tokenizer:> TOKENIZER = struct
val nextState = sub (row, ord c)
in
if nextState = ~1 then
- (tk, Stream.ungetc stream)
+ (tk, S.ungetc stream)
else
get nextState stream
end
@@ -450,32 +454,31 @@ structure Tokenizer:> TOKENIZER = struct
fun tkError2aug stream (dx, msg) =
let
- val (id, pos) = Stream.getPosAfterCharRead stream
- val pos = Stream.pos2ppos (id, pos + dx) stream
+ val (id, pos) = S.getPosAfterCharRead stream
+ val pos = S.pos2ppos (id, pos + dx) stream
in
TkErrorAug (pos, msg)
end
- fun parseGeneric stream parser acc =
+ fun parserWrapper stream parser acc =
let
- val stream = Stream.ungetc stream
- val P as (_, startOff) = Stream.getPos stream
+ val stream = S.ungetc stream
+ val P as (_, startOff) = S.getPos stream
fun parse' stream acc = let
- val (c, stream) = Stream.getchar stream
+ val (c, stream) = S.getchar stream
val (acc, tk, stream) = parser acc (stream, startOff) c handle
TkError (TkiDx dx, msg) => raise tkError2aug stream (dx, msg)
| TkError (TkiStart, msg) =>
let
- val startPos = Stream.pos2ppos P stream
+ val startPos = S.pos2ppos P stream
in
raise TkErrorAug (startPos, msg)
end
| TkError (TkiEOF, msg) =>
let
- open Stream
- val pos = pposWithoutCol $ pos2ppos P stream
+ val pos = S.pposWithoutCol $ S.pos2ppos P stream
in
raise TkErrorAug (pos, msg)
end
@@ -492,21 +495,20 @@ structure Tokenizer:> TOKENIZER = struct
fun finishSeqRead startOff stream =
let
- val (_, endOff) = Stream.getPos stream
- val s = Stream.getSubstr startOff endOff stream
+ val (_, endOff) = S.getPos stream
+ val s = S.getSubstr startOff endOff stream
in
s
end
fun keywordHashtableGen () =
let
- open Hashtable
- val table = create 128
+ val table = H.create 128
val () =
List.app
(fn (tk, repr) =>
if String.sub (repr, 0) = kwPrefix then
- insert table (String.extract (repr, 1, NONE)) tk
+ H.insert table (String.extract (repr, 1, NONE)) tk
else
())
tokenRepr
@@ -517,7 +519,7 @@ structure Tokenizer:> TOKENIZER = struct
val keywordHashtable = lazy keywordHashtableGen
fun findKeyword str =
- case Hashtable.lookup (keywordHashtable ()) str of
+ case H.lookup (keywordHashtable ()) str of
NONE => Id str
| SOME tk => tk
@@ -537,7 +539,7 @@ structure Tokenizer:> TOKENIZER = struct
if isIdBody c then
((), NONE, stream)
else
- finalize (Stream.ungetc stream)
+ finalize (S.ungetc stream)
end
datatype intMode = ImDec | ImOct | ImInvalidOct | ImHex
@@ -547,7 +549,7 @@ structure Tokenizer:> TOKENIZER = struct
fun getLongestSeq acc pred stream =
let
- val (c, stream') = Stream.getchar stream
+ val (c, stream') = S.getchar stream
in
if isSome c andalso pred $ valOf c then
getLongestSeq (valOf c :: acc) pred stream'
@@ -588,7 +590,7 @@ structure Tokenizer:> TOKENIZER = struct
fun numParser NpInit (stream, _) (SOME c) =
if c = #"0" then
let
- val (c, stream') = Stream.getchar stream
+ val (c, stream') = S.getchar stream
in
case c of
NONE =>
@@ -617,7 +619,7 @@ structure Tokenizer:> TOKENIZER = struct
fun finish () =
let
val () = checkAndRaise ImInvalidOct "invalid octal constant"
- val stream = Stream.ungetc stream
+ val stream = S.ungetc stream
val str = finishSeqRead (startOff + offset) stream
val (sfx, stream) = getIntSuffix stream
in
@@ -640,9 +642,9 @@ structure Tokenizer:> TOKENIZER = struct
end
| numParser (FloatMode FmDot) (stream, startOff) _ =
let
- val (len, stream) = skipDigitSeq 0 $ Stream.ungetc stream
+ val (len, stream) = skipDigitSeq 0 $ S.ungetc stream
- val (c, stream') = Stream.getchar stream
+ val (c, stream') = S.getchar stream
fun finish () =
let
@@ -666,7 +668,7 @@ structure Tokenizer:> TOKENIZER = struct
if c = NONE then
raise TkError (TkiDx 0, "expected digit")
else if valOf c <> #"+" andalso valOf c <> #"-" then
- (0, Stream.ungetc stream)
+ (0, S.ungetc stream)
else
(~1, stream)
@@ -694,7 +696,7 @@ structure Tokenizer:> TOKENIZER = struct
(SOME $ chr acc, stream)
else
let
- val (c, stream) = Stream.getchar stream
+ val (c, stream) = S.getchar stream
in
case c of
NONE => (SOME $ chr acc, stream)
@@ -702,7 +704,7 @@ structure Tokenizer:> TOKENIZER = struct
if isOctal c then
follow stream (acc * 8 + chrIntVal c) (count + 1)
else
- (SOME $ chr acc, Stream.ungetc stream)
+ (SOME $ chr acc, S.ungetc stream)
end
in
follow stream (chrIntVal c) 1
@@ -712,7 +714,7 @@ structure Tokenizer:> TOKENIZER = struct
let
fun follow stream acc count =
let
- val (c, stream) = Stream.getchar stream
+ val (c, stream) = S.getchar stream
val noHex = TkError (TkiDx 0, "\\x without hex digits")
in
@@ -732,7 +734,7 @@ structure Tokenizer:> TOKENIZER = struct
if count = 0 then
raise noHex
else
- (SOME $ chr acc, Stream.ungetc stream)
+ (SOME $ chr acc, S.ungetc stream)
end
in
follow stream 0 0
@@ -742,7 +744,7 @@ structure Tokenizer:> TOKENIZER = struct
let
fun raiseErr0 msg = raise TkError (TkiDx 0, msg)
- val (c, stream) = Stream.getchar stream
+ val (c, stream) = S.getchar stream
val c =
case c of
NONE => raiseErr0 "unfinished escape sequence"
@@ -866,14 +868,13 @@ structure Tokenizer:> TOKENIZER = struct
in
(prevPos, formCppDir tk') handle
ExpectedCppDir =>
- raise TkErrorAug (Stream.pos2ppos pos stream,
+ raise TkErrorAug (S.pos2ppos pos stream,
"expected preprocessor directive")
end
fun unexpectedCharRaise stream c =
let
- open Stream
- val pos = pos2ppos (getPosAfterCharRead stream) stream
+ val pos = S.pos2ppos (S.getPosAfterCharRead stream) stream
val repr =
if isPrintable c then
str c
@@ -888,10 +889,10 @@ structure Tokenizer:> TOKENIZER = struct
fun skip prevIsAsterisk stream =
let
val (c, stream) =
- case Stream.getchar stream of
+ case S.getchar stream of
(NONE, _) =>
let
- val pos = Stream.pos2ppos pos stream
+ val pos = S.pos2ppos pos stream
in
raise TkErrorAug (pos, "unfinished comment")
end
@@ -908,12 +909,12 @@ structure Tokenizer:> TOKENIZER = struct
fun handleBackslash stream =
let
- val (c, stream) = Stream.getchar stream
+ val (c, stream) = S.getchar stream
val raiseErr = fn () =>
let
- val pos = Stream.getPosAfterCharRead stream
- val pos = Stream.pos2ppos pos stream
+ val pos = S.getPosAfterCharRead stream
+ val pos = S.pos2ppos pos stream
in
raise TkErrorAug (pos, "expected \\n after backslash")
end
@@ -929,19 +930,19 @@ structure Tokenizer:> TOKENIZER = struct
fun processSymbol stream =
let
- val (T as (p, tk), stream) = fsmEat $ Stream.ungetc stream
+ val (T as (p, tk), stream) = fsmEat $ S.ungetc stream
in
case tk of
CommentStart => getToken $ skipComment stream p
- | DoubleDot => (SOME (p, Dot), Stream.ungetc stream)
+ | DoubleDot => (SOME (p, Dot), S.ungetc stream)
| Hash =>
- if Stream.isFirstOnLine p stream then
+ if S.isFirstOnLine p stream then
let
val (tk, stream) = getToken stream
in
case tk of
NONE =>
- raise TkErrorAug (Stream.pos2ppos p stream,
+               raise TkErrorAug (S.pos2ppos p stream,
                  "unfinished preprocessor directive")
| SOME tk =>
(SOME $ handleCppDir tk p stream, stream)
@@ -953,16 +954,16 @@ structure Tokenizer:> TOKENIZER = struct
and getToken stream =
let
- val (c, stream) = Stream.getchar stream
+ val (c, stream) = S.getchar stream
fun @-> parser acc =
- (fn (tk, s) => (SOME tk, s)) $ parseGeneric stream parser acc
+ (fn (tk, s) => (SOME tk, s)) $ parserWrapper stream parser acc
in
case c of
NONE => (NONE, stream)
| SOME c =>
if c = #"\n" then
- (SOME (Stream.getPosAfterCharRead stream, NewLine), stream)
+ (SOME (S.getPosAfterCharRead stream, NewLine), stream)
else if Char.isSpace c then
getToken stream
else if isIdStart c then
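keywordHashtable is built through a lazy helper that is defined elsewhere in the repository (it is not part of this patch), so the keyword table is only allocated on the first lookup. A hedged sketch of what such a combinator commonly looks like; the project's actual definition may differ:

    (* Memoize a thunk behind a ref: the first call runs f, later calls reuse the result. *)
    fun lazy (f : unit -> 'a) : unit -> 'a =
      let
        val cache : 'a option ref = ref NONE
      in
        fn () =>
          case !cache of
              SOME v => v
            | NONE => let val v = f () in cache := SOME v; v end
      end
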
diff --git a/tokenizer.sig b/tokenizer.sig
index 245cfbe..c9626e0 100644
--- a/tokenizer.sig
+++ b/tokenizer.sig
@@ -1,4 +1,6 @@
signature TOKENIZER = sig
+ structure S: STREAM
+
datatype intType = ItDec | ItOct | ItHex
datatype intSfx = IsNone | IsU | IsL | IsUL | IsLL | IsULL
datatype floatSfx = FsNone | FsF | FsL
@@ -116,15 +118,15 @@ signature TOKENIZER = sig
CppError |
CppPragma
- type fullToken = Stream.pos * token
+ type fullToken = S.pos * token
(* Fatal. both may be thrown by tokenize *)
exception FsmTableIsTooSmall
- exception TkErrorAug of Stream.ppos * string
+ exception TkErrorAug of S.ppos * string
- val getToken: Stream.t -> fullToken option * Stream.t
+ val getToken: S.t -> fullToken option * S.t
- val tokenize: Stream.t -> fullToken list
+ val tokenize: S.t -> fullToken list
val token2str: token -> string
val printToken: token -> unit
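With Stream exposed as the substructure S, every type in this interface is reached through the instantiated structure itself, so a caller applies the functor once and then uses the same structure both to open a stream and to tokenize it. A hedged sketch, assuming S.create : string -> S.t as its use in Cpp.create above suggests; the file name is illustrative:

    structure Tok = Tokenizer (structure H = Hashtable; structure S = Stream)

    (* Tokenize one file and print every token; source positions are ignored here. *)
    fun dumpFile fname =
      List.app (fn (_, tk) => Tok.printToken tk)
               (Tok.tokenize (Tok.S.create fname))

    val () = dumpFile "example.c"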