summaryrefslogtreecommitdiff
path: root/cpp.sml
diff options
context:
space:
mode:
Diffstat (limited to 'cpp.sml')
-rw-r--r--cpp.sml198
1 files changed, 147 insertions, 51 deletions
diff --git a/cpp.sml b/cpp.sml
index 168df7f..1050c03 100644
--- a/cpp.sml
+++ b/cpp.sml
@@ -9,6 +9,34 @@ datatype token =
Id of string |
CharConst of string * int |
+ kwBreak |
+ kwCase |
+ kwChar |
+ kwConst |
+ kwContinue |
+ kwDefault |
+ kwDouble |
+ kwElse |
+ kwEnum |
+ kwExtern |
+ kwFloat |
+ kwFor |
+ kwGoto |
+ kwInt |
+ kwLong |
+ kwRegister |
+ kwReturn |
+ kwShort |
+ kwSigned |
+ kwSizeof |
+ kwStruct |
+ kwSwitch |
+ kwTypedef |
+ kwUnion |
+ kwUnsigned |
+ kwVoid |
+ kwVolatile |
+
OParen |
CParen |
@@ -32,10 +60,46 @@ datatype token =
CppInclude |
CppDefine
-val tokenRepr = [
+val kwPrefix = #"@"
+val cppPrefix = #"$"
+
+val tokenRepr =
+let
+ fun & repr = str kwPrefix ^ repr
+ fun % repr = str cppPrefix ^ repr
+in
+ [
(OParen, "("),
(CParen, ")"),
+ (kwBreak, &"break"),
+ (kwCase, &"case"),
+ (kwChar, &"char"),
+ (kwConst, &"const"),
+ (kwContinue, &"continue"),
+ (kwDefault, &"default"),
+ (kwDouble, &"double"),
+ (kwElse, &"else"),
+ (kwEnum, &"enum"),
+ (kwExtern, &"extern"),
+ (kwFloat, &"float"),
+ (kwFor, &"for"),
+ (kwGoto, &"goto"),
+ (kwInt, &"int"),
+ (kwLong, &"long"),
+ (kwRegister, &"register"),
+ (kwReturn, &"return"),
+ (kwShort, &"short"),
+ (kwSigned, &"signed"),
+ (kwSizeof, &"sizeof"),
+ (kwStruct, &"struct"),
+ (kwSwitch, &"switch"),
+ (kwTypedef, &"typedef"),
+ (kwUnion, &"union"),
+ (kwUnsigned, &"unsigned"),
+ (kwVoid, &"void"),
+ (kwVolatile, &"volatile"),
+
(Plus, "+"),
(DoublePlus, "++"),
(Minus, "-"),
@@ -53,19 +117,17 @@ val tokenRepr = [
(Hash, "#"),
(DoubleHash, "##"),
- (CppInclude, "$include"),
- (CppDefine, "$define")
+ (CppInclude, % "include"),
+ (CppDefine, % "define")
]
-
-fun isCppDir CppInclude = true
- | isCppDir _ = false
+end
exception TokenWithoutRepr
val printToken = fn
(Number s) => printLn $ "Num: " ^ s
| (Id s) => printLn $ "Id: " ^ s
- | (CharConst (repr, _)) => printLn $ "chr: " ^ repr
+ | (CharConst (repr, _)) => printLn repr
| v =>
case List.find (fn (x, _) => x = v) tokenRepr of
SOME (_, repr) => printLn repr
@@ -128,7 +190,7 @@ in
end
fun getchar
- (S as { stack = (id, off, contents) :: rest, allFiles }: stream)
+ ({ stack = (id, off, contents) :: rest, allFiles }: stream)
: (char * stream) =
if off < String.size contents then
(String.sub (contents, off),
@@ -138,21 +200,21 @@ fun getchar
| getchar _ = raise EndOfStream
fun ungetc
- (S as { stack = (id, off, contents) :: rest, allFiles }) =
+ ({ stack = (id, off, contents) :: rest, allFiles }) =
if off = 0 then
raise InvalidStream
else
{ stack = (id, off - 1, contents) :: rest, allFiles }
| ungetc _ = raise InvalidStream
-fun getOffset ({ stack = (id, off, contents) :: _, ... }: stream) = off
+fun getOffset ({ stack = (_, off, _) :: _, ... }: stream) = off
| getOffset _ = raise InvalidStream
fun getPosAfterCharRead
- ({ stack = (id, off, contents) :: _, ... }: stream) = (id, off - 1)
+ ({ stack = (id, off, _) :: _, ... }: stream) = (id, off - 1)
| getPosAfterCharRead _ = raise InvalidStream
-fun getPos ({ stack = (id, off, contents) :: _, ... }: stream) = (id, off)
+fun getPos ({ stack = (id, off, _) :: _, ... }: stream) = (id, off)
| getPos _ = raise InvalidStream
fun getSubstr startOff endOff
@@ -161,7 +223,7 @@ fun getSubstr startOff endOff
| getSubstr _ _ _ = raise InvalidStream
fun advanceToNewFile
- (S as { stack = (id, off, contents) :: rest, allFiles }: stream) =
+ ({ stack = (_, off, contents) :: rest, allFiles }: stream) =
if off = String.size contents then
{ stack = rest, allFiles }
else
@@ -175,9 +237,17 @@ in
{ allFiles = [(0, fname, contents)], stack = [(0, 0, contents)] }
end
+fun isSpace c = c = #" " orelse c = #"\t" orelse c = #"\n"
+fun isLetter c =
+ ord c >= ord #"a" andalso ord c <= ord #"z" orelse
+ ord c >= ord #"A" andalso ord c <= ord #"Z"
+fun isIdStart c = c = #"_" orelse isLetter c
+fun isDigit c = ord c >= ord #"0" andalso ord c <= ord #"9"
+fun isIdBody c = isIdStart c orelse isDigit c
+
(* FSM for parsing symbols *)
-val maxStates = 21
+val maxStates = 16
exception FsmTableIsTooSmall
exception FsmTableIsTooLarge of int
@@ -197,7 +267,7 @@ in
fsmInsert (nextState, buf) (!nextState - 1) tk cs
)
end
- | fsmInsert (nextState, buf) curState tk [] =
+ | fsmInsert (_, buf) curState tk [] =
let
open Array
val (_, row) = sub (buf, curState)
@@ -205,6 +275,30 @@ in
update (buf, curState, (tk, row))
end
+fun isStartForFsmGen () =
+let
+ open Array
+
+ val lookupTable = array (128, false)
+
+ fun firstChr s = ord $ String.sub (s, 0)
+ val () = List.app
+ (fn (_, repr) => update (lookupTable, firstChr repr, true))
+ $ List.filter
+ (fn (_, repr) =>
+ let
+ val c = String.sub (repr, 0)
+ in
+ c <> kwPrefix andalso c <> cppPrefix
+ end)
+ tokenRepr
+
+in
+ fn c => sub (lookupTable, ord c)
+end
+
+val isStartForFsm = isStartForFsmGen ()
+
fun fsmTableCreate () =
let
open Array
@@ -214,8 +308,12 @@ let
val r = ref 1
fun filterNeeded [] acc = acc
- | filterNeeded ((T as (tk, _)) :: tks) acc =
- filterNeeded tks (if isCppDir tk then acc else T :: acc)
+ | filterNeeded ((T as (tk, repr)) :: tks) acc =
+ filterNeeded tks
+ (if isStartForFsm $ String.sub (repr, 0) then
+ (printToken tk; T :: acc)
+ else
+ acc)
val tokenRepr = filterNeeded tokenRepr []
@@ -234,25 +332,8 @@ in
T
end
-fun isStartForFsmGen () =
-let
- open Array
-
- val lookupTable = array (128, false)
-
- fun firstChr s = ord $ String.sub (s, 0)
- val () = List.app
- (fn (_, repr) => update (lookupTable, firstChr repr, true)) tokenRepr
-in
- fn c => sub (lookupTable, ord c)
-end
-
-val isStartForFsm = isStartForFsmGen ()
-
val fsmTable = fsmTableCreate ()
-val () = printLn $ "states: " ^ Int.toString (!(#1 fsmTable))
-
fun fsmEat stream =
let
open Array
@@ -283,14 +364,6 @@ end
exception UnexpectedCharacter of char
-fun isSpace c = c = #" " orelse c = #"\t" orelse c = #"\n"
-fun isLetter c =
- ord c >= ord #"a" andalso ord c <= ord #"z" orelse
- ord c >= ord #"A" andalso ord c <= ord #"Z"
-fun isIdStart c = c = #"_" orelse isLetter c
-fun isDigit c = ord c >= ord #"0" andalso ord c <= ord #"9"
-fun isIdBody c = isIdStart c orelse isDigit c
-
fun isFirstOnLine (tk: fullToken) (stream: stream) =
let
val (id, offset) = #1 tk
@@ -319,7 +392,7 @@ end
fun parseGeneric stream parser acc =
let
val stream = ungetc stream
- val P as (fid, startOff) = getPos stream
+ val P as (_, startOff) = getPos stream
fun parse' stream acc = let
val (c, stream) = (fn (c, s) => (SOME c, s)) $ getchar stream handle
@@ -345,10 +418,25 @@ in
s
end
+fun findKeyword s =
+ case List.find
+ (fn (_, repr) =>
+ String.sub (repr, 0) = kwPrefix andalso
+ String.extract (repr, 1, NONE) = s)
+ tokenRepr
+ of
+ SOME (tk, _) => tk
+ | NONE => Id s
+
fun idParser () (stream, startOff) c =
let
fun finalize stream =
- ((), SOME $ Id (finishSeqRead startOff stream), stream)
+ let
+ val s = finishSeqRead startOff stream
+ val tk = findKeyword s
+ in
+ ((), SOME tk, stream)
+ end
in
case c of
NONE => finalize stream
@@ -359,15 +447,23 @@ in
finalize (ungetc stream)
end
-fun formCppDir tk tkl =
+val formCppDirExn = Fail $ "Expected cpp direcive\n"
+
+fun formCppDir (Id s) tkl =
let
+ open String
fun conv tk = ((#1 $ hd tkl, tk) :: tl tkl)
in
- case tk of
- Id "include" => conv CppInclude
- | Id "define" => conv CppDefine
- | _ => raise Fail $ "Expected cpp direcive\n"
+ case List.find
+ (fn (_, repr) =>
+ sub (repr, 0) = cppPrefix andalso
+ extract (repr, 1, NONE) = s)
+ tokenRepr
+ of
+ SOME (tk, _) => conv tk
+ | NONE => raise formCppDirExn
end
+ | formCppDir _ _ = raise formCppDirExn
fun numParser () (stream, startOff) c =
let
@@ -427,9 +523,9 @@ fun charParser ChInit (stream, _) (SOME c) =
SOME $ CharConst (finishSeqRead startOff stream, v), stream)
else
raise chExnExpTerm
- | charParser chTerm _ (SOME _) =
+ | charParser ChTerm _ (SOME _) =
raise chExnAfterTerm
- | charParser state (stream, _) NONE =
+ | charParser state (_, _) NONE =
raise case state of
ChInit => chExnStart
| ChStart => chExnExpValue