diff options
Diffstat (limited to 'cpp.sml')
-rw-r--r-- | cpp.sml | 198 |
1 files changed, 147 insertions, 51 deletions
@@ -9,6 +9,34 @@ datatype token = Id of string | CharConst of string * int | + kwBreak | + kwCase | + kwChar | + kwConst | + kwContinue | + kwDefault | + kwDouble | + kwElse | + kwEnum | + kwExtern | + kwFloat | + kwFor | + kwGoto | + kwInt | + kwLong | + kwRegister | + kwReturn | + kwShort | + kwSigned | + kwSizeof | + kwStruct | + kwSwitch | + kwTypedef | + kwUnion | + kwUnsigned | + kwVoid | + kwVolatile | + OParen | CParen | @@ -32,10 +60,46 @@ datatype token = CppInclude | CppDefine -val tokenRepr = [ +val kwPrefix = #"@" +val cppPrefix = #"$" + +val tokenRepr = +let + fun & repr = str kwPrefix ^ repr + fun % repr = str cppPrefix ^ repr +in + [ (OParen, "("), (CParen, ")"), + (kwBreak, &"break"), + (kwCase, &"case"), + (kwChar, &"char"), + (kwConst, &"const"), + (kwContinue, &"continue"), + (kwDefault, &"default"), + (kwDouble, &"double"), + (kwElse, &"else"), + (kwEnum, &"enum"), + (kwExtern, &"extern"), + (kwFloat, &"float"), + (kwFor, &"for"), + (kwGoto, &"goto"), + (kwInt, &"int"), + (kwLong, &"long"), + (kwRegister, &"register"), + (kwReturn, &"return"), + (kwShort, &"short"), + (kwSigned, &"signed"), + (kwSizeof, &"sizeof"), + (kwStruct, &"struct"), + (kwSwitch, &"switch"), + (kwTypedef, &"typedef"), + (kwUnion, &"union"), + (kwUnsigned, &"unsigned"), + (kwVoid, &"void"), + (kwVolatile, &"volatile"), + (Plus, "+"), (DoublePlus, "++"), (Minus, "-"), @@ -53,19 +117,17 @@ val tokenRepr = [ (Hash, "#"), (DoubleHash, "##"), - (CppInclude, "$include"), - (CppDefine, "$define") + (CppInclude, % "include"), + (CppDefine, % "define") ] - -fun isCppDir CppInclude = true - | isCppDir _ = false +end exception TokenWithoutRepr val printToken = fn (Number s) => printLn $ "Num: " ^ s | (Id s) => printLn $ "Id: " ^ s - | (CharConst (repr, _)) => printLn $ "chr: " ^ repr + | (CharConst (repr, _)) => printLn repr | v => case List.find (fn (x, _) => x = v) tokenRepr of SOME (_, repr) => printLn repr @@ -128,7 +190,7 @@ in end fun getchar - (S as { stack = (id, off, contents) :: rest, allFiles }: stream) + ({ stack = (id, off, contents) :: rest, allFiles }: stream) : (char * stream) = if off < String.size contents then (String.sub (contents, off), @@ -138,21 +200,21 @@ fun getchar | getchar _ = raise EndOfStream fun ungetc - (S as { stack = (id, off, contents) :: rest, allFiles }) = + ({ stack = (id, off, contents) :: rest, allFiles }) = if off = 0 then raise InvalidStream else { stack = (id, off - 1, contents) :: rest, allFiles } | ungetc _ = raise InvalidStream -fun getOffset ({ stack = (id, off, contents) :: _, ... }: stream) = off +fun getOffset ({ stack = (_, off, _) :: _, ... }: stream) = off | getOffset _ = raise InvalidStream fun getPosAfterCharRead - ({ stack = (id, off, contents) :: _, ... }: stream) = (id, off - 1) + ({ stack = (id, off, _) :: _, ... }: stream) = (id, off - 1) | getPosAfterCharRead _ = raise InvalidStream -fun getPos ({ stack = (id, off, contents) :: _, ... }: stream) = (id, off) +fun getPos ({ stack = (id, off, _) :: _, ... }: stream) = (id, off) | getPos _ = raise InvalidStream fun getSubstr startOff endOff @@ -161,7 +223,7 @@ fun getSubstr startOff endOff | getSubstr _ _ _ = raise InvalidStream fun advanceToNewFile - (S as { stack = (id, off, contents) :: rest, allFiles }: stream) = + ({ stack = (_, off, contents) :: rest, allFiles }: stream) = if off = String.size contents then { stack = rest, allFiles } else @@ -175,9 +237,17 @@ in { allFiles = [(0, fname, contents)], stack = [(0, 0, contents)] } end +fun isSpace c = c = #" " orelse c = #"\t" orelse c = #"\n" +fun isLetter c = + ord c >= ord #"a" andalso ord c <= ord #"z" orelse + ord c >= ord #"A" andalso ord c <= ord #"Z" +fun isIdStart c = c = #"_" orelse isLetter c +fun isDigit c = ord c >= ord #"0" andalso ord c <= ord #"9" +fun isIdBody c = isIdStart c orelse isDigit c + (* FSM for parsing symbols *) -val maxStates = 21 +val maxStates = 16 exception FsmTableIsTooSmall exception FsmTableIsTooLarge of int @@ -197,7 +267,7 @@ in fsmInsert (nextState, buf) (!nextState - 1) tk cs ) end - | fsmInsert (nextState, buf) curState tk [] = + | fsmInsert (_, buf) curState tk [] = let open Array val (_, row) = sub (buf, curState) @@ -205,6 +275,30 @@ in update (buf, curState, (tk, row)) end +fun isStartForFsmGen () = +let + open Array + + val lookupTable = array (128, false) + + fun firstChr s = ord $ String.sub (s, 0) + val () = List.app + (fn (_, repr) => update (lookupTable, firstChr repr, true)) + $ List.filter + (fn (_, repr) => + let + val c = String.sub (repr, 0) + in + c <> kwPrefix andalso c <> cppPrefix + end) + tokenRepr + +in + fn c => sub (lookupTable, ord c) +end + +val isStartForFsm = isStartForFsmGen () + fun fsmTableCreate () = let open Array @@ -214,8 +308,12 @@ let val r = ref 1 fun filterNeeded [] acc = acc - | filterNeeded ((T as (tk, _)) :: tks) acc = - filterNeeded tks (if isCppDir tk then acc else T :: acc) + | filterNeeded ((T as (tk, repr)) :: tks) acc = + filterNeeded tks + (if isStartForFsm $ String.sub (repr, 0) then + (printToken tk; T :: acc) + else + acc) val tokenRepr = filterNeeded tokenRepr [] @@ -234,25 +332,8 @@ in T end -fun isStartForFsmGen () = -let - open Array - - val lookupTable = array (128, false) - - fun firstChr s = ord $ String.sub (s, 0) - val () = List.app - (fn (_, repr) => update (lookupTable, firstChr repr, true)) tokenRepr -in - fn c => sub (lookupTable, ord c) -end - -val isStartForFsm = isStartForFsmGen () - val fsmTable = fsmTableCreate () -val () = printLn $ "states: " ^ Int.toString (!(#1 fsmTable)) - fun fsmEat stream = let open Array @@ -283,14 +364,6 @@ end exception UnexpectedCharacter of char -fun isSpace c = c = #" " orelse c = #"\t" orelse c = #"\n" -fun isLetter c = - ord c >= ord #"a" andalso ord c <= ord #"z" orelse - ord c >= ord #"A" andalso ord c <= ord #"Z" -fun isIdStart c = c = #"_" orelse isLetter c -fun isDigit c = ord c >= ord #"0" andalso ord c <= ord #"9" -fun isIdBody c = isIdStart c orelse isDigit c - fun isFirstOnLine (tk: fullToken) (stream: stream) = let val (id, offset) = #1 tk @@ -319,7 +392,7 @@ end fun parseGeneric stream parser acc = let val stream = ungetc stream - val P as (fid, startOff) = getPos stream + val P as (_, startOff) = getPos stream fun parse' stream acc = let val (c, stream) = (fn (c, s) => (SOME c, s)) $ getchar stream handle @@ -345,10 +418,25 @@ in s end +fun findKeyword s = + case List.find + (fn (_, repr) => + String.sub (repr, 0) = kwPrefix andalso + String.extract (repr, 1, NONE) = s) + tokenRepr + of + SOME (tk, _) => tk + | NONE => Id s + fun idParser () (stream, startOff) c = let fun finalize stream = - ((), SOME $ Id (finishSeqRead startOff stream), stream) + let + val s = finishSeqRead startOff stream + val tk = findKeyword s + in + ((), SOME tk, stream) + end in case c of NONE => finalize stream @@ -359,15 +447,23 @@ in finalize (ungetc stream) end -fun formCppDir tk tkl = +val formCppDirExn = Fail $ "Expected cpp direcive\n" + +fun formCppDir (Id s) tkl = let + open String fun conv tk = ((#1 $ hd tkl, tk) :: tl tkl) in - case tk of - Id "include" => conv CppInclude - | Id "define" => conv CppDefine - | _ => raise Fail $ "Expected cpp direcive\n" + case List.find + (fn (_, repr) => + sub (repr, 0) = cppPrefix andalso + extract (repr, 1, NONE) = s) + tokenRepr + of + SOME (tk, _) => conv tk + | NONE => raise formCppDirExn end + | formCppDir _ _ = raise formCppDirExn fun numParser () (stream, startOff) c = let @@ -427,9 +523,9 @@ fun charParser ChInit (stream, _) (SOME c) = SOME $ CharConst (finishSeqRead startOff stream, v), stream) else raise chExnExpTerm - | charParser chTerm _ (SOME _) = + | charParser ChTerm _ (SOME _) = raise chExnAfterTerm - | charParser state (stream, _) NONE = + | charParser state (_, _) NONE = raise case state of ChInit => chExnStart | ChStart => chExnExpValue |