summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- cpp.sml 3
-rw-r--r-- stream.sig 2
-rw-r--r-- tokenizer.sig 2
-rw-r--r-- tokenizer.sml 248
4 files changed, 142 insertions, 113 deletions
diff --git a/cpp.sml b/cpp.sml
index b2762b5..e2ffc2c 100644
--- a/cpp.sml
+++ b/cpp.sml
@@ -4,8 +4,7 @@ let
val tkl = Tokenizer.tokenize stream
val fileInfo = Stream.convert stream
in
- List.app
- (fn (p, x) => (Stream.printPos fileInfo p; Tokenizer.printToken x)) tkl
+ Tokenizer.printTokens tkl (#2 fileInfo)
end
| main _ = printLn "Expected a signle argument: file name"
diff --git a/stream.sig b/stream.sig
index c09b7c1..795d193 100644
--- a/stream.sig
+++ b/stream.sig
@@ -5,7 +5,7 @@ signature STREAM = sig
type ppos (* pretty pos *)
type t
- type fileInfo
+ type fileInfo = fileId * string * string
val convert: t -> fileInfo
diff --git a/tokenizer.sig b/tokenizer.sig
index dd0eab4..ffbec52 100644
--- a/tokenizer.sig
+++ b/tokenizer.sig
@@ -8,4 +8,6 @@ signature TOKENIZER = sig
val tokenize: Stream.t -> fullToken list
val printToken: token -> unit
+
+ val printTokens: fullToken list -> string -> unit
end
diff --git a/tokenizer.sml b/tokenizer.sml
index 8b1ace4..ba9d0df 100644
--- a/tokenizer.sml
+++ b/tokenizer.sml
@@ -1,108 +1,110 @@
structure Tokenizer:> TOKENIZER = struct
datatype token =
- Invalid |
- Number of string |
- Id of string |
- CharConst of string * int |
- StringConst of string |
-
- kwBreak |
- kwCase |
- kwChar |
- kwConst |
- kwContinue |
- kwDefault |
- kwDouble |
- kwElse |
- kwEnum |
- kwExtern |
- kwFloat |
- kwFor |
- kwGoto |
- kwInt |
- kwLong |
- kwRegister |
- kwReturn |
- kwShort |
- kwSigned |
- kwSizeof |
- kwStruct |
- kwSwitch |
- kwTypedef |
- kwUnion |
- kwUnsigned |
- kwVoid |
- kwVolatile |
-
- LParen |
- RParen |
- LBracket |
- RBracket |
- LBrace |
- RBrace |
-
- QuestionMark |
- Colon |
- Coma |
- Semicolon |
-
- Arrow |
- Plus |
- DoublePlus|
- Minus |
- DoubleMinus |
- Ampersand |
- Asterisk |
- Slash |
- Tilde |
- ExclMark |
- Percent |
- DoubleGreater |
- DoubleLess |
- Greater |
- Less |
- EqualSign |
- LessEqualSign |
- GreaterEqualSign |
- DoubleEqualSign |
- ExclMarkEqualSign |
- Cap |
- VerticalBar |
- DoubleAmpersand |
- DoubleVerticalBar |
-
- AsteriskEqualSign |
- SlashEqualSign |
- PercentEqualSign |
- PlusEqualSign |
- MinusEqualSign |
- DoubleLessEqualSign |
- DoubleGreaterEqualSign |
- AmpersandEqualSign |
- CapEqualSign |
- VerticalBarEqualSign |
-
- Hash |
- DoubleHash |
-
- Dot |
- DoubleDot |
- TripleDot |
-
- CommentStart |
-
- CppInclude |
- CppDefine |
- CppUndef |
- CppIf |
- CppIfdef |
- CppIfndef |
- CppElse |
- CppElif |
- CppEndif |
- CppWarning |
- CppError |
- CppPragma
+ Invalid |
+ NewLine | (* produced by getToken when it reads a #"\n" character; printTokens counts these to number output lines *)
+
+ Number of string |
+ Id of string |
+ CharConst of string * int |
+ StringConst of string |
+
+ kwBreak |
+ kwCase |
+ kwChar |
+ kwConst |
+ kwContinue |
+ kwDefault |
+ kwDouble |
+ kwElse |
+ kwEnum |
+ kwExtern |
+ kwFloat |
+ kwFor |
+ kwGoto |
+ kwInt |
+ kwLong |
+ kwRegister |
+ kwReturn |
+ kwShort |
+ kwSigned |
+ kwSizeof |
+ kwStruct |
+ kwSwitch |
+ kwTypedef |
+ kwUnion |
+ kwUnsigned |
+ kwVoid |
+ kwVolatile |
+
+ LParen |
+ RParen |
+ LBracket |
+ RBracket |
+ LBrace |
+ RBrace |
+
+ QuestionMark |
+ Colon |
+ Coma |
+ Semicolon |
+
+ Arrow |
+ Plus |
+ DoublePlus|
+ Minus |
+ DoubleMinus |
+ Ampersand |
+ Asterisk |
+ Slash |
+ Tilde |
+ ExclMark |
+ Percent |
+ DoubleGreater |
+ DoubleLess |
+ Greater |
+ Less |
+ EqualSign |
+ LessEqualSign |
+ GreaterEqualSign |
+ DoubleEqualSign |
+ ExclMarkEqualSign |
+ Cap |
+ VerticalBar |
+ DoubleAmpersand |
+ DoubleVerticalBar |
+
+ AsteriskEqualSign |
+ SlashEqualSign |
+ PercentEqualSign |
+ PlusEqualSign |
+ MinusEqualSign |
+ DoubleLessEqualSign |
+ DoubleGreaterEqualSign |
+ AmpersandEqualSign |
+ CapEqualSign |
+ VerticalBarEqualSign |
+
+ Hash |
+ DoubleHash |
+
+ Dot |
+ DoubleDot |
+ TripleDot |
+
+ CommentStart |
+
+ CppInclude |
+ CppDefine |
+ CppUndef |
+ CppIf |
+ CppIfdef |
+ CppIfndef |
+ CppElse |
+ CppElif |
+ CppEndif |
+ CppWarning |
+ CppError |
+ CppPragma
val kwPrefix = #"@"
val cppPrefix = #"$"
@@ -127,6 +129,8 @@ structure Tokenizer:> TOKENIZER = struct
fun % repr = str cppPrefix ^ repr
in
[
+ (NewLine, "NewLine"),
+
(kwBreak, &"break"),
(kwCase, &"case"),
(kwChar, &"char"),
@@ -228,14 +232,14 @@ structure Tokenizer:> TOKENIZER = struct
end
val printToken = fn
- Number s => printLn $ "Num: " ^ s
- | Id s => printLn $ "Id: " ^ s
- | CharConst (repr, _) => printLn repr
+ Number s => print $ "`" ^ s ^ "`" (* numbers are wrapped in backquotes; print replaces printLn throughout this function, and printTokens writes a space after each token *)
+ | Id s => print $ "id:" ^ s
+ | CharConst (repr, _) => print repr (* only the textual form is printed; the int component is ignored *)
| StringConst s =>
- printLn $ "\"" ^ s ^ "\""
+ print $ "\"" ^ s ^ "\""
| v =>
case List.find (fn (x, _) => x = v) tokenRepr of
- SOME (_, repr) => printLn repr
+ SOME (_, repr) => print repr (* every other token prints the string paired with it in tokenRepr; a token with no entry raises TokenWithoutRepr *)
| NONE => raise TokenWithoutRepr
fun isIdStart c = Char.isAlpha c orelse c = #"_"
@@ -280,11 +284,12 @@ structure Tokenizer:> TOKENIZER = struct
val () = List.app
(fn (_, repr) => update (lookupTable, firstChr repr, true))
$ List.filter
- (fn (_, repr) =>
+ (fn (tk, repr) =>
let
val c = String.sub (repr, 0)
in
c <> kwPrefix andalso c <> cppPrefix
+ andalso tk <> NewLine (* NewLine is paired with the word NewLine in tokenRepr, presumably a display name rather than source text, so its first character is kept out of lookupTable *)
end)
tokenRepr
@@ -773,7 +778,9 @@ structure Tokenizer:> TOKENIZER = struct
case c of
NONE => (NONE, stream)
| SOME c =>
- if Char.isSpace c then
+ if c = #"\n" then
+ (SOME (Stream.getPosAfterCharRead stream, NewLine), stream)
+ else if Char.isSpace c then
getToken stream
else if isIdStart c then
@-> idParser ()
@@ -805,4 +812,25 @@ structure Tokenizer:> TOKENIZER = struct
aux [] stream
end
+ fun printTokens tkl fname = (* prints the tokens of tkl grouped by line: a fname:line header (via printLn), then a tab and the tokens of that line separated by spaces *)
+ let
+ fun print' line _ ((_, NewLine) :: tks) = (* a NewLine token prints nothing: it advances the line counter and marks the next token as first on its line *)
+ print' (line + 1) true tks
+ | print' line firstOnLine ((_, tk) :: tks) = ( (* the position stored with the token is ignored; line numbers come only from counted NewLine tokens *)
+ if firstOnLine then ( (* start a new group: newline, header, tab *)
+ print "\n";
+ printLn $ fname ^ ":" ^ Int.toString line;
+ print "\t")
+ else
+ ();
+ printToken tk;
+ print " ";
+ print' line false tks
+ )
+ | print' _ _ [] = () (* end of the token list *)
+ in
+ print' 1 true tkl; (* line numbering starts at 1 and the first token always gets a header *)
+ print "\n"
+ end
+
end