diff options
author | Vladimir Azarov <avm@intermediate-node.net> | 2025-03-25 01:45:47 +0100 |
---|---|---|
committer | Vladimir Azarov <avm@intermediate-node.net> | 2025-03-25 01:45:47 +0100 |
commit | 90854ccf3514dc67a30556500e1716dd1933954c (patch) | |
tree | b65b7f0739169337b6a4ce5b9ecdd18dc3617a56 | |
parent | a0be166ef23e1b34b09060b99c03cdadb6ac2132 (diff) |
Adequate token printing
-rw-r--r-- | cpp.sml | 3 |
-rw-r--r-- | stream.sig | 2 |
-rw-r--r-- | tokenizer.sig | 2 |
-rw-r--r-- | tokenizer.sml | 248 |
4 files changed, 142 insertions, 113 deletions
@@ -4,8 +4,7 @@ let val tkl = Tokenizer.tokenize stream val fileInfo = Stream.convert stream in - List.app - (fn (p, x) => (Stream.printPos fileInfo p; Tokenizer.printToken x)) tkl + Tokenizer.printTokens tkl (#2 fileInfo) end | main _ = printLn "Expected a signle argument: file name" @@ -5,7 +5,7 @@ signature STREAM = sig type ppos (* pretty pos *) type t - type fileInfo + type fileInfo = fileId * string * string val convert: t -> fileInfo diff --git a/tokenizer.sig b/tokenizer.sig index dd0eab4..ffbec52 100644 --- a/tokenizer.sig +++ b/tokenizer.sig @@ -8,4 +8,6 @@ signature TOKENIZER = sig val tokenize: Stream.t -> fullToken list val printToken: token -> unit + + val printTokens: fullToken list -> string -> unit end diff --git a/tokenizer.sml b/tokenizer.sml index 8b1ace4..ba9d0df 100644 --- a/tokenizer.sml +++ b/tokenizer.sml @@ -1,108 +1,110 @@ structure Tokenizer:> TOKENIZER = struct datatype token = - Invalid | - Number of string | - Id of string | - CharConst of string * int | - StringConst of string | - - kwBreak | - kwCase | - kwChar | - kwConst | - kwContinue | - kwDefault | - kwDouble | - kwElse | - kwEnum | - kwExtern | - kwFloat | - kwFor | - kwGoto | - kwInt | - kwLong | - kwRegister | - kwReturn | - kwShort | - kwSigned | - kwSizeof | - kwStruct | - kwSwitch | - kwTypedef | - kwUnion | - kwUnsigned | - kwVoid | - kwVolatile | - - LParen | - RParen | - LBracket | - RBracket | - LBrace | - RBrace | - - QuestionMark | - Colon | - Coma | - Semicolon | - - Arrow | - Plus | - DoublePlus| - Minus | - DoubleMinus | - Ampersand | - Asterisk | - Slash | - Tilde | - ExclMark | - Percent | - DoubleGreater | - DoubleLess | - Greater | - Less | - EqualSign | - LessEqualSign | - GreaterEqualSign | - DoubleEqualSign | - ExclMarkEqualSign | - Cap | - VerticalBar | - DoubleAmpersand | - DoubleVerticalBar | - - AsteriskEqualSign | - SlashEqualSign | - PercentEqualSign | - PlusEqualSign | - MinusEqualSign | - DoubleLessEqualSign | - DoubleGreaterEqualSign | - 
AmpersandEqualSign | - CapEqualSign | - VerticalBarEqualSign | - - Hash | - DoubleHash | - - Dot | - DoubleDot | - TripleDot | - - CommentStart | - - CppInclude | - CppDefine | - CppUndef | - CppIf | - CppIfdef | - CppIfndef | - CppElse | - CppElif | - CppEndif | - CppWarning | - CppError | - CppPragma + Invalid | + NewLine | + + Number of string | + Id of string | + CharConst of string * int | + StringConst of string | + + kwBreak | + kwCase | + kwChar | + kwConst | + kwContinue | + kwDefault | + kwDouble | + kwElse | + kwEnum | + kwExtern | + kwFloat | + kwFor | + kwGoto | + kwInt | + kwLong | + kwRegister | + kwReturn | + kwShort | + kwSigned | + kwSizeof | + kwStruct | + kwSwitch | + kwTypedef | + kwUnion | + kwUnsigned | + kwVoid | + kwVolatile | + + LParen | + RParen | + LBracket | + RBracket | + LBrace | + RBrace | + + QuestionMark | + Colon | + Coma | + Semicolon | + + Arrow | + Plus | + DoublePlus| + Minus | + DoubleMinus | + Ampersand | + Asterisk | + Slash | + Tilde | + ExclMark | + Percent | + DoubleGreater | + DoubleLess | + Greater | + Less | + EqualSign | + LessEqualSign | + GreaterEqualSign | + DoubleEqualSign | + ExclMarkEqualSign | + Cap | + VerticalBar | + DoubleAmpersand | + DoubleVerticalBar | + + AsteriskEqualSign | + SlashEqualSign | + PercentEqualSign | + PlusEqualSign | + MinusEqualSign | + DoubleLessEqualSign | + DoubleGreaterEqualSign | + AmpersandEqualSign | + CapEqualSign | + VerticalBarEqualSign | + + Hash | + DoubleHash | + + Dot | + DoubleDot | + TripleDot | + + CommentStart | + + CppInclude | + CppDefine | + CppUndef | + CppIf | + CppIfdef | + CppIfndef | + CppElse | + CppElif | + CppEndif | + CppWarning | + CppError | + CppPragma val kwPrefix = #"@" val cppPrefix = #"$" @@ -127,6 +129,8 @@ structure Tokenizer:> TOKENIZER = struct fun % repr = str cppPrefix ^ repr in [ + (NewLine, "NewLine"), + (kwBreak, &"break"), (kwCase, &"case"), (kwChar, &"char"), @@ -228,14 +232,14 @@ structure Tokenizer:> TOKENIZER = struct end val printToken 
= fn - Number s => printLn $ "Num: " ^ s - | Id s => printLn $ "Id: " ^ s - | CharConst (repr, _) => printLn repr + Number s => print $ "`" ^ s ^ "`" + | Id s => print $ "id:" ^ s + | CharConst (repr, _) => print repr | StringConst s => - printLn $ "\"" ^ s ^ "\"" + print $ "\"" ^ s ^ "\"" | v => case List.find (fn (x, _) => x = v) tokenRepr of - SOME (_, repr) => printLn repr + SOME (_, repr) => print repr | NONE => raise TokenWithoutRepr fun isIdStart c = Char.isAlpha c orelse c = #"_" @@ -280,11 +284,12 @@ structure Tokenizer:> TOKENIZER = struct val () = List.app (fn (_, repr) => update (lookupTable, firstChr repr, true)) $ List.filter - (fn (_, repr) => + (fn (tk, repr) => let val c = String.sub (repr, 0) in c <> kwPrefix andalso c <> cppPrefix + andalso tk <> NewLine end) tokenRepr @@ -773,7 +778,9 @@ structure Tokenizer:> TOKENIZER = struct case c of NONE => (NONE, stream) | SOME c => - if Char.isSpace c then + if c = #"\n" then + (SOME (Stream.getPosAfterCharRead stream, NewLine), stream) + else if Char.isSpace c then getToken stream else if isIdStart c then @-> idParser () @@ -805,4 +812,25 @@ structure Tokenizer:> TOKENIZER = struct aux [] stream end + fun printTokens tkl fname = + let + fun print' line _ ((_, NewLine) :: tks) = + print' (line + 1) true tks + | print' line firstOnLine ((_, tk) :: tks) = ( + if firstOnLine then ( + print "\n"; + printLn $ fname ^ ":" ^ Int.toString line; + print "\t") + else + (); + printToken tk; + print " "; + print' line false tks + ) + | print' _ _ [] = () + in + print' 1 true tkl; + print "\n" + end + end |