diff options
-rw-r--r-- | exn_handler.sml | 2 | ||||
-rw-r--r-- | general.sml | 2 | ||||
-rw-r--r-- | stream.sig | 31 | ||||
-rw-r--r-- | stream.sml | 155 | ||||
-rw-r--r-- | tokenizer.sig | 5 | ||||
-rw-r--r-- | tokenizer.sml | 133 |
6 files changed, 87 insertions, 241 deletions
diff --git a/exn_handler.sml b/exn_handler.sml index 2e3a0b2..32ad20b 100644 --- a/exn_handler.sml +++ b/exn_handler.sml @@ -37,7 +37,7 @@ structure GlobalExnHandler: sig val handler: exn -> unit end = struct FsmTableIsTooSmall => eprint "fsm table is too small. Increate 'maxState' value" | IO.Io _ => ioExn e - | TkErrorAug (pos, msg) => eprint $ Stream.pos2str pos ^ ": " ^ msg + | TkErrorAug (pos, msg) => eprint $ Stream.ppos2str pos ^ ": " ^ msg | _ => otherExn e; exit 255) end diff --git a/general.sml b/general.sml index cde0514..1434d5b 100644 --- a/general.sml +++ b/general.sml @@ -1,3 +1,5 @@ +exception Unreachable + fun $ (x, y) = x y infixr 0 $ @@ -2,35 +2,34 @@ signature STREAM = sig type fileId = int type fileOffset = int type pos = fileId * fileOffset - type convPos = string * int * int option - - type filesInfo = (fileId * string * string) list - - exception EndOfStream - exception EndOfFile - - exception LineWithoutNl + type ppos (* pretty pos *) type t + type fileInfo - val extractFilesInfo: t -> filesInfo + val recycle: t -> fileInfo - val pos2str: convPos -> string - val printPos: (fileId * string * string) list -> pos -> unit + val ppos2str: ppos -> string + val printPos: fileInfo -> pos -> unit - val getchar: t -> char * t + val getchar: t -> char option * t + + (* Will throw UngetcError, if applied at the beginning of the stream. + * Can be always avoided, so is not provided in sig file *) val ungetc: t -> t - val readline: t -> string * t val getOffset: t -> fileOffset val getPos: t -> pos val getPosAfterCharRead: t -> pos - val getPposFromPos: pos -> t -> convPos + val getPposFromPos: pos -> t -> ppos + + (* Assumed to be called once for given pos, so will throw Unreachable on + * second call *) + val pposWithoutCol: ppos -> ppos val getSubstr: fileOffset -> fileOffset -> t -> string val isFirstOnLine: pos -> t -> bool - val advanceToNewFile: t -> t - + (* throws IO.Io *) val streamInit: string -> t end @@ -2,20 +2,12 @@ structure Stream :> STREAM = struct type fileId = int type fileOffset = int type pos = fileId * fileOffset - type convPos = string * int * int option + type ppos = string * int * int option + type fileInfo = fileId * string * string - type filesInfo = (fileId * string * string) list + exception UngetcError - exception EndOfStream - exception EndOfFile - - (* unreachable *) - exception InvalidStream - exception InvalidStreamAdvance - - exception LineWithoutNl - - fun pos2str (pos, line, col) = + fun ppos2str (pos, line, col) = let val % = Int.toString in @@ -24,14 +16,9 @@ structure Stream :> STREAM = struct | NONE => pos ^ ":" ^ %line end - type t = { - (* stack of file ids, file offsets and file contents *) - stack: (fileId * fileOffset * string) list, - (* list of file ids, file names and file contents *) - allFiles: filesInfo - } + type t = fileId * string * fileOffset * string - fun extractFilesInfo (s: t) = #allFiles s + fun recycle (fid, fname, _, contents) = (fid, fname, contents) fun calcFilePos s offset = let @@ -46,20 +33,13 @@ structure Stream :> STREAM = struct calc s 0 offset (1, 1) end - fun printPos fileList (id, pos) = + fun printPos (_, fname, contents) (_, pos) = let - val triple = List.find (fn (fid, _, _) => fid = id) fileList + val (line, col) = calcFilePos contents pos + val line = Int.toString line + val col = Int.toString col in - case triple of - NONE => raise InvalidStream - | SOME (_, fname, contents) => - let - val (line, col) = calcFilePos contents pos - val line = Int.toString line - val col = Int.toString col - in - print $ fname ^ ":" ^ line ^ ":" ^ col ^ ": " - end + print $ fname ^ ":" ^ line ^ ":" ^ col ^ ": " end fun readFile fname = @@ -72,114 +52,59 @@ structure Stream :> STREAM = struct s end - fun getchar - ({ stack = (id, off, contents) :: rest, allFiles }: t) - : (char * t) = + fun getchar (S as (fid, fname, off, contents)) = if off < String.size contents then - (String.sub (contents, off), - { stack = (id, off + 1, contents) :: rest, allFiles }) + (SOME $ String.sub (contents, off), (fid, fname, off + 1, contents)) else - raise EndOfFile - | getchar _ = raise EndOfStream + (NONE, S) - fun ungetc - ({ stack = (id, off, contents) :: rest, allFiles }) = + fun ungetc (fid, fname, off, contents) = if off = 0 then - raise InvalidStream + raise UngetcError else - { stack = (id, off - 1, contents) :: rest, allFiles } - | ungetc _ = raise InvalidStream - - fun readline { stack = (fid, off, contents) :: rest, allFiles } = - let - val prevIsSlash = - off > 0 andalso String.sub (contents, off - 1) = #"\\" - - open String - fun read prevIsSlash offset acc = - let - val c = sub (contents, offset) - in - if offset = size contents then - raise LineWithoutNl - else if c = #"\n" then - if prevIsSlash then - read (c = #"\\") (offset + 1) (#" " :: tl acc) - else - (implode $ rev acc, offset + 1) - else - read (c = #"\\") (offset + 1) (c :: acc) - end - - val (arg, newOffset) = read prevIsSlash off [] - in - (arg, { stack = (fid, newOffset, contents) :: rest, allFiles }) - end - | readline _ = raise InvalidStream + (fid, fname, off - 1, contents) - fun getOffset ({ stack = (_, off, _) :: _, ... }: t) = off - | getOffset _ = raise InvalidStream + fun getOffset (_, _, off, _) = off - fun getPosAfterCharRead - ({ stack = (id, off, _) :: _, ... }: t) = (id, off - 1) - | getPosAfterCharRead _ = raise InvalidStream + fun getPosAfterCharRead (fid, _, off, _) = (fid, off - 1) - fun getPposFromPos (id, pos) - { stack = (_, _, _) :: _, allFiles} = + fun getPposFromPos (_, pos) (_, fname, _, contents) = let - val (fname, contents) = - case List.find (fn (fid, _, _) => fid = id) allFiles of - NONE => raise InvalidStream - | SOME (_, fname, contents) => (fname, contents) - val (line, col) = calcFilePos contents pos in (fname, line, SOME col) end - | getPposFromPos _ _ = raise InvalidStream - fun getPos ({ stack = (id, off, _) :: _, ... }: t) = (id, off) - | getPos _ = raise InvalidStream + fun pposWithoutCol (fname, line, SOME _) = (fname, line, NONE) + | pposWithoutCol (_, _, NONE) = raise Unreachable - fun getSubstr startOff endOff - ({ stack = (_, _, contents) :: _, ... }: t) = - String.substring (contents, startOff, endOff - startOff) - | getSubstr _ _ _ = raise InvalidStream + fun getPos (id, _, off, _) = (id, off) - fun advanceToNewFile - ({ stack = (_, off, contents) :: rest, allFiles }: t) = - if off = String.size contents then - { stack = rest, allFiles } - else - raise InvalidStreamAdvance - | advanceToNewFile _ = raise InvalidStreamAdvance + fun getSubstr startOff endOff (_, _, _, contents) = + String.substring (contents, startOff, endOff - startOff) fun streamInit fname = let val contents = readFile fname in - { allFiles = [(0, fname, contents)], stack = [(0, 0, contents)] } + (0, fname, 0, contents) end - fun isFirstOnLine (id, offset) (stream: t) = - case List.find (fn (fid, _, _) => id = fid) (#allFiles stream) of - NONE => raise InvalidStream - | SOME (_, _, contents) => + fun isFirstOnLine (_, offset) ((_, _, _, contents) : t) = + let + fun returnToNL ~1 = true + | returnToNL offset = let - fun returnToNL ~1 = true - | returnToNL offset = - let - val chr = String.sub (contents, offset) - in - if chr = #"\n" then - true - else if Char.isSpace chr then - returnToNL (offset - 1) - else - false - end + val chr = String.sub (contents, offset) in - returnToNL (offset - 1) + if chr = #"\n" then + true + else if Char.isSpace chr then + returnToNL (offset - 1) + else + false end - + in + returnToNL (offset - 1) + end end diff --git a/tokenizer.sig b/tokenizer.sig index c31a7e6..5956273 100644 --- a/tokenizer.sig +++ b/tokenizer.sig @@ -4,9 +4,8 @@ signature TOKENIZER = sig (* Fatal. both may be thrown by tokenize *) exception FsmTableIsTooSmall - exception TkErrorAug of Stream.convPos * string - - val tokenize: Stream.t -> fullToken list -> fullToken list * Stream.filesInfo + exception TkErrorAug of Stream.ppos * string + val tokenize: Stream.t -> fullToken list -> fullToken list * Stream.fileInfo val printToken: token -> unit end diff --git a/tokenizer.sml b/tokenizer.sml index 53bb396..b40e03f 100644 --- a/tokenizer.sml +++ b/tokenizer.sml @@ -1,6 +1,4 @@ structure Tokenizer:> TOKENIZER = struct - datatype includeArg = IARel of string | IAFromRef of string - datatype token = Invalid | Number of string | @@ -93,7 +91,7 @@ structure Tokenizer:> TOKENIZER = struct CommentStart | - CppInclude of includeArg | + CppInclude | CppDefine | CppUndef | CppIf | @@ -112,14 +110,13 @@ structure Tokenizer:> TOKENIZER = struct datatype tkErrorAuxInfo = TkiEOF | TkiDx of int | TkiStart exception TkError of tkErrorAuxInfo * string - exception TkErrorAug of Stream.convPos * string + exception TkErrorAug of Stream.ppos * string exception ExpectedCppDir (* handled in postprocess *) exception FsmTableIsTooSmall (* Unreachable (should be) *) - exception Unreachable exception TokenWithoutRepr val tokenRepr = @@ -213,6 +210,7 @@ structure Tokenizer:> TOKENIZER = struct (CommentStart, "/*"), + (CppInclude, %"include"), (CppDefine, %"define"), (CppUndef, %"undef"), (CppIf, %"if"), @@ -229,15 +227,6 @@ structure Tokenizer:> TOKENIZER = struct Number s => printLn $ "Num: " ^ s | Id s => printLn $ "Id: " ^ s | CharConst (repr, _) => printLn repr - | CppInclude arg => - let - val (start, end', arg) = - case arg of - IARel v => ("\"", "\"", v) - | IAFromRef v => ("<", ">", v) - in - printLn $ (str cppPrefix) ^ "include " ^ start ^ arg ^ end' - end | StringConst s => printLn $ "\"" ^ s ^ "\"" | v => @@ -380,9 +369,7 @@ structure Tokenizer:> TOKENIZER = struct fun get curState stream = let - val (c, stream) = (fn (c, s) => (SOME c, s)) $ Stream.getchar stream - handle - _ => (NONE, stream) + val (c, stream) = Stream.getchar stream in case c of NONE => (#1 $ sub (#2 $ fsmTable (), curState), stream) @@ -415,8 +402,7 @@ structure Tokenizer:> TOKENIZER = struct val P as (_, startOff) = Stream.getPos stream fun parse' stream acc = let - val (c, stream) = (fn (c, s) => (SOME c, s)) $ Stream.getchar stream handle - _ => (NONE, stream) + val (c, stream) = Stream.getchar stream val (acc, tk, stream) = parser acc (stream, startOff) c handle TkError (TkiDx dx, msg) => raise tkError2aug stream (dx, msg) @@ -428,9 +414,10 @@ structure Tokenizer:> TOKENIZER = struct end | TkError (TkiEOF, msg) => let - val (file, line, _) = Stream.getPposFromPos P stream + open Stream + val pos = pposWithoutCol $ getPposFromPos P stream in - raise TkErrorAug ((file, line, NONE), msg) + raise TkErrorAug (pos, msg) end in case tk of @@ -512,8 +499,11 @@ structure Tokenizer:> TOKENIZER = struct fun eatEscSeq stream = let - val (c, stream) = Stream.getchar stream handle - _ => raise TkError (TkiDx 0, "unfinished escape sequence") + val (c, stream) = Stream.getchar stream + val c = + case c of + NONE => raise TkError (TkiDx 0, "unfinished escape sequence") + | SOME c => c in (case c of #"\\" => #"\\" @@ -595,65 +585,6 @@ structure Tokenizer:> TOKENIZER = struct val charParser = seqParser SpmChr val strParser = seqParser SpmStr - fun readIncludeArg stream = - let - open String - - fun triml s idx = - if idx = size s then - "" - else if Char.isSpace $ sub (s, idx) then - triml s (idx + 1) - else - extract (s, idx, NONE) - - fun trimr s idx = - if idx = 0 then - "" - else if Char.isSpace $ sub (s, idx) then - trimr s (idx - 1) - else - extract (s, 0, SOME $ idx + 1) - - fun trim s = triml (trimr s (size s - 1)) 0 - - fun getLinePos () = - let - val (fname, line, _) = Stream.getPposFromPos (Stream.getPos stream) stream - in - (fname, line, NONE) - end - - fun determineType s = - let - fun --> msg = raise TkErrorAug (getLinePos (), msg) - fun isLast c = sub (s, size s - 1) = c - in - if s = "" then - --> "#include argument is empty" - else - case sub (s, 0) of - #"<" => - if isLast #">" then - IAFromRef $ stringCut s - else - --> "expected > at #include argument end" - | #"\"" => - if isLast #"\"" then - IARel $ stringCut s - else - --> "expected \" at #include argument end" - | _ => --> "#include argument should start with \" or <" - end - - val (arg, stream) = Stream.readline stream handle - Stream.LineWithoutNl => - raise TkErrorAug (getLinePos (), - "#include line does not end with \\n") - in - (determineType $ trim arg, stream) - end - fun postprocessCppDir tk tkl stream = let val isCppDir = @@ -664,13 +595,7 @@ structure Tokenizer:> TOKENIZER = struct fun conv tk = ((#1 $ hd tkl, tk) :: tl tkl) in - if isCppDir andalso tk' = Id "include" then - let - val (arg, stream) = readIncludeArg stream - in - (conv $ CppInclude arg, stream) - end - else if isCppDir then + if isCppDir then (conv $ formCppDir tk', stream) handle ExpectedCppDir => raise TkErrorAug (Stream.getPposFromPos pos stream, @@ -697,7 +622,15 @@ structure Tokenizer:> TOKENIZER = struct let fun skip prevIsAsterisk stream = let - val (c, stream) = Stream.getchar stream + val (c, stream) = + case Stream.getchar stream of + (NONE, _) => + let + val pos = Stream.getPposFromPos pos stream + in + raise TkErrorAug (pos, "unfinished comment") + end + | (SOME c, stream) => (c, stream) in if prevIsAsterisk andalso c = #"/" then stream @@ -705,19 +638,12 @@ structure Tokenizer:> TOKENIZER = struct skip (c = #"*") stream end in - skip false stream handle - Stream.EndOfFile => - let - val pos = Stream.getPposFromPos pos stream - in - raise TkErrorAug (pos, "unfinished comment") - end + skip false stream end fun handleBackslash stream = let - val (c, stream) = (fn (c, s) => (SOME c, s)) $ Stream.getchar stream handle - _ => (NONE, stream) + val (c, stream) = Stream.getchar stream val raiseErr = fn () => let @@ -748,18 +674,13 @@ structure Tokenizer:> TOKENIZER = struct and tokenize stream tkl = let - fun getcharSkipEof stream = Stream.getchar stream handle - Stream.EndOfFile => getcharSkipEof (Stream.advanceToNewFile stream) - - val (c, stream) = (fn (c, s) => (SOME c, s)) $ getcharSkipEof stream - handle - Stream.EndOfStream => (NONE, stream) + val (c, stream) = Stream.getchar stream fun cont (tk, stream) = tokenize stream (tk :: tkl) fun @-> parser acc = cont $ parseGeneric stream parser acc in case c of - NONE => (rev tkl, Stream.extractFilesInfo stream) + NONE => (rev tkl, Stream.recycle stream) | SOME c => if Char.isSpace c then tokenize stream tkl |