summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorVladimir Azarov <avm@intermediate-node.net>2025-03-24 22:37:21 +0100
committerVladimir Azarov <avm@intermediate-node.net>2025-03-24 22:37:21 +0100
commit226f58656b4b7f92f6de9a817ab9106937e061e9 (patch)
treed2e4c23e520d99c59685d18b24058f8a45def43e
parent87217fe5ba58f5199d30586b5d9bec104dece445 (diff)
Simplified stream
-rw-r--r--exn_handler.sml2
-rw-r--r--general.sml2
-rw-r--r--stream.sig31
-rw-r--r--stream.sml155
-rw-r--r--tokenizer.sig5
-rw-r--r--tokenizer.sml133
6 files changed, 87 insertions, 241 deletions
diff --git a/exn_handler.sml b/exn_handler.sml
index 2e3a0b2..32ad20b 100644
--- a/exn_handler.sml
+++ b/exn_handler.sml
@@ -37,7 +37,7 @@ structure GlobalExnHandler: sig val handler: exn -> unit end = struct
FsmTableIsTooSmall =>
eprint "fsm table is too small. Increate 'maxState' value"
| IO.Io _ => ioExn e
- | TkErrorAug (pos, msg) => eprint $ Stream.pos2str pos ^ ": " ^ msg
+ | TkErrorAug (pos, msg) => eprint $ Stream.ppos2str pos ^ ": " ^ msg
| _ => otherExn e;
exit 255)
end
diff --git a/general.sml b/general.sml
index cde0514..1434d5b 100644
--- a/general.sml
+++ b/general.sml
@@ -1,3 +1,5 @@
+exception Unreachable
+
fun $ (x, y) = x y
infixr 0 $
diff --git a/stream.sig b/stream.sig
index 98cfb65..febd4b7 100644
--- a/stream.sig
+++ b/stream.sig
@@ -2,35 +2,34 @@ signature STREAM = sig
type fileId = int
type fileOffset = int
type pos = fileId * fileOffset
- type convPos = string * int * int option
-
- type filesInfo = (fileId * string * string) list
-
- exception EndOfStream
- exception EndOfFile
-
- exception LineWithoutNl
+ type ppos (* pretty pos *)
type t
+ type fileInfo
- val extractFilesInfo: t -> filesInfo
+ val recycle: t -> fileInfo
- val pos2str: convPos -> string
- val printPos: (fileId * string * string) list -> pos -> unit
+ val ppos2str: ppos -> string
+ val printPos: fileInfo -> pos -> unit
- val getchar: t -> char * t
+ val getchar: t -> char option * t
+
+ (* Will throw UngetcError, if applied at the beginning of the stream.
+ * Can be always avoided, so is not provided in sig file *)
val ungetc: t -> t
- val readline: t -> string * t
val getOffset: t -> fileOffset
val getPos: t -> pos
val getPosAfterCharRead: t -> pos
- val getPposFromPos: pos -> t -> convPos
+ val getPposFromPos: pos -> t -> ppos
+
+ (* Assumed to be called once for given pos, so will throw Unreachable on
+ * second call *)
+ val pposWithoutCol: ppos -> ppos
val getSubstr: fileOffset -> fileOffset -> t -> string
val isFirstOnLine: pos -> t -> bool
- val advanceToNewFile: t -> t
-
+ (* throws IO.Io *)
val streamInit: string -> t
end
diff --git a/stream.sml b/stream.sml
index 10b02cc..92deb61 100644
--- a/stream.sml
+++ b/stream.sml
@@ -2,20 +2,12 @@ structure Stream :> STREAM = struct
type fileId = int
type fileOffset = int
type pos = fileId * fileOffset
- type convPos = string * int * int option
+ type ppos = string * int * int option
+ type fileInfo = fileId * string * string
- type filesInfo = (fileId * string * string) list
+ exception UngetcError
- exception EndOfStream
- exception EndOfFile
-
- (* unreachable *)
- exception InvalidStream
- exception InvalidStreamAdvance
-
- exception LineWithoutNl
-
- fun pos2str (pos, line, col) =
+ fun ppos2str (pos, line, col) =
let
val % = Int.toString
in
@@ -24,14 +16,9 @@ structure Stream :> STREAM = struct
| NONE => pos ^ ":" ^ %line
end
- type t = {
- (* stack of file ids, file offsets and file contents *)
- stack: (fileId * fileOffset * string) list,
- (* list of file ids, file names and file contents *)
- allFiles: filesInfo
- }
+ type t = fileId * string * fileOffset * string
- fun extractFilesInfo (s: t) = #allFiles s
+ fun recycle (fid, fname, _, contents) = (fid, fname, contents)
fun calcFilePos s offset =
let
@@ -46,20 +33,13 @@ structure Stream :> STREAM = struct
calc s 0 offset (1, 1)
end
- fun printPos fileList (id, pos) =
+ fun printPos (_, fname, contents) (_, pos) =
let
- val triple = List.find (fn (fid, _, _) => fid = id) fileList
+ val (line, col) = calcFilePos contents pos
+ val line = Int.toString line
+ val col = Int.toString col
in
- case triple of
- NONE => raise InvalidStream
- | SOME (_, fname, contents) =>
- let
- val (line, col) = calcFilePos contents pos
- val line = Int.toString line
- val col = Int.toString col
- in
- print $ fname ^ ":" ^ line ^ ":" ^ col ^ ": "
- end
+ print $ fname ^ ":" ^ line ^ ":" ^ col ^ ": "
end
fun readFile fname =
@@ -72,114 +52,59 @@ structure Stream :> STREAM = struct
s
end
- fun getchar
- ({ stack = (id, off, contents) :: rest, allFiles }: t)
- : (char * t) =
+ fun getchar (S as (fid, fname, off, contents)) =
if off < String.size contents then
- (String.sub (contents, off),
- { stack = (id, off + 1, contents) :: rest, allFiles })
+ (SOME $ String.sub (contents, off), (fid, fname, off + 1, contents))
else
- raise EndOfFile
- | getchar _ = raise EndOfStream
+ (NONE, S)
- fun ungetc
- ({ stack = (id, off, contents) :: rest, allFiles }) =
+ fun ungetc (fid, fname, off, contents) =
if off = 0 then
- raise InvalidStream
+ raise UngetcError
else
- { stack = (id, off - 1, contents) :: rest, allFiles }
- | ungetc _ = raise InvalidStream
-
- fun readline { stack = (fid, off, contents) :: rest, allFiles } =
- let
- val prevIsSlash =
- off > 0 andalso String.sub (contents, off - 1) = #"\\"
-
- open String
- fun read prevIsSlash offset acc =
- let
- val c = sub (contents, offset)
- in
- if offset = size contents then
- raise LineWithoutNl
- else if c = #"\n" then
- if prevIsSlash then
- read (c = #"\\") (offset + 1) (#" " :: tl acc)
- else
- (implode $ rev acc, offset + 1)
- else
- read (c = #"\\") (offset + 1) (c :: acc)
- end
-
- val (arg, newOffset) = read prevIsSlash off []
- in
- (arg, { stack = (fid, newOffset, contents) :: rest, allFiles })
- end
- | readline _ = raise InvalidStream
+ (fid, fname, off - 1, contents)
- fun getOffset ({ stack = (_, off, _) :: _, ... }: t) = off
- | getOffset _ = raise InvalidStream
+ fun getOffset (_, _, off, _) = off
- fun getPosAfterCharRead
- ({ stack = (id, off, _) :: _, ... }: t) = (id, off - 1)
- | getPosAfterCharRead _ = raise InvalidStream
+ fun getPosAfterCharRead (fid, _, off, _) = (fid, off - 1)
- fun getPposFromPos (id, pos)
- { stack = (_, _, _) :: _, allFiles} =
+ fun getPposFromPos (_, pos) (_, fname, _, contents) =
let
- val (fname, contents) =
- case List.find (fn (fid, _, _) => fid = id) allFiles of
- NONE => raise InvalidStream
- | SOME (_, fname, contents) => (fname, contents)
-
val (line, col) = calcFilePos contents pos
in
(fname, line, SOME col)
end
- | getPposFromPos _ _ = raise InvalidStream
- fun getPos ({ stack = (id, off, _) :: _, ... }: t) = (id, off)
- | getPos _ = raise InvalidStream
+ fun pposWithoutCol (fname, line, SOME _) = (fname, line, NONE)
+ | pposWithoutCol (_, _, NONE) = raise Unreachable
- fun getSubstr startOff endOff
- ({ stack = (_, _, contents) :: _, ... }: t) =
- String.substring (contents, startOff, endOff - startOff)
- | getSubstr _ _ _ = raise InvalidStream
+ fun getPos (id, _, off, _) = (id, off)
- fun advanceToNewFile
- ({ stack = (_, off, contents) :: rest, allFiles }: t) =
- if off = String.size contents then
- { stack = rest, allFiles }
- else
- raise InvalidStreamAdvance
- | advanceToNewFile _ = raise InvalidStreamAdvance
+ fun getSubstr startOff endOff (_, _, _, contents) =
+ String.substring (contents, startOff, endOff - startOff)
fun streamInit fname =
let
val contents = readFile fname
in
- { allFiles = [(0, fname, contents)], stack = [(0, 0, contents)] }
+ (0, fname, 0, contents)
end
- fun isFirstOnLine (id, offset) (stream: t) =
- case List.find (fn (fid, _, _) => id = fid) (#allFiles stream) of
- NONE => raise InvalidStream
- | SOME (_, _, contents) =>
+ fun isFirstOnLine (_, offset) ((_, _, _, contents) : t) =
+ let
+ fun returnToNL ~1 = true
+ | returnToNL offset =
let
- fun returnToNL ~1 = true
- | returnToNL offset =
- let
- val chr = String.sub (contents, offset)
- in
- if chr = #"\n" then
- true
- else if Char.isSpace chr then
- returnToNL (offset - 1)
- else
- false
- end
+ val chr = String.sub (contents, offset)
in
- returnToNL (offset - 1)
+ if chr = #"\n" then
+ true
+ else if Char.isSpace chr then
+ returnToNL (offset - 1)
+ else
+ false
end
-
+ in
+ returnToNL (offset - 1)
+ end
end
diff --git a/tokenizer.sig b/tokenizer.sig
index c31a7e6..5956273 100644
--- a/tokenizer.sig
+++ b/tokenizer.sig
@@ -4,9 +4,8 @@ signature TOKENIZER = sig
(* Fatal. both may be thrown by tokenize *)
exception FsmTableIsTooSmall
- exception TkErrorAug of Stream.convPos * string
-
- val tokenize: Stream.t -> fullToken list -> fullToken list * Stream.filesInfo
+ exception TkErrorAug of Stream.ppos * string
+ val tokenize: Stream.t -> fullToken list -> fullToken list * Stream.fileInfo
val printToken: token -> unit
end
diff --git a/tokenizer.sml b/tokenizer.sml
index 53bb396..b40e03f 100644
--- a/tokenizer.sml
+++ b/tokenizer.sml
@@ -1,6 +1,4 @@
structure Tokenizer:> TOKENIZER = struct
- datatype includeArg = IARel of string | IAFromRef of string
-
datatype token =
Invalid |
Number of string |
@@ -93,7 +91,7 @@ structure Tokenizer:> TOKENIZER = struct
CommentStart |
- CppInclude of includeArg |
+ CppInclude |
CppDefine |
CppUndef |
CppIf |
@@ -112,14 +110,13 @@ structure Tokenizer:> TOKENIZER = struct
datatype tkErrorAuxInfo = TkiEOF | TkiDx of int | TkiStart
exception TkError of tkErrorAuxInfo * string
- exception TkErrorAug of Stream.convPos * string
+ exception TkErrorAug of Stream.ppos * string
exception ExpectedCppDir (* handled in postprocess *)
exception FsmTableIsTooSmall
(* Unreachable (should be) *)
- exception Unreachable
exception TokenWithoutRepr
val tokenRepr =
@@ -213,6 +210,7 @@ structure Tokenizer:> TOKENIZER = struct
(CommentStart, "/*"),
+ (CppInclude, %"include"),
(CppDefine, %"define"),
(CppUndef, %"undef"),
(CppIf, %"if"),
@@ -229,15 +227,6 @@ structure Tokenizer:> TOKENIZER = struct
Number s => printLn $ "Num: " ^ s
| Id s => printLn $ "Id: " ^ s
| CharConst (repr, _) => printLn repr
- | CppInclude arg =>
- let
- val (start, end', arg) =
- case arg of
- IARel v => ("\"", "\"", v)
- | IAFromRef v => ("<", ">", v)
- in
- printLn $ (str cppPrefix) ^ "include " ^ start ^ arg ^ end'
- end
| StringConst s =>
printLn $ "\"" ^ s ^ "\""
| v =>
@@ -380,9 +369,7 @@ structure Tokenizer:> TOKENIZER = struct
fun get curState stream =
let
- val (c, stream) = (fn (c, s) => (SOME c, s)) $ Stream.getchar stream
- handle
- _ => (NONE, stream)
+ val (c, stream) = Stream.getchar stream
in
case c of
NONE => (#1 $ sub (#2 $ fsmTable (), curState), stream)
@@ -415,8 +402,7 @@ structure Tokenizer:> TOKENIZER = struct
val P as (_, startOff) = Stream.getPos stream
fun parse' stream acc = let
- val (c, stream) = (fn (c, s) => (SOME c, s)) $ Stream.getchar stream handle
- _ => (NONE, stream)
+ val (c, stream) = Stream.getchar stream
val (acc, tk, stream) = parser acc (stream, startOff) c handle
TkError (TkiDx dx, msg) => raise tkError2aug stream (dx, msg)
@@ -428,9 +414,10 @@ structure Tokenizer:> TOKENIZER = struct
end
| TkError (TkiEOF, msg) =>
let
- val (file, line, _) = Stream.getPposFromPos P stream
+ open Stream
+ val pos = pposWithoutCol $ getPposFromPos P stream
in
- raise TkErrorAug ((file, line, NONE), msg)
+ raise TkErrorAug (pos, msg)
end
in
case tk of
@@ -512,8 +499,11 @@ structure Tokenizer:> TOKENIZER = struct
fun eatEscSeq stream =
let
- val (c, stream) = Stream.getchar stream handle
- _ => raise TkError (TkiDx 0, "unfinished escape sequence")
+ val (c, stream) = Stream.getchar stream
+ val c =
+ case c of
+ NONE => raise TkError (TkiDx 0, "unfinished escape sequence")
+ | SOME c => c
in
(case c of
#"\\" => #"\\"
@@ -595,65 +585,6 @@ structure Tokenizer:> TOKENIZER = struct
val charParser = seqParser SpmChr
val strParser = seqParser SpmStr
- fun readIncludeArg stream =
- let
- open String
-
- fun triml s idx =
- if idx = size s then
- ""
- else if Char.isSpace $ sub (s, idx) then
- triml s (idx + 1)
- else
- extract (s, idx, NONE)
-
- fun trimr s idx =
- if idx = 0 then
- ""
- else if Char.isSpace $ sub (s, idx) then
- trimr s (idx - 1)
- else
- extract (s, 0, SOME $ idx + 1)
-
- fun trim s = triml (trimr s (size s - 1)) 0
-
- fun getLinePos () =
- let
- val (fname, line, _) = Stream.getPposFromPos (Stream.getPos stream) stream
- in
- (fname, line, NONE)
- end
-
- fun determineType s =
- let
- fun --> msg = raise TkErrorAug (getLinePos (), msg)
- fun isLast c = sub (s, size s - 1) = c
- in
- if s = "" then
- --> "#include argument is empty"
- else
- case sub (s, 0) of
- #"<" =>
- if isLast #">" then
- IAFromRef $ stringCut s
- else
- --> "expected > at #include argument end"
- | #"\"" =>
- if isLast #"\"" then
- IARel $ stringCut s
- else
- --> "expected \" at #include argument end"
- | _ => --> "#include argument should start with \" or <"
- end
-
- val (arg, stream) = Stream.readline stream handle
- Stream.LineWithoutNl =>
- raise TkErrorAug (getLinePos (),
- "#include line does not end with \\n")
- in
- (determineType $ trim arg, stream)
- end
-
fun postprocessCppDir tk tkl stream =
let
val isCppDir =
@@ -664,13 +595,7 @@ structure Tokenizer:> TOKENIZER = struct
fun conv tk = ((#1 $ hd tkl, tk) :: tl tkl)
in
- if isCppDir andalso tk' = Id "include" then
- let
- val (arg, stream) = readIncludeArg stream
- in
- (conv $ CppInclude arg, stream)
- end
- else if isCppDir then
+ if isCppDir then
(conv $ formCppDir tk', stream) handle
ExpectedCppDir =>
raise TkErrorAug (Stream.getPposFromPos pos stream,
@@ -697,7 +622,15 @@ structure Tokenizer:> TOKENIZER = struct
let
fun skip prevIsAsterisk stream =
let
- val (c, stream) = Stream.getchar stream
+ val (c, stream) =
+ case Stream.getchar stream of
+ (NONE, _) =>
+ let
+ val pos = Stream.getPposFromPos pos stream
+ in
+ raise TkErrorAug (pos, "unfinished comment")
+ end
+ | (SOME c, stream) => (c, stream)
in
if prevIsAsterisk andalso c = #"/" then
stream
@@ -705,19 +638,12 @@ structure Tokenizer:> TOKENIZER = struct
skip (c = #"*") stream
end
in
- skip false stream handle
- Stream.EndOfFile =>
- let
- val pos = Stream.getPposFromPos pos stream
- in
- raise TkErrorAug (pos, "unfinished comment")
- end
+ skip false stream
end
fun handleBackslash stream =
let
- val (c, stream) = (fn (c, s) => (SOME c, s)) $ Stream.getchar stream handle
- _ => (NONE, stream)
+ val (c, stream) = Stream.getchar stream
val raiseErr = fn () =>
let
@@ -748,18 +674,13 @@ structure Tokenizer:> TOKENIZER = struct
and tokenize stream tkl =
let
- fun getcharSkipEof stream = Stream.getchar stream handle
- Stream.EndOfFile => getcharSkipEof (Stream.advanceToNewFile stream)
-
- val (c, stream) = (fn (c, s) => (SOME c, s)) $ getcharSkipEof stream
- handle
- Stream.EndOfStream => (NONE, stream)
+ val (c, stream) = Stream.getchar stream
fun cont (tk, stream) = tokenize stream (tk :: tkl)
fun @-> parser acc = cont $ parseGeneric stream parser acc
in
case c of
- NONE => (rev tkl, Stream.extractFilesInfo stream)
+ NONE => (rev tkl, Stream.recycle stream)
| SOME c =>
if Char.isSpace c then
tokenize stream tkl