summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--.gitignore1
-rw-r--r--COPYRIGHT193
-rw-r--r--Makefile39
-rw-r--r--arch/x86_64/bits/syscall.h.in1
-rw-r--r--tools/common.c427
-rw-r--r--tools/common.h108
-rw-r--r--tools/sed.c1089
7 files changed, 1849 insertions, 9 deletions
diff --git a/.gitignore b/.gitignore
index 0a7aba4..8bfc8ef 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,4 @@
*.o
*.a
generated
+tools/sed
diff --git a/COPYRIGHT b/COPYRIGHT
new file mode 100644
index 0000000..c1628e9
--- /dev/null
+++ b/COPYRIGHT
@@ -0,0 +1,193 @@
+musl as a whole is licensed under the following standard MIT license:
+
+----------------------------------------------------------------------
+Copyright © 2005-2020 Rich Felker, et al.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+----------------------------------------------------------------------
+
+Authors/contributors include:
+
+A. Wilcox
+Ada Worcester
+Alex Dowad
+Alex Suykov
+Alexander Monakov
+Andre McCurdy
+Andrew Kelley
+Anthony G. Basile
+Aric Belsito
+Arvid Picciani
+Bartosz Brachaczek
+Benjamin Peterson
+Bobby Bingham
+Boris Brezillon
+Brent Cook
+Chris Spiegel
+Clément Vasseur
+Daniel Micay
+Daniel Sabogal
+Daurnimator
+David Carlier
+David Edelsohn
+Denys Vlasenko
+Dmitry Ivanov
+Dmitry V. Levin
+Drew DeVault
+Emil Renner Berthing
+Fangrui Song
+Felix Fietkau
+Felix Janda
+Gianluca Anzolin
+Hauke Mehrtens
+He X
+Hiltjo Posthuma
+Isaac Dunham
+Jaydeep Patil
+Jens Gustedt
+Jeremy Huntwork
+Jo-Philipp Wich
+Joakim Sindholt
+John Spencer
+Julien Ramseier
+Justin Cormack
+Kaarle Ritvanen
+Khem Raj
+Kylie McClain
+Leah Neukirchen
+Luca Barbato
+Luka Perkov
+M Farkas-Dyck (Strake)
+Mahesh Bodapati
+Markus Wichmann
+Masanori Ogino
+Michael Clark
+Michael Forney
+Mikhail Kremnyov
+Natanael Copa
+Nicholas J. Kain
+orc
+Pascal Cuoq
+Patrick Oppenlander
+Petr Hosek
+Petr Skocik
+Pierre Carrier
+Reini Urban
+Rich Felker
+Richard Pennington
+Ryan Fairfax
+Samuel Holland
+Segev Finer
+Shiz
+sin
+Solar Designer
+Stefan Kristiansson
+Stefan O'Rear
+Szabolcs Nagy
+Timo Teräs
+Trutz Behn
+Valentin Ochs
+Will Dietz
+William Haddon
+William Pitcock
+
+Portions of this software are derived from third-party works licensed
+under terms compatible with the above MIT license:
+
+The TRE regular expression implementation (src/regex/reg* and
+src/regex/tre*) is Copyright © 2001-2008 Ville Laurikari and licensed
+under a 2-clause BSD license (license text in the source files). The
+included version has been heavily modified by Rich Felker in 2012, in
+the interests of size, simplicity, and namespace cleanliness.
+
+Much of the math library code (src/math/* and src/complex/*) is
+Copyright © 1993,2004 Sun Microsystems or
+Copyright © 2003-2011 David Schultz or
+Copyright © 2003-2009 Steven G. Kargl or
+Copyright © 2003-2009 Bruce D. Evans or
+Copyright © 2008 Stephen L. Moshier or
+Copyright © 2017-2018 Arm Limited
+and labelled as such in comments in the individual source files. All
+have been licensed under extremely permissive terms.
+
+The ARM memcpy code (src/string/arm/memcpy.S) is Copyright © 2008
+The Android Open Source Project and is licensed under a two-clause BSD
+license. It was taken from Bionic libc, used on Android.
+
+The AArch64 memcpy and memset code (src/string/aarch64/*) are
+Copyright © 1999-2019, Arm Limited.
+
+The implementation of DES for crypt (src/crypt/crypt_des.c) is
+Copyright © 1994 David Burren. It is licensed under a BSD license.
+
+The implementation of blowfish crypt (src/crypt/crypt_blowfish.c) was
+originally written by Solar Designer and placed into the public
+domain. The code also comes with a fallback permissive license for use
+in jurisdictions that may not recognize the public domain.
+
+The smoothsort implementation (src/stdlib/qsort.c) is Copyright © 2011
+Valentin Ochs and is licensed under an MIT-style license.
+
+The x86_64 port was written by Nicholas J. Kain and is licensed under
+the standard MIT terms.
+
+The mips and microblaze ports were originally written by Richard
+Pennington for use in the ellcc project. The original code was adapted
+by Rich Felker for build system and code conventions during upstream
+integration. It is licensed under the standard MIT terms.
+
+The mips64 port was contributed by Imagination Technologies and is
+licensed under the standard MIT terms.
+
+The powerpc port was also originally written by Richard Pennington,
+and later supplemented and integrated by John Spencer. It is licensed
+under the standard MIT terms.
+
+All other files which have no copyright comments are original works
+produced specifically for use as part of this library, written either
+by Rich Felker, the main author of the library, or by one or more
+contibutors listed above. Details on authorship of individual files
+can be found in the git version control history of the project. The
+omission of copyright and license comments in each file is in the
+interest of source tree size.
+
+In addition, permission is hereby granted for all public header files
+(include/* and arch/*/bits/*) and crt files intended to be linked into
+applications (crt/*, ldso/dlstart.c, and arch/*/crt_arch.h) to omit
+the copyright notice and permission notice otherwise required by the
+license, and to use these files without any requirement of
+attribution. These files include substantial contributions from:
+
+Bobby Bingham
+John Spencer
+Nicholas J. Kain
+Rich Felker
+Richard Pennington
+Stefan Kristiansson
+Szabolcs Nagy
+
+all of whom have explicitly granted such permission.
+
+This file previously contained text expressing a belief that most of
+the files covered by the above exception were sufficiently trivial not
+to be subject to copyright, resulting in confusion over whether it
+negated the permissions granted in the license. In the spirit of
+permissive licensing, and of not having licensing issues being an
+obstacle to adoption, that text has been removed.
diff --git a/Makefile b/Makefile
index aeb1325..113c375 100644
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,6 @@
AR = ar
CC = gcc
-CFLAGS = -Wall -g -O3 -march=native
+CFLAGS = -O0 -g -Wall
arch = x86_64
@@ -41,20 +41,42 @@ all_inc = $(sort $(inc) $(generated_headers:generated/%=%) \
$(arch_inc:arch/$(arch)/%=include/%) \
$(generic_inc:arch/generic/%=include/%))
+dirs = $(sort $(dir $(generated_headers)))
+
+### Tools ###
+
+gcc_prefix = $(dir $(shell gcc -print-libgcc-file-name))
+
+tool_cflags = -Wall -nostdinc -Wno-main \
+ -ffreestanding -fno-pic -fno-stack-protector \
+ -Igenerated/include -Iarch/$(arch) \
+ -I /usr/lib/clang/17/include \
+ -Iarch/generic $(CFLAGS)
+
+.PRECIOUS: $(addprefix tools/, common.o sed.o)
+
+tools/%.o: tools/%.c tools/common.h
+ $(CC) $(tool_cflags) -c -o $@ $<
+
+tools/%: tools/%.o tools/common.o
+ ld -L$(gcc_prefix) -nostdlib -o $@ $^ -lgcc
+
+### Musl ###
+
default: $(lib_obj)
-generated_dirs:
- mkdir -p $(dir $(generated_headers))
+$(dirs):
+ mkdir -p $@
-$(generated_headers): | generated_dirs
+$(generated_headers): | $(dirs)
generated/include/bits/alltypes.h: arch/$(arch)/bits/alltypes.h.in \
- include/alltypes.h.in
- sed -f tools/mkalltypes.sed $^ >$@
+ include/alltypes.h.in tools/sed
+ cat $(filter %.in, $^) | tools/sed -f tools/mkalltypes.sed >$@
-generated/include/bits/syscall.h: arch/$(arch)/bits/syscall.h.in
+generated/include/bits/syscall.h: arch/$(arch)/bits/syscall.h.in tools/sed
cp $< $@
- sed -n -e 's/__NR_/SYS_/p' < $< >>$@
+ tools/sed 's/__NR_/SYS_/' < $< >>$@
src/internal/version.o: cflags += -DVERSION=\"$(shell cat VERSION)\"
crt/crt1.o: arch/$(arch)/crt_arch.h
@@ -105,3 +127,4 @@ install: install-libs install-headers
clean:
rm -rf generated
rm -f crt/*.o src/*/*.o libc.a crt1.o crti.o crtn.o
+ rm -f tools/*.o tools/sed
diff --git a/arch/x86_64/bits/syscall.h.in b/arch/x86_64/bits/syscall.h.in
index 6543bbb..cdacbc5 100644
--- a/arch/x86_64/bits/syscall.h.in
+++ b/arch/x86_64/bits/syscall.h.in
@@ -361,4 +361,3 @@
#define __NR_set_mempolicy_home_node 450
#define __NR_cachestat 451
#define __NR_fchmodat2 452
-
diff --git a/tools/common.c b/tools/common.c
new file mode 100644
index 0000000..00f7d3c
--- /dev/null
+++ b/tools/common.c
@@ -0,0 +1,427 @@
+#define START "_start"
+#include <crt_arch.h>
+#include <bits/syscall.h.in>
+#include <syscall_arch.h>
+
+#include <stdarg.h>
+
+#include "common.h"
+
+enum {
+ flush_on_newline = 1
+};
+
+int errno = 0;
+char *progname;
+
+static long sysret(unsigned long r)
+{
+ if (r > -4096UL) {
+ errno = -r;
+ return -1;
+ }
+ return r;
+}
+
+void exit(int code)
+{
+ __syscall1(__NR_exit, code);
+}
+
+static void *brk(void *addr)
+{
+ return (void *)__syscall1(__NR_brk, (long)addr);
+}
+
+int write(int fd, char *buf, int len)
+{
+ return sysret(__syscall3(__NR_write, fd, (long)buf, len));
+}
+
+int read(int fd, char *buf, int len)
+{
+ return sysret(__syscall3(__NR_read, fd, (long)buf, len));
+}
+
+int open(char *fname, int flags, int mode)
+{
+ int o_largefile = 0100000;
+ return sysret(__syscall3(__NR_open, (long)fname,
+ flags|o_largefile, mode));
+}
+
+int stat(char *fname, struct stat *st)
+{
+ return sysret(__syscall2(__NR_stat, (long)fname, (long)st));
+}
+
+int fstat(int fd, struct stat *st)
+{
+ return sysret(__syscall2(__NR_fstat, (long)fd, (long)st));
+}
+
+int close(int fd)
+{
+ return sysret(__syscall1(__NR_close, fd));
+}
+
+void *mmap(char *addr, unsigned long length, int prot, int flags, int fd,
+ unsigned long offset)
+{
+ return (void*)sysret(__syscall6(__NR_mmap, (long)addr, length,
+ prot, flags, fd, offset));
+}
+
+int munmap(char *addr, int length)
+{
+ return sysret(__syscall2(__NR_munmap, (long)addr, length));
+}
+
+#define MREMAP_MAYMOVE 1
+
+int mremap(void *old_addr, unsigned long old_size, unsigned long new_size,
+ int flags, void *new_addr)
+{
+ return sysret(__syscall5(__NR_mremap, (long)old_addr, old_size,
+ new_size, flags, (long)new_addr));
+}
+
+int strzlen(char *s)
+{
+ char *sc = s;
+ for (; *s; ++s)
+ ;
+ return s - sc;
+}
+
+int strzeq(char *s1, char *s2)
+{
+ for (; *s1 && *s1 == *s2; ++s1, ++s2)
+ ;
+ return *s1 == 0 && *s2 == 0;
+}
+
+int streq(struct str *s1, struct str *s2)
+{
+ int i;
+ if (s1->end - s1->start != s2->end - s2->start)
+ return 0;
+ for (i = 0; s1->start + i < s1->end; ++i) {
+ if (s1->start[i] != s2->start[i])
+ return 0;
+ }
+ return 1;
+}
+
+void memmove(char *dest, char *src, int len)
+{
+ int i;
+ if (dest < src) {
+ for (i = 0; i < len; ++i)
+ dest[i] = src[i];
+ } else {
+ for (i = len - 1; i >= 0; --i)
+ dest[i] = src[i];
+ }
+}
+
+void memcpy(void *dest, void *src, int len)
+{
+ int i;
+ char *d = dest;
+ char *s = src;
+ for (i = 0; i < len; ++i)
+ d[i] = s[i];
+}
+
+void memset(void *dest, char c, int len)
+{
+ char *tmp = dest;
+ for (; tmp < (char*)dest + len; ++tmp)
+ *tmp = c;
+}
+
+#define E(e, s) [e] = s,
+
+char *estrings[256] = {
+#include "../src/errno/__strerror.h"
+};
+#undef E
+
+void perror(char *s)
+{
+ int idx = errno;
+ if (errno > 256 || estrings[errno] == NULL)
+ idx = 0;
+ eprintf("%s: %s\n", s, estrings[idx]);
+}
+
+void syscall_error(char *fname)
+{
+ perror(fname);
+ hexit(1);
+}
+
+enum {
+ buflen = 32 * 1024
+};
+
+struct iobuf {
+ int fd;
+ char buf[buflen];
+ int len;
+};
+
+struct iobuf stdout = { 1 };
+struct iobuf stderr = { 2 };
+
+static void flush(struct iobuf *buf)
+{
+ int ret = write(buf->fd, buf->buf, buf->len);
+ if (ret != buf->len) {
+ write(2, _S("some stdout/stderr error\n"));
+ exit(3);
+ }
+ buf->len = 0;
+}
+
+static void putchar(struct iobuf *buf, char c)
+{
+ if (buf->len == buflen)
+ flush(buf);
+ buf->buf[buf->len] = c;
+ ++buf->len;
+
+ if (flush_on_newline && c == '\n')
+ flush(buf);
+}
+
+void hexit(int code)
+{
+ flush(&stdout);
+ flush(&stderr);
+ exit(code);
+}
+
+static void print_int(struct iobuf *buf, int nsigned, int byte4,
+ unsigned long n)
+{
+ char tmpbuf[64];
+ char *p = tmpbuf;
+ unsigned long k;
+
+ if (nsigned) {
+ if (byte4) {
+ int tmp = n;
+ if (tmp < 0) {
+ putchar(buf, '-');
+ tmp = -tmp;
+ }
+ k = tmp;
+ } else {
+ if ((signed long)n < 0) {
+ putchar(buf, '-');
+ n = -n;
+ }
+ k = n;
+ }
+ } else {
+ k = n;
+ }
+
+ if (k == 0) {
+ putchar(buf, '0');
+ return;
+ }
+
+ for (; k > 0; k /= 10) {
+ *p = k % 10 + '0';
+ ++p;
+ }
+ for (--p;; --p) {
+ putchar(buf, *p);
+ if (p == tmpbuf)
+ break;
+ }
+}
+
+void print_strz(struct iobuf *buf, char *s)
+{
+ for (; *s; ++s)
+ putchar(buf, *s);
+}
+
+void print_str(struct iobuf *buf, struct str *s)
+{
+ char *tmp = s->start;
+ for (; tmp != s->end; ++tmp)
+ putchar(buf, *tmp);
+}
+
+static void format(struct iobuf *buf, char c, va_list vl)
+{
+ switch(c) {
+ unsigned long arg;
+ case '%':
+ putchar(buf, '%');
+ break;
+ case 'd':
+ case 'u':
+ arg = va_arg(vl, unsigned int);
+ print_int(buf, c == 'd', 1, arg);
+ break;
+ case 'D':
+ case 'U':
+ arg = va_arg(vl, unsigned long);
+ print_int(buf, c == 'D', 0, arg);
+ break;
+ case 's':
+ print_strz(buf, va_arg(vl, char*));
+ break;
+ case 'S':
+ print_str(buf, va_arg(vl, struct str *));
+ break;
+ case 'c':
+ arg = va_arg(vl, int);
+ putchar(buf, arg);
+ break;
+ default:
+ write(2, _S("printf: unexpected format option \n"));
+ hexit(2);
+ }
+}
+
+static void __printf(struct iobuf *buf, char *s, va_list vl)
+{
+ for (; *s; ++s) {
+ if (*s == '%') {
+ if (s[1] == 0) {
+ write(2, _S("printf: nothing comes after '%'"));
+ hexit(2);
+ }
+ format(buf, s[1], vl);
+ ++s;
+ } else {
+ putchar(buf, *s);
+ }
+ }
+ return;
+}
+
+void printf(char *fmt, ...)
+{
+ va_list vl;
+ va_start(vl, fmt);
+ __printf(&stdout, fmt, vl);
+ va_end(vl);
+}
+
+void evprintf(char *fmt, va_list vl)
+{
+ print_strz(&stderr, progname);
+ print_strz(&stderr, ": ");
+ __printf(&stderr, fmt, vl);
+}
+
+void eprintf(char *fmt, ...)
+{
+ va_list vl;
+ va_start(vl, fmt);
+ evprintf(fmt, vl);
+ va_end(vl);
+}
+
+/* Memory management */
+
+struct mem_buffer {
+ char *cur, *end;
+ char *saved;
+};
+
+static struct mem_buffer mem_buf;
+
+static void mem_buffer_init()
+{
+ mem_buf.cur = mem_buf.end = brk(NULL);
+ mem_buf.saved = NULL;
+}
+
+void save_heap_offset()
+{
+ if (mem_buf.saved != NULL) {
+ eprintf("mem_buffer: offset already saved\n");
+ hexit(2);
+ }
+ mem_buf.saved = mem_buf.cur;
+}
+
+void restore_heap_offset()
+{
+ if (mem_buf.saved == NULL) {
+ eprintf("mem_buffer: no offset was saved\n");
+ hexit(2);
+ }
+ mem_buf.cur = mem_buf.saved;
+ mem_buf.saved = NULL;
+}
+
+static int get_alignment(int n)
+{
+ if (n >= 16)
+ return 16;
+ if (n >= 8)
+ return 8;
+ if (n >= 4)
+ return 4;
+ if (n >= 2)
+ return 2;
+ return 1;
+}
+
+static void mem_buf_grow(int size)
+{
+ char *tmp;
+ if (size < 128*1024) {
+ size = 128*1024;
+ } else {
+ size *= 2;
+ size &= -4096;
+ }
+ /*printf("Growing (%d)\n", size);*/
+ tmp = brk(mem_buf.end + size);
+ if (tmp == mem_buf.end) {
+ eprintf("mem_buf: unable to allocate memory\n");
+ hexit(1);
+ }
+ mem_buf.end = tmp;
+}
+
+#define ALIGN(v, align) \
+ (void*)(((unsigned long)(v) + (align) - 1ul) & -(align))
+
+void *malloc(int size)
+{
+ int align = get_alignment(size);
+ char *tmp = ALIGN(mem_buf.cur, align);
+ int diff = tmp + size - mem_buf.end;
+
+ if (diff > 0)
+ mem_buf_grow(diff);
+
+ mem_buf.cur = tmp + size;
+ return tmp;
+}
+
+int main(int argc, char **argv, char **envp);
+
+void _start_c(unsigned long *p)
+{
+ int argc = p[0];
+ char **argv = (void *)(p + 1);
+ char **envp = argv + argc + 1;
+
+ progname = argv[0];
+ mem_buffer_init();
+
+ hexit(main(argc, argv, envp));
+}
diff --git a/tools/common.h b/tools/common.h
new file mode 100644
index 0000000..43e2dda
--- /dev/null
+++ b/tools/common.h
@@ -0,0 +1,108 @@
+#ifndef COMMON_H
+#define COMMON_H
+
+#include <bits/errno.h>
+
+#include <stdarg.h>
+
+#define NULL ((void *)0)
+#define _S(s) s, (sizeof(s) - 1)
+#define CSTR(s) ((struct str) { s, s + sizeof(s) - 1 })
+#define STR(s) ((struct str) { s, s + strzlen(s) })
+
+struct str {
+ char *start;
+ char *end;
+};
+
+extern int errno;
+extern char *progname;
+
+void exit(int code);
+int read(int fd, char *buf, int len);
+int write(int fd, char *buf, int len);
+
+#define O_RDONLY 0
+
+int open(char *fname, int flags, int mode);
+
+/*
+ * Taken from linux/arch/x86/include/uapi/asm/stat.h
+ * Matches arch/x86_64/bits/stat.h
+ */
+
+struct time {
+ unsigned long sec;
+ unsigned long usec;
+};
+
+struct stat {
+ unsigned long dev;
+ unsigned long ino;
+ unsigned long nlink;
+
+ unsigned int mode;
+ unsigned int uid;
+ unsigned int gid;
+ unsigned int __pad0;
+
+ unsigned long rdev;
+ unsigned long size;
+ unsigned long blksize;
+ unsigned long blocks;
+
+ struct time atime;
+ struct time mtime;
+ struct time ctime;
+ long __unused[3];
+};
+
+int stat(char *fname, struct stat *st);
+int fstat(int fd, struct stat *st);
+
+int close(int fd);
+
+#define MAP_PRIVATE 0x02
+#define MAP_ANON 0x20
+
+#define PROT_NONE 0
+#define PROT_READ 1
+#define PROT_WRITE 2
+#define PROT_EXEC 4
+
+void *mmap(char *addr, unsigned long length, int prot, int flags, int fd,
+ unsigned long offset);
+int munmap(char *addr, int length);
+
+#define MREMAP_MAYMOVE 1
+
+int mremap(void *old_addr, unsigned long old_size, unsigned long new_size,
+ int flags, void *new_addr);
+
+void hexit(int code);
+
+extern struct iobuf stdout;
+extern struct iobuf stderr;
+
+void print_strz(struct iobuf *buf, char *s);
+void print_str(struct iobuf *buf, struct str *s);
+
+void printf(char *fmt, ...);
+void eprintf(char *fmt, ...);
+void evprintf(char *fmt, va_list vl);
+
+void save_heap_offset();
+void restore_heap_offset();
+void *malloc(int size);
+
+int strzlen(char *s);
+int strzeq(char *s1, char *s2);
+
+int streq(struct str *s1, struct str *s2);
+void perror(char *s);
+void syscall_error(char *fname);
+
+void memmove(char *dest, char *src, int len);
+void memcpy(void *dest, void *src, int len);
+void memset(void *dest, char c, int len);
+#endif
diff --git a/tools/sed.c b/tools/sed.c
new file mode 100644
index 0000000..fd60a07
--- /dev/null
+++ b/tools/sed.c
@@ -0,0 +1,1089 @@
+#include "common.h"
+
+enum {
+ max_commands = 16,
+ inputbuf_size = 32 * 1024,
+ regbuf_size = 40,
+ text_size = 1024,
+};
+
+struct command {
+ struct str condition;
+ struct str body;
+ struct str result;
+};
+
+enum regex_type {
+ rx_char,
+ rx_any,
+ rx_range,
+ rx_rep,
+ rx_rep_nongreedy,
+ rx_concat,
+};
+
+struct regex {
+ enum regex_type type;
+ int submatch_id;
+ union {
+ char c;
+ unsigned long long table[2];
+ struct regex *child;
+ struct {
+ struct regex **children;
+ int childnum;
+ };
+ };
+};
+
+struct regbuf {
+ struct regex *buf[regbuf_size];
+ int size;
+};
+
+struct parsing_state {
+ int candidate_id;
+ char ids[10];
+};
+
+enum opcode {
+ op_char, /* char c */
+ op_match, /* match */
+ op_any, /* any */
+ op_range, /* range [...] */
+ op_jmp, /* jmp L1 */
+ op_split, /* split L1, L2 */
+ op_save, /* save n */
+};
+
+struct ins {
+ enum opcode opcode;
+ union {
+ char c;
+ unsigned long long table[2];
+ int n;
+ struct ins *arg_labels[2];
+ };
+};
+
+struct regex_prog {
+ struct ins *text;
+ int len;
+ int size;
+};
+
+struct submatches {
+ char *saves[20];
+ int refcount;
+};
+
+struct thread {
+ struct ins *pc;
+ struct submatches *submatches;
+};
+
+struct node {
+ struct node *next;
+ struct node *prev;
+
+ struct thread t;
+};
+
+struct threadlist {
+ struct node *anchor;
+
+ struct node anchor_body;
+
+ struct ins *ref;
+ struct node **presence_table;
+
+};
+
+struct input_buf {
+ char buf[inputbuf_size];
+ char *start;
+ char *end;
+} input_buf;
+
+struct compiled_command {
+ struct str str_condition;
+ struct regex_prog condition;
+
+ struct str str_body;
+ struct regex_prog body;
+ struct str str_result;
+};
+
+static struct compiled_command command_buf[max_commands];
+static int command_num = 0;
+
+static void error(char *fmt, ...)
+{
+ va_list vl;
+ va_start(vl, fmt);
+ evprintf(fmt, vl);
+ va_end(vl);
+ print_strz(&stderr, "\n");
+ hexit(1);
+}
+
+static void inbuf_init()
+{
+ input_buf.start = input_buf.end = input_buf.buf;
+}
+
+static int readline(struct str *line)
+{
+ char *start = input_buf.start;
+ char *tmp;
+ int none_processed = 1;
+
+ for (;;) {
+ for (tmp = start; tmp != input_buf.end; ++tmp) {
+ none_processed = 0;
+ if (*tmp == '\n') {
+ line->start = input_buf.start;
+ line->end = tmp;
+ input_buf.start = tmp + 1;
+ return 1;
+ }
+ }
+ if (input_buf.end < input_buf.buf + inputbuf_size / 2) {
+ int rem, ret;
+read:
+ rem = input_buf.buf + inputbuf_size - input_buf.end;
+ ret = read(0, input_buf.end, rem);
+ if (ret == -1)
+ syscall_error("stdin");
+ if (ret == 0) {
+ if (none_processed)
+ return 0;
+ error("unexpected input line end");
+ }
+ start = input_buf.end;
+ input_buf.end += ret;
+ } else {
+ int size;
+ if (input_buf.start == input_buf.buf)
+ error("input line is too long");
+ size = input_buf.end - input_buf.start;
+ memmove(input_buf.buf, input_buf.start, size);
+ input_buf.start = input_buf.buf;
+ input_buf.end = input_buf.buf + size;
+ goto read;
+ }
+ }
+}
+
+static struct regex *regex_alloc(enum regex_type type)
+{
+ struct regex *tmp = malloc(sizeof(struct regex));
+ tmp->type = type;
+ tmp->submatch_id = -1;
+ return tmp;
+}
+
+static void regbuf_add(struct regbuf *buf, struct regex *r)
+{
+ if (buf->size == regbuf_size)
+ error("regbuf: no more space");
+ buf->buf[buf->size] = r;
+ ++buf->size;
+}
+
+static struct regex *regbuf_pop(struct regbuf *buf)
+{
+ if (buf->size == 0)
+ return NULL;
+ --buf->size;
+ return buf->buf[buf->size];
+}
+
+static char *find_end(struct str *s)
+{
+ for (; s->start < s->end; ++s->start) {
+ if (*s->start == ']') {
+ char *tmp = s->start;
+ s->start = tmp + 1;
+ return tmp;
+ }
+ }
+ error("regex_parse: missing ']'");
+ return NULL;
+}
+
+static void __parse_brackets(struct regex *r, struct str *s)
+{
+ unsigned long long constant;
+ char *tmp;
+ if (s->start == s->end)
+ goto error;
+ if (*s->start == '^') {
+ constant = -1ull;
+ ++s->start;
+ if (s->start == s->end)
+ goto error;
+ } else {
+ constant = 0;
+ }
+ r->table[0] = r->table[1] = constant;
+ for (tmp = s->start; tmp != s->end; ++tmp) {
+ /* flip bit */
+ r->table[*tmp / 64] ^= (1ull << (*tmp % 64));
+ }
+ return;
+error:
+ error("regex_parse: met [] or [^]");
+}
+
+static void parse_brackets(struct regbuf *buf, struct str *rx)
+{
+ struct str brackets;
+ struct regex *r;
+
+ ++rx->start;
+ brackets = (struct str) { rx->start, find_end(rx) };
+ r = regex_alloc(rx_range);
+
+ __parse_brackets(r, &brackets);
+ regbuf_add(buf, r);
+}
+
+static void parse_rep(struct regbuf *buf, struct str *rx)
+{
+ enum regex_type type;
+ struct regex *r, *tmp;
+ if (rx->start + 1 != rx->end && rx->start[1] == '?') {
+ type = rx_rep_nongreedy;
+ rx->start += 2;
+ } else {
+ type = rx_rep;
+ ++rx->start;
+ }
+ r = regex_alloc(type);
+ tmp = regbuf_pop(buf);
+ if (tmp == NULL)
+ error("regex_parse: stray '*'");
+ r->child = tmp;
+ regbuf_add(buf, r);
+}
+
+static char *find_pend(struct str *rx)
+{
+ int level = 0;
+ for (; rx->start + 1 < rx->end; ++rx->start) {
+ if (*rx->start == '\\' && rx->start[1] == '(')
+ ++level;
+ if (*rx->start == '\\' && rx->start[1] == ')') {
+ if (level == 0) {
+ char *tmp = rx->start;
+ rx->start += 2;
+ return tmp;
+ } else {
+ --level;
+ }
+ }
+ }
+ error("regex_parse: missing \\)");
+ return NULL;
+}
+
+static int pick_id(struct parsing_state *state)
+{
+ int i;
+ if (state->candidate_id != -1) {
+ int tmp = state->candidate_id;
+ state->ids[tmp] = 0;
+ state->candidate_id = -1;
+ return tmp;
+ }
+ /* 0, 8 and 9 are for internal use, so they must be requested
+ * explicitly"
+ */
+ for (i = 1; i < 8; ++i) {
+ if (state->ids[i] == 1) {
+ state->ids[i] = 0;
+ return i;
+ }
+ }
+ error("regex_parse: space of submatch ids is exhausted");
+ return -1;
+}
+
+static struct regex *__regex_parse(struct parsing_state *state,
+ struct str *rx);
+
+static void parse_parenthesis(struct parsing_state *state,
+ struct regbuf *buf, struct str *rx)
+{
+ struct str parenthesis;
+ struct regex *r;
+ int id;
+
+ ++rx->start;
+ id = pick_id(state);
+
+ parenthesis = (struct str) { rx->start, find_pend(rx) };
+ r = __regex_parse(state, &parenthesis);
+
+ r->submatch_id = id;
+ regbuf_add(buf, r);
+}
+
+static void parse_backslash(struct parsing_state *state,
+ struct regbuf *buf, struct str *rx)
+{
+ struct regex *r;
+ if (rx->start + 1 == rx->end)
+ error("regex_parse: stray '\\'");
+ ++rx->start;
+
+ switch(*rx->start) {
+ case '(':
+ parse_parenthesis(state, buf, rx);
+ break;
+ case '0' ... '9':
+ state->candidate_id = *rx->start - '0';
+ ++rx->start;
+ break;
+ default:
+ r = regex_alloc(rx_char);
+ r->c = *rx->start;
+ regbuf_add(buf, r);
+ ++rx->start;
+ }
+}
+
+static struct regex *collect(struct regex **buf, int len)
+{
+ if (len == 0)
+ error("regex_parse: empty regex");
+ if (len == 1) {
+ return *buf;
+ } else {
+ int size;
+ struct regex *r = regex_alloc(rx_concat);
+ r->childnum = len;
+ size = sizeof(struct regex *) * len;
+ r->children = malloc(size);
+ memcpy(r->children, buf, size);
+ return r;
+ }
+}
+
+static struct regex *__regex_parse(struct parsing_state *state,
+ struct str *rx)
+{
+ struct regbuf rbuf;
+ struct regex *r;
+
+ rbuf.size = 0;
+
+ while (rx->start != rx->end) {
+ switch(*rx->start) {
+ case '.':
+ r = regex_alloc(rx_any);
+ break;
+ case '[':
+ parse_brackets(&rbuf, rx);
+ continue;
+ case '*':
+ parse_rep(&rbuf, rx);
+ continue;
+ case '\\':
+ parse_backslash(state, &rbuf, rx);
+ continue;
+ default:
+ r = regex_alloc(rx_char);
+ r->c = *rx->start;
+ }
+ regbuf_add(&rbuf, r);
+ ++rx->start;
+ }
+ return collect(rbuf.buf, rbuf.size);
+}
+
+static struct regex *regex_parse(struct str rx)
+{
+ struct parsing_state state;
+ struct regex *buf[3];
+ int idx = 0, inc = 0;
+
+ memset(state.ids, 1, 10);
+ state.candidate_id = -1;
+
+ if (rx.start != rx.end && *rx.start != '^')
+ buf[idx++] = __regex_parse(&state, &CSTR("\\8\\(.*?\\)"));
+ else
+ ++rx.start;
+
+ if (rx.start != rx.end && rx.end[-1] != '$') {
+ buf[idx + 1] = __regex_parse(&state, &CSTR("\\9\\(.*\\)"));
+ inc = 1;
+ } else {
+ --rx.end;
+ }
+
+ buf[idx] = __regex_parse(&state, &rx);
+ if (buf[idx]->submatch_id != -1)
+ error("regex_parse: whole regex receives id=0 implicitly");
+ buf[idx]->submatch_id = 0;
+ ++idx;
+
+ idx += inc;
+ return collect(buf, idx);
+}
+
+static char *print_range(unsigned long long *table)
+{
+ static char buf[68];
+ int idx = 0;
+ int i;
+ unsigned long long tcopy[2] = { table[0], table[1] };
+
+ buf[idx++] = '[';
+ int raised_bits = __builtin_popcountll(tcopy[0]) +
+ __builtin_popcountll(tcopy[1]);
+ if (raised_bits > 64) {
+ buf[idx++] = '^';
+ tcopy[0] = ~tcopy[0];
+ tcopy[1] = ~tcopy[1];
+ }
+ for (i = 0; i < 128; ++i) {
+ if (tcopy[i / 64] & (1ull << (i % 64)))
+ buf[idx++] = i;
+ }
+ buf[idx++] = ']';
+ return buf;
+}
+
+static void __regex_print(struct regex *r)
+{
+ int i;
+
+ if (r->submatch_id != -1)
+ printf("%d:", r->submatch_id);
+ switch(r->type) {
+ case rx_char:
+ if (r->c == ' ')
+ printf("' '");
+ else
+ printf("%c", r->c);
+ return;
+ case rx_any:
+ printf(".");
+ return;
+ case rx_range:
+ printf("%s", print_range(r->table));
+ return;
+ case rx_concat:
+ printf("(| ");
+ for (i = 0; i < r->childnum; ++i) {
+ __regex_print(r->children[i]);
+ if (i + 1 < r->childnum)
+ printf(" ");
+ else
+ printf(")");
+ }
+ return;
+ case rx_rep:
+ case rx_rep_nongreedy:
+ printf("(*");
+ if (r->type == rx_rep_nongreedy)
+ printf("? ");
+ else
+ printf(" ");
+ __regex_print(r->child);
+ printf(")");
+ return;
+ }
+}
+
+static void regex_print(struct regex *r)
+{
+ __regex_print(r);
+ printf("\n");
+}
+
+static void regex_prog_alloc(struct regex_prog *prog)
+{
+ prog->size = text_size;
+ prog->text = malloc(sizeof(struct ins) * prog->size);
+ prog->len = 0;
+}
+
+static struct ins *prog_get_ins(struct regex_prog *prog, int inc)
+{
+ struct ins *tmp;
+ if (prog->len == prog->size)
+ error("regex_prog: text section is too big");
+ tmp = prog->text + prog->len;
+ if (inc)
+ ++prog->len;
+ return tmp;
+}
+
+static void __regex_compile(struct regex_prog *prog, struct regex *r);
+
+static void compile_rep(struct regex_prog *prog, struct regex *r)
+{
+ struct ins *tmp, *tmp2;
+
+ tmp = prog_get_ins(prog, 1);
+ tmp->opcode = op_split;
+ prog_get_ins(prog, 0);
+ tmp->arg_labels[0] = tmp + 1;
+
+ __regex_compile(prog, r->child);
+ tmp2 = prog_get_ins(prog, 1);
+ tmp2->opcode = op_jmp;
+ tmp2->arg_labels[0] = tmp;
+
+ tmp2 = prog_get_ins(prog, 0);
+ tmp->arg_labels[1] = tmp2;
+
+ if (r->type == rx_rep_nongreedy) {
+ struct ins *tmp3 = tmp->arg_labels[1];
+ tmp->arg_labels[1] = tmp->arg_labels[0];
+ tmp->arg_labels[0] = tmp3;
+ }
+}
+
+static void __regex_compile(struct regex_prog *prog, struct regex *r)
+{
+ struct ins *tmp;
+ int i;
+
+ if (r->submatch_id != -1) {
+ tmp = prog_get_ins(prog, 1);
+ tmp->opcode = op_save;
+ tmp->n = r->submatch_id * 2;
+ }
+ switch(r->type) {
+ case rx_char:
+ tmp = prog_get_ins(prog, 1);
+ tmp->opcode = op_char;
+ tmp->c = r->c;
+ break;
+ case rx_any:
+ tmp = prog_get_ins(prog, 1);
+ tmp->opcode = op_any;
+ break;
+ case rx_range:
+ tmp = prog_get_ins(prog, 1);
+ tmp->opcode = op_range;
+ tmp->table[0] = r->table[0];
+ tmp->table[1] = r->table[1];
+ break;
+ case rx_concat:
+ for (i = 0; i < r->childnum; ++i)
+ __regex_compile(prog, r->children[i]);
+ break;
+ case rx_rep:
+ case rx_rep_nongreedy:
+ compile_rep(prog, r);
+ break;
+ }
+ if (r->submatch_id != -1) {
+ tmp = prog_get_ins(prog, 1);
+ tmp->opcode = op_save;
+ tmp->n = r->submatch_id * 2 + 1;
+ }
+}
+
+static void regex_compile(struct regex_prog *prog, struct regex *r)
+{
+ struct ins *tmp;
+ regex_prog_alloc(prog);
+ __regex_compile(prog, r);
+ tmp = prog_get_ins(prog, 1);
+ tmp->opcode = op_match;
+}
+
+static void assign_L(int *labels, int offset)
+{
+ static int next_label = 0;
+ if (labels[offset] == -1) {
+ labels[offset] = next_label;
+ ++next_label;
+ }
+}
+
+static void fill_label_table(int *labels, struct regex_prog *prog)
+{
+ struct ins *tmp;
+ for (tmp = prog->text; tmp < prog->text + prog->len; ++tmp) {
+ if (tmp->opcode == op_jmp) {
+ assign_L(labels, tmp->arg_labels[0] - prog->text);
+ }
+ if (tmp->opcode == op_split) {
+ assign_L(labels, tmp->arg_labels[0] - prog->text);
+ assign_L(labels, tmp->arg_labels[1] - prog->text);
+ }
+ }
+}
+
+static void print_ins(struct ins *ins, int *labels,
+ struct regex_prog *prog)
+{
+ switch(ins->opcode) {
+ case op_char:
+ printf("\tchar ");
+ if (ins->c == ' ')
+ printf("' '\n");
+ else
+ printf("%c\n", ins->c);
+ break;
+ case op_any:
+ printf("\tany\n");
+ break;
+ case op_range:
+ printf("\trange %s\n", print_range(ins->table));
+ break;
+ case op_match:
+ printf("\tmatch\n");
+ break;
+ case op_split:
+ printf("\tsplit L%d, L%d\n",
+ labels[ins->arg_labels[0] - prog->text],
+ labels[ins->arg_labels[1] - prog->text]);
+ break;
+ case op_jmp:
+ printf("\tjmp L%d\n",
+ labels[ins->arg_labels[0] - prog->text]);
+ break;
+ case op_save:
+ printf("\tsave %d\n", ins->n);
+ break;
+ }
+}
+
+static void regex_prog_print(struct regex_prog *prog)
+{
+ struct ins *tmp;
+ int *labels = malloc(sizeof(int) * prog->len);
+ memset(labels, -1, sizeof(int) * prog->len);
+ fill_label_table(labels, prog);
+
+ for (tmp = prog->text; tmp < prog->text + prog->len; ++tmp) {
+ int offset = tmp - prog->text;
+ if (labels[offset] != -1)
+ printf("L%d:", labels[offset]);
+ print_ins(tmp, labels, prog);
+ }
+}
+
+static struct threadlist *threadlist_init(struct regex_prog *prog)
+{
+ struct threadlist *tmp = malloc(sizeof(struct threadlist));
+
+ tmp->anchor = &tmp->anchor_body;
+ tmp->anchor->prev = tmp->anchor;
+ tmp->anchor->next = tmp->anchor;
+
+ tmp->presence_table = malloc(sizeof(struct node *) * prog->len);
+ tmp->ref = prog->text;
+ memset(tmp->presence_table, 0, sizeof(struct node *) * prog->len);
+ return tmp;
+}
+
+static struct node *alloc_node()
+{
+ struct node *tmp = malloc(sizeof(struct node));
+ tmp->next = tmp->prev = NULL;
+ return tmp;
+}
+
+static void node_extract(struct node *n)
+{
+ n->prev->next = n->next;
+ n->next->prev = n->prev;
+}
+
+static void node_addfront(struct threadlist *tl, struct node *n)
+{
+ n->next = tl->anchor->next;
+ n->prev = tl->anchor;
+ tl->anchor->next->prev = n;
+ tl->anchor->next = n;
+}
+
+static int is_threadlist_empty(struct threadlist *tl)
+{
+ return tl->anchor->next == tl->anchor;
+}
+
+int adds = 0;
+int adjustments = 0;
+
+static struct node **adjust_if_present(struct threadlist *tl,
+ struct thread *t, int back)
+{
+ struct node **cell = tl->presence_table + (t->pc - tl->ref);
+
+ if (*cell == NULL)
+ return cell;
+ if (back)
+ return NULL;
+ node_extract(*cell);
+ node_addfront(tl, *cell);
+ return NULL;
+}
+
+static void threadlist_addfront(struct threadlist *tl, struct thread t)
+{
+ struct node *tmp;
+ struct node **cell;
+ if ((cell = adjust_if_present(tl, &t, 0)) == NULL)
+ return;
+
+ ++adds;
+ tmp = alloc_node();
+ tmp->t = t;
+ node_addfront(tl, tmp);
+ *cell = tmp;
+}
+
+static void threadlist_addback(struct threadlist *tl, struct thread t)
+{
+ struct node *tmp;
+ struct node **cell;
+
+ if ((cell = adjust_if_present(tl, &t, 1)) == NULL)
+ return;
+
+ ++adds;
+ tmp = alloc_node();
+ tmp->t = t;
+
+ tmp->prev = tl->anchor->prev;
+ tmp->next = tl->anchor;
+ tl->anchor->prev->next = tmp;
+ tl->anchor->prev = tmp;
+ *cell = tmp;
+}
+
+static int threadlist_getfront(struct threadlist *tl, struct thread *t)
+{
+ struct node *tmp;
+
+ if (is_threadlist_empty(tl))
+ return 0;
+ tmp = tl->anchor->next;
+ *t = tmp->t;
+
+ node_extract(tmp);
+
+ tl->presence_table[t->pc - tl->ref] = NULL;
+ return 1;
+}
+
+static struct submatches *alloc_submatches(char **p)
+{
+ int i;
+ struct submatches *tmp = malloc(sizeof(*tmp));
+ tmp->refcount = 1;
+ if (p == NULL) {
+ for (i = 0; i < 20; ++i)
+ tmp->saves[i] = NULL;
+ } else {
+ for (i = 0; i < 20; ++i)
+ tmp->saves[i] = p[i];
+ }
+ return tmp;
+}
+
+static struct thread thread(struct ins *pc)
+{
+ return (struct thread) { pc, alloc_submatches(NULL) };
+}
+
+static struct thread thread_copy(struct thread *t)
+{
+ ++t->submatches->refcount;
+ return *t;
+}
+
+static void save_offset(struct thread *t, int n, char *offset)
+{
+ if (t->submatches->refcount > 1) {
+ --t->submatches->refcount;
+ t->submatches = alloc_submatches(t->submatches->saves);
+ }
+ t->submatches->saves[n] = offset;
+}
+
+
+static char **exec_threadlist(struct threadlist *clist,
+ struct threadlist *nlist, char c, char *offset)
+{
+ struct thread tmp;
+ while (threadlist_getfront(clist, &tmp)) {
+start:
+ switch(tmp.pc->opcode) {
+ case op_match:
+ if (c == 0)
+ return tmp.submatches->saves;
+ break;
+ case op_char:
+ if (tmp.pc->c == c) {
+ ++tmp.pc;
+ threadlist_addback(nlist, tmp);
+ }
+ break;
+ case op_any:
+ if (c != 0) {
+ ++tmp.pc;
+ threadlist_addback(nlist, tmp);
+ }
+ break;
+ case op_range:
+ if (tmp.pc->table[c / 64] & (1ull << (c % 64))) {
+ ++tmp.pc;
+ threadlist_addback(nlist, tmp);
+ }
+ break;
+ case op_jmp:
+ tmp.pc = tmp.pc->arg_labels[0];
+ goto start;
+ case op_split:
+ struct thread tmp2 = thread_copy(&tmp);
+ tmp2.pc = tmp.pc->arg_labels[1];
+ tmp.pc = tmp.pc->arg_labels[0];
+ threadlist_addfront(clist, tmp2);
+ threadlist_addfront(clist, tmp);
+ break;
+ case op_save:
+ save_offset(&tmp, tmp.pc->n, offset);
+ ++tmp.pc;
+ goto start;
+ }
+ }
+ return NULL;
+}
+
+static char **__regex_exec(struct regex_prog *prog, struct str *str)
+{
+ struct threadlist *clist = threadlist_init(prog);
+ struct threadlist *nlist = threadlist_init(prog);
+ struct threadlist *tmpl;
+ char *tmps = str->start;
+
+ threadlist_addback(clist, thread(prog->text));
+
+ for (; !is_threadlist_empty(clist); ++tmps) {
+ char c = tmps < str->end ? *tmps: 0;
+ char **ret = exec_threadlist(clist, nlist, c, tmps);
+ if (ret != NULL)
+ return ret;
+ if (tmps == str->end)
+ return NULL;
+ tmpl = nlist;
+ nlist = clist;
+ clist = tmpl;
+ }
+ return NULL;
+}
+
+static char **regex_exec(struct regex_prog *prog, struct str *str)
+{
+ char **ret;
+ save_heap_offset();
+ ret = __regex_exec(prog, str);
+ restore_heap_offset();
+ return ret;
+}
+
+static void regex_compile_wrapper(struct regex_prog *prog,
+ struct str *regex)
+{
+ struct regex *r = regex_parse(*regex);
+ /*regex_print(r);*/
+ regex_compile(prog, r);
+ /*regex_prog_print(prog);*/
+}
+
+static void print_saved(char **saves)
+{
+ int i;
+ if (saves == NULL) {
+ printf("No match\n");
+ return;
+ }
+ for (i = 0; i < 10; ++i) {
+ struct str tmp = { saves[i * 2], saves[i * 2 + 1] };
+ printf("%d: %S\n", i, &tmp);
+ }
+}
+
+static void read_part(struct str *part, struct str *s, char *err)
+{
+ char *tmp = s->start;
+ for (; tmp != s->end && *tmp != '/'; ++tmp)
+ ;
+ if (tmp == s->end)
+ error("%s", err);
+ part->start = s->start;
+ part->end = tmp;
+ s->start = tmp + 1;
+}
+
+static int is_space(char c)
+{
+ return c == ' ' || c == '\t' || c == '\n';
+}
+
+static void skip_spaces(struct str *s)
+{
+ for (; s->start != s->end && is_space(*s->start); ++s->start)
+ ;
+}
+
+static int parse_command(struct str *s)
+{
+ struct str cmdname;
+
+ struct compiled_command *ccmd = command_buf + command_num;
+
+ if (command_num == max_commands)
+ error("too many commands");
+
+ skip_spaces(s);
+ if (s->end - s->start == 0)
+ return 0;
+
+ if (s->start[0] == '/') {
+ ++s->start;
+ read_part(&ccmd->str_condition, s,
+ "command: unfinished condition");
+ regex_compile_wrapper(&ccmd->condition,
+ &ccmd->str_condition);
+ } else {
+ ccmd->str_condition.start = NULL;
+ ccmd->str_condition.end = NULL;
+ }
+ read_part(&cmdname, s, "command: unfinished command type");
+ read_part(&ccmd->str_body, s, "command: unfinished body");
+ regex_compile_wrapper(&ccmd->body, &ccmd->str_body);
+
+ read_part(&ccmd->str_result, s, "command: unfinished result");
+ if (streq(&cmdname, &CSTR("s")) == 0)
+ error("%s: command not supported");
+
+ ++command_num;
+ return 1;
+}
+
+static void parse_file(char *fname)
+{
+ char *addr;
+ struct str contents;
+ struct stat st;
+ int fd = open(fname, O_RDONLY, 0);
+
+ if (fd == -1)
+ goto syserror;
+ if (fstat(fd, &st) == -1)
+ goto syserror;
+
+ addr = mmap(NULL, st.size, PROT_READ, MAP_PRIVATE, fd, 0);
+ if (addr == (void*)-1)
+ goto syserror;
+ if (close(fd) == -1)
+ goto syserror;
+
+ contents = (struct str) { addr, addr + st.size };
+ while (parse_command(&contents))
+ ;
+ return;
+syserror:
+ syscall_error(fname);
+}
+
+static void parse_cmdargs(int argc, char **argv)
+{
+ ++argv;
+ if (*argv == NULL) {
+ printf("No help for you\n");
+ hexit(1);
+ } else if (strzeq(*argv, "-f")) {
+ if (argv[1] == NULL)
+ error("-f: missing file");
+ parse_file(argv[1]);
+ argv += 2;
+ if (*argv != NULL)
+ goto unexpected_arg;
+ } else {
+ parse_command(&STR(*argv));
+ ++argv;
+ if (*argv != NULL)
+ goto unexpected_arg;
+
+ }
+ return;
+unexpected_arg:
+ error("%s: unexpected argument", *argv);
+}
+
+static void construct_result(struct str *saves, struct str *result)
+{
+ char *tmp;
+
+ printf("%S", saves + 8);
+ for (tmp = result->start; tmp != result->end; ++tmp) {
+ if (*tmp != '\\') {
+ printf("%c", *tmp);
+ } else {
+ if (tmp + 1 == result->end)
+ error("stray '\\' in input");
+ ++tmp;
+ switch(*tmp) {
+ case '0' ... '7':
+ printf("%S", saves + (*tmp - '0'));
+ break;
+ default:
+ printf("%c", *tmp);
+ }
+ }
+ }
+ printf("%S\n", saves + 9);
+}
+
+static void process_line(struct str *line)
+{
+ int i;
+ for (i = 0; i < command_num; ++i) {
+ struct compiled_command *cmd = command_buf + i;
+ char **res;
+ if (cmd->str_condition.start != cmd->str_condition.end) {
+ res = regex_exec(&cmd->condition, line);
+ if (res == NULL)
+ continue;
+ }
+ res = regex_exec(&cmd->body, line);
+ if (res == NULL)
+ continue;
+ /* printf("Found: %S|%S\n", &cmd->str_condition,
+ &cmd->str_body); */
+ construct_result((struct str*)res, &cmd->str_result);
+ return;
+ }
+ printf("%S\n", line);
+}
+
+static void process()
+{
+ struct str line;
+
+ while (readline(&line))
+ process_line(&line);
+}
+
+int main(int argc, char **argv, char **envp)
+{
+ parse_cmdargs(argc, argv);
+ inbuf_init();
+ process();
+
+ return 0;
+}