summaryrefslogtreecommitdiff
path: root/tools/sed.c
diff options
context:
space:
mode:
authorVladimir Azarov <avm@intermediate-node.net>2024-10-13 20:54:32 +0200
committerVladimir Azarov <avm@intermediate-node.net>2024-10-13 20:54:32 +0200
commit3972268b97b789a8212ea9bc7d1b9ce9905a68d8 (patch)
treeb0cbf5012266da4702afb640f6bb1cdb2f097153 /tools/sed.c
parent47a781f03119d360ef2b2936234d381fd2f4c85d (diff)
Base library for tools and sed utility
Diffstat (limited to 'tools/sed.c')
-rw-r--r--tools/sed.c1089
1 files changed, 1089 insertions, 0 deletions
diff --git a/tools/sed.c b/tools/sed.c
new file mode 100644
index 0000000..fd60a07
--- /dev/null
+++ b/tools/sed.c
@@ -0,0 +1,1089 @@
+#include "common.h"
+
+enum {
+ max_commands = 16,
+ inputbuf_size = 32 * 1024,
+ regbuf_size = 40,
+ text_size = 1024,
+};
+
+struct command {
+ struct str condition;
+ struct str body;
+ struct str result;
+};
+
+enum regex_type {
+ rx_char,
+ rx_any,
+ rx_range,
+ rx_rep,
+ rx_rep_nongreedy,
+ rx_concat,
+};
+
+struct regex {
+ enum regex_type type;
+ int submatch_id;
+ union {
+ char c;
+ unsigned long long table[2];
+ struct regex *child;
+ struct {
+ struct regex **children;
+ int childnum;
+ };
+ };
+};
+
+struct regbuf {
+ struct regex *buf[regbuf_size];
+ int size;
+};
+
+struct parsing_state {
+ int candidate_id;
+ char ids[10];
+};
+
+enum opcode {
+ op_char, /* char c */
+ op_match, /* match */
+ op_any, /* any */
+ op_range, /* range [...] */
+ op_jmp, /* jmp L1 */
+ op_split, /* split L1, L2 */
+ op_save, /* save n */
+};
+
+struct ins {
+ enum opcode opcode;
+ union {
+ char c;
+ unsigned long long table[2];
+ int n;
+ struct ins *arg_labels[2];
+ };
+};
+
+struct regex_prog {
+ struct ins *text;
+ int len;
+ int size;
+};
+
+struct submatches {
+ char *saves[20];
+ int refcount;
+};
+
+struct thread {
+ struct ins *pc;
+ struct submatches *submatches;
+};
+
+struct node {
+ struct node *next;
+ struct node *prev;
+
+ struct thread t;
+};
+
+struct threadlist {
+ struct node *anchor;
+
+ struct node anchor_body;
+
+ struct ins *ref;
+ struct node **presence_table;
+
+};
+
+struct input_buf {
+ char buf[inputbuf_size];
+ char *start;
+ char *end;
+} input_buf;
+
+struct compiled_command {
+ struct str str_condition;
+ struct regex_prog condition;
+
+ struct str str_body;
+ struct regex_prog body;
+ struct str str_result;
+};
+
+static struct compiled_command command_buf[max_commands];
+static int command_num = 0;
+
+static void error(char *fmt, ...)
+{
+ va_list vl;
+ va_start(vl, fmt);
+ evprintf(fmt, vl);
+ va_end(vl);
+ print_strz(&stderr, "\n");
+ hexit(1);
+}
+
+static void inbuf_init()
+{
+ input_buf.start = input_buf.end = input_buf.buf;
+}
+
+static int readline(struct str *line)
+{
+ char *start = input_buf.start;
+ char *tmp;
+ int none_processed = 1;
+
+ for (;;) {
+ for (tmp = start; tmp != input_buf.end; ++tmp) {
+ none_processed = 0;
+ if (*tmp == '\n') {
+ line->start = input_buf.start;
+ line->end = tmp;
+ input_buf.start = tmp + 1;
+ return 1;
+ }
+ }
+ if (input_buf.end < input_buf.buf + inputbuf_size / 2) {
+ int rem, ret;
+read:
+ rem = input_buf.buf + inputbuf_size - input_buf.end;
+ ret = read(0, input_buf.end, rem);
+ if (ret == -1)
+ syscall_error("stdin");
+ if (ret == 0) {
+ if (none_processed)
+ return 0;
+ error("unexpected input line end");
+ }
+ start = input_buf.end;
+ input_buf.end += ret;
+ } else {
+ int size;
+ if (input_buf.start == input_buf.buf)
+ error("input line is too long");
+ size = input_buf.end - input_buf.start;
+ memmove(input_buf.buf, input_buf.start, size);
+ input_buf.start = input_buf.buf;
+ input_buf.end = input_buf.buf + size;
+ goto read;
+ }
+ }
+}
+
+static struct regex *regex_alloc(enum regex_type type)
+{
+ struct regex *tmp = malloc(sizeof(struct regex));
+ tmp->type = type;
+ tmp->submatch_id = -1;
+ return tmp;
+}
+
+static void regbuf_add(struct regbuf *buf, struct regex *r)
+{
+ if (buf->size == regbuf_size)
+ error("regbuf: no more space");
+ buf->buf[buf->size] = r;
+ ++buf->size;
+}
+
+static struct regex *regbuf_pop(struct regbuf *buf)
+{
+ if (buf->size == 0)
+ return NULL;
+ --buf->size;
+ return buf->buf[buf->size];
+}
+
+static char *find_end(struct str *s)
+{
+ for (; s->start < s->end; ++s->start) {
+ if (*s->start == ']') {
+ char *tmp = s->start;
+ s->start = tmp + 1;
+ return tmp;
+ }
+ }
+ error("regex_parse: missing ']'");
+ return NULL;
+}
+
+static void __parse_brackets(struct regex *r, struct str *s)
+{
+ unsigned long long constant;
+ char *tmp;
+ if (s->start == s->end)
+ goto error;
+ if (*s->start == '^') {
+ constant = -1ull;
+ ++s->start;
+ if (s->start == s->end)
+ goto error;
+ } else {
+ constant = 0;
+ }
+ r->table[0] = r->table[1] = constant;
+ for (tmp = s->start; tmp != s->end; ++tmp) {
+ /* flip bit */
+ r->table[*tmp / 64] ^= (1ull << (*tmp % 64));
+ }
+ return;
+error:
+ error("regex_parse: met [] or [^]");
+}
+
+static void parse_brackets(struct regbuf *buf, struct str *rx)
+{
+ struct str brackets;
+ struct regex *r;
+
+ ++rx->start;
+ brackets = (struct str) { rx->start, find_end(rx) };
+ r = regex_alloc(rx_range);
+
+ __parse_brackets(r, &brackets);
+ regbuf_add(buf, r);
+}
+
+static void parse_rep(struct regbuf *buf, struct str *rx)
+{
+ enum regex_type type;
+ struct regex *r, *tmp;
+ if (rx->start + 1 != rx->end && rx->start[1] == '?') {
+ type = rx_rep_nongreedy;
+ rx->start += 2;
+ } else {
+ type = rx_rep;
+ ++rx->start;
+ }
+ r = regex_alloc(type);
+ tmp = regbuf_pop(buf);
+ if (tmp == NULL)
+ error("regex_parse: stray '*'");
+ r->child = tmp;
+ regbuf_add(buf, r);
+}
+
+static char *find_pend(struct str *rx)
+{
+ int level = 0;
+ for (; rx->start + 1 < rx->end; ++rx->start) {
+ if (*rx->start == '\\' && rx->start[1] == '(')
+ ++level;
+ if (*rx->start == '\\' && rx->start[1] == ')') {
+ if (level == 0) {
+ char *tmp = rx->start;
+ rx->start += 2;
+ return tmp;
+ } else {
+ --level;
+ }
+ }
+ }
+ error("regex_parse: missing \\)");
+ return NULL;
+}
+
+static int pick_id(struct parsing_state *state)
+{
+ int i;
+ if (state->candidate_id != -1) {
+ int tmp = state->candidate_id;
+ state->ids[tmp] = 0;
+ state->candidate_id = -1;
+ return tmp;
+ }
+ /* 0, 8 and 9 are for internal use, so they must be requested
+ * explicitly"
+ */
+ for (i = 1; i < 8; ++i) {
+ if (state->ids[i] == 1) {
+ state->ids[i] = 0;
+ return i;
+ }
+ }
+ error("regex_parse: space of submatch ids is exhausted");
+ return -1;
+}
+
+static struct regex *__regex_parse(struct parsing_state *state,
+ struct str *rx);
+
+static void parse_parenthesis(struct parsing_state *state,
+ struct regbuf *buf, struct str *rx)
+{
+ struct str parenthesis;
+ struct regex *r;
+ int id;
+
+ ++rx->start;
+ id = pick_id(state);
+
+ parenthesis = (struct str) { rx->start, find_pend(rx) };
+ r = __regex_parse(state, &parenthesis);
+
+ r->submatch_id = id;
+ regbuf_add(buf, r);
+}
+
+static void parse_backslash(struct parsing_state *state,
+ struct regbuf *buf, struct str *rx)
+{
+ struct regex *r;
+ if (rx->start + 1 == rx->end)
+ error("regex_parse: stray '\\'");
+ ++rx->start;
+
+ switch(*rx->start) {
+ case '(':
+ parse_parenthesis(state, buf, rx);
+ break;
+ case '0' ... '9':
+ state->candidate_id = *rx->start - '0';
+ ++rx->start;
+ break;
+ default:
+ r = regex_alloc(rx_char);
+ r->c = *rx->start;
+ regbuf_add(buf, r);
+ ++rx->start;
+ }
+}
+
+static struct regex *collect(struct regex **buf, int len)
+{
+ if (len == 0)
+ error("regex_parse: empty regex");
+ if (len == 1) {
+ return *buf;
+ } else {
+ int size;
+ struct regex *r = regex_alloc(rx_concat);
+ r->childnum = len;
+ size = sizeof(struct regex *) * len;
+ r->children = malloc(size);
+ memcpy(r->children, buf, size);
+ return r;
+ }
+}
+
+static struct regex *__regex_parse(struct parsing_state *state,
+ struct str *rx)
+{
+ struct regbuf rbuf;
+ struct regex *r;
+
+ rbuf.size = 0;
+
+ while (rx->start != rx->end) {
+ switch(*rx->start) {
+ case '.':
+ r = regex_alloc(rx_any);
+ break;
+ case '[':
+ parse_brackets(&rbuf, rx);
+ continue;
+ case '*':
+ parse_rep(&rbuf, rx);
+ continue;
+ case '\\':
+ parse_backslash(state, &rbuf, rx);
+ continue;
+ default:
+ r = regex_alloc(rx_char);
+ r->c = *rx->start;
+ }
+ regbuf_add(&rbuf, r);
+ ++rx->start;
+ }
+ return collect(rbuf.buf, rbuf.size);
+}
+
+static struct regex *regex_parse(struct str rx)
+{
+ struct parsing_state state;
+ struct regex *buf[3];
+ int idx = 0, inc = 0;
+
+ memset(state.ids, 1, 10);
+ state.candidate_id = -1;
+
+ if (rx.start != rx.end && *rx.start != '^')
+ buf[idx++] = __regex_parse(&state, &CSTR("\\8\\(.*?\\)"));
+ else
+ ++rx.start;
+
+ if (rx.start != rx.end && rx.end[-1] != '$') {
+ buf[idx + 1] = __regex_parse(&state, &CSTR("\\9\\(.*\\)"));
+ inc = 1;
+ } else {
+ --rx.end;
+ }
+
+ buf[idx] = __regex_parse(&state, &rx);
+ if (buf[idx]->submatch_id != -1)
+ error("regex_parse: whole regex receives id=0 implicitly");
+ buf[idx]->submatch_id = 0;
+ ++idx;
+
+ idx += inc;
+ return collect(buf, idx);
+}
+
+static char *print_range(unsigned long long *table)
+{
+ static char buf[68];
+ int idx = 0;
+ int i;
+ unsigned long long tcopy[2] = { table[0], table[1] };
+
+ buf[idx++] = '[';
+ int raised_bits = __builtin_popcountll(tcopy[0]) +
+ __builtin_popcountll(tcopy[1]);
+ if (raised_bits > 64) {
+ buf[idx++] = '^';
+ tcopy[0] = ~tcopy[0];
+ tcopy[1] = ~tcopy[1];
+ }
+ for (i = 0; i < 128; ++i) {
+ if (tcopy[i / 64] & (1ull << (i % 64)))
+ buf[idx++] = i;
+ }
+ buf[idx++] = ']';
+ return buf;
+}
+
+static void __regex_print(struct regex *r)
+{
+ int i;
+
+ if (r->submatch_id != -1)
+ printf("%d:", r->submatch_id);
+ switch(r->type) {
+ case rx_char:
+ if (r->c == ' ')
+ printf("' '");
+ else
+ printf("%c", r->c);
+ return;
+ case rx_any:
+ printf(".");
+ return;
+ case rx_range:
+ printf("%s", print_range(r->table));
+ return;
+ case rx_concat:
+ printf("(| ");
+ for (i = 0; i < r->childnum; ++i) {
+ __regex_print(r->children[i]);
+ if (i + 1 < r->childnum)
+ printf(" ");
+ else
+ printf(")");
+ }
+ return;
+ case rx_rep:
+ case rx_rep_nongreedy:
+ printf("(*");
+ if (r->type == rx_rep_nongreedy)
+ printf("? ");
+ else
+ printf(" ");
+ __regex_print(r->child);
+ printf(")");
+ return;
+ }
+}
+
+static void regex_print(struct regex *r)
+{
+ __regex_print(r);
+ printf("\n");
+}
+
+static void regex_prog_alloc(struct regex_prog *prog)
+{
+ prog->size = text_size;
+ prog->text = malloc(sizeof(struct ins) * prog->size);
+ prog->len = 0;
+}
+
+static struct ins *prog_get_ins(struct regex_prog *prog, int inc)
+{
+ struct ins *tmp;
+ if (prog->len == prog->size)
+ error("regex_prog: text section is too big");
+ tmp = prog->text + prog->len;
+ if (inc)
+ ++prog->len;
+ return tmp;
+}
+
+static void __regex_compile(struct regex_prog *prog, struct regex *r);
+
+static void compile_rep(struct regex_prog *prog, struct regex *r)
+{
+ struct ins *tmp, *tmp2;
+
+ tmp = prog_get_ins(prog, 1);
+ tmp->opcode = op_split;
+ prog_get_ins(prog, 0);
+ tmp->arg_labels[0] = tmp + 1;
+
+ __regex_compile(prog, r->child);
+ tmp2 = prog_get_ins(prog, 1);
+ tmp2->opcode = op_jmp;
+ tmp2->arg_labels[0] = tmp;
+
+ tmp2 = prog_get_ins(prog, 0);
+ tmp->arg_labels[1] = tmp2;
+
+ if (r->type == rx_rep_nongreedy) {
+ struct ins *tmp3 = tmp->arg_labels[1];
+ tmp->arg_labels[1] = tmp->arg_labels[0];
+ tmp->arg_labels[0] = tmp3;
+ }
+}
+
+static void __regex_compile(struct regex_prog *prog, struct regex *r)
+{
+ struct ins *tmp;
+ int i;
+
+ if (r->submatch_id != -1) {
+ tmp = prog_get_ins(prog, 1);
+ tmp->opcode = op_save;
+ tmp->n = r->submatch_id * 2;
+ }
+ switch(r->type) {
+ case rx_char:
+ tmp = prog_get_ins(prog, 1);
+ tmp->opcode = op_char;
+ tmp->c = r->c;
+ break;
+ case rx_any:
+ tmp = prog_get_ins(prog, 1);
+ tmp->opcode = op_any;
+ break;
+ case rx_range:
+ tmp = prog_get_ins(prog, 1);
+ tmp->opcode = op_range;
+ tmp->table[0] = r->table[0];
+ tmp->table[1] = r->table[1];
+ break;
+ case rx_concat:
+ for (i = 0; i < r->childnum; ++i)
+ __regex_compile(prog, r->children[i]);
+ break;
+ case rx_rep:
+ case rx_rep_nongreedy:
+ compile_rep(prog, r);
+ break;
+ }
+ if (r->submatch_id != -1) {
+ tmp = prog_get_ins(prog, 1);
+ tmp->opcode = op_save;
+ tmp->n = r->submatch_id * 2 + 1;
+ }
+}
+
+static void regex_compile(struct regex_prog *prog, struct regex *r)
+{
+ struct ins *tmp;
+ regex_prog_alloc(prog);
+ __regex_compile(prog, r);
+ tmp = prog_get_ins(prog, 1);
+ tmp->opcode = op_match;
+}
+
+static void assign_L(int *labels, int offset)
+{
+ static int next_label = 0;
+ if (labels[offset] == -1) {
+ labels[offset] = next_label;
+ ++next_label;
+ }
+}
+
+static void fill_label_table(int *labels, struct regex_prog *prog)
+{
+ struct ins *tmp;
+ for (tmp = prog->text; tmp < prog->text + prog->len; ++tmp) {
+ if (tmp->opcode == op_jmp) {
+ assign_L(labels, tmp->arg_labels[0] - prog->text);
+ }
+ if (tmp->opcode == op_split) {
+ assign_L(labels, tmp->arg_labels[0] - prog->text);
+ assign_L(labels, tmp->arg_labels[1] - prog->text);
+ }
+ }
+}
+
+static void print_ins(struct ins *ins, int *labels,
+ struct regex_prog *prog)
+{
+ switch(ins->opcode) {
+ case op_char:
+ printf("\tchar ");
+ if (ins->c == ' ')
+ printf("' '\n");
+ else
+ printf("%c\n", ins->c);
+ break;
+ case op_any:
+ printf("\tany\n");
+ break;
+ case op_range:
+ printf("\trange %s\n", print_range(ins->table));
+ break;
+ case op_match:
+ printf("\tmatch\n");
+ break;
+ case op_split:
+ printf("\tsplit L%d, L%d\n",
+ labels[ins->arg_labels[0] - prog->text],
+ labels[ins->arg_labels[1] - prog->text]);
+ break;
+ case op_jmp:
+ printf("\tjmp L%d\n",
+ labels[ins->arg_labels[0] - prog->text]);
+ break;
+ case op_save:
+ printf("\tsave %d\n", ins->n);
+ break;
+ }
+}
+
+static void regex_prog_print(struct regex_prog *prog)
+{
+ struct ins *tmp;
+ int *labels = malloc(sizeof(int) * prog->len);
+ memset(labels, -1, sizeof(int) * prog->len);
+ fill_label_table(labels, prog);
+
+ for (tmp = prog->text; tmp < prog->text + prog->len; ++tmp) {
+ int offset = tmp - prog->text;
+ if (labels[offset] != -1)
+ printf("L%d:", labels[offset]);
+ print_ins(tmp, labels, prog);
+ }
+}
+
+static struct threadlist *threadlist_init(struct regex_prog *prog)
+{
+ struct threadlist *tmp = malloc(sizeof(struct threadlist));
+
+ tmp->anchor = &tmp->anchor_body;
+ tmp->anchor->prev = tmp->anchor;
+ tmp->anchor->next = tmp->anchor;
+
+ tmp->presence_table = malloc(sizeof(struct node *) * prog->len);
+ tmp->ref = prog->text;
+ memset(tmp->presence_table, 0, sizeof(struct node *) * prog->len);
+ return tmp;
+}
+
+static struct node *alloc_node()
+{
+ struct node *tmp = malloc(sizeof(struct node));
+ tmp->next = tmp->prev = NULL;
+ return tmp;
+}
+
+static void node_extract(struct node *n)
+{
+ n->prev->next = n->next;
+ n->next->prev = n->prev;
+}
+
+static void node_addfront(struct threadlist *tl, struct node *n)
+{
+ n->next = tl->anchor->next;
+ n->prev = tl->anchor;
+ tl->anchor->next->prev = n;
+ tl->anchor->next = n;
+}
+
+static int is_threadlist_empty(struct threadlist *tl)
+{
+ return tl->anchor->next == tl->anchor;
+}
+
+int adds = 0;
+int adjustments = 0;
+
+static struct node **adjust_if_present(struct threadlist *tl,
+ struct thread *t, int back)
+{
+ struct node **cell = tl->presence_table + (t->pc - tl->ref);
+
+ if (*cell == NULL)
+ return cell;
+ if (back)
+ return NULL;
+ node_extract(*cell);
+ node_addfront(tl, *cell);
+ return NULL;
+}
+
+static void threadlist_addfront(struct threadlist *tl, struct thread t)
+{
+ struct node *tmp;
+ struct node **cell;
+ if ((cell = adjust_if_present(tl, &t, 0)) == NULL)
+ return;
+
+ ++adds;
+ tmp = alloc_node();
+ tmp->t = t;
+ node_addfront(tl, tmp);
+ *cell = tmp;
+}
+
+static void threadlist_addback(struct threadlist *tl, struct thread t)
+{
+ struct node *tmp;
+ struct node **cell;
+
+ if ((cell = adjust_if_present(tl, &t, 1)) == NULL)
+ return;
+
+ ++adds;
+ tmp = alloc_node();
+ tmp->t = t;
+
+ tmp->prev = tl->anchor->prev;
+ tmp->next = tl->anchor;
+ tl->anchor->prev->next = tmp;
+ tl->anchor->prev = tmp;
+ *cell = tmp;
+}
+
+static int threadlist_getfront(struct threadlist *tl, struct thread *t)
+{
+ struct node *tmp;
+
+ if (is_threadlist_empty(tl))
+ return 0;
+ tmp = tl->anchor->next;
+ *t = tmp->t;
+
+ node_extract(tmp);
+
+ tl->presence_table[t->pc - tl->ref] = NULL;
+ return 1;
+}
+
+static struct submatches *alloc_submatches(char **p)
+{
+ int i;
+ struct submatches *tmp = malloc(sizeof(*tmp));
+ tmp->refcount = 1;
+ if (p == NULL) {
+ for (i = 0; i < 20; ++i)
+ tmp->saves[i] = NULL;
+ } else {
+ for (i = 0; i < 20; ++i)
+ tmp->saves[i] = p[i];
+ }
+ return tmp;
+}
+
+static struct thread thread(struct ins *pc)
+{
+ return (struct thread) { pc, alloc_submatches(NULL) };
+}
+
+static struct thread thread_copy(struct thread *t)
+{
+ ++t->submatches->refcount;
+ return *t;
+}
+
+static void save_offset(struct thread *t, int n, char *offset)
+{
+ if (t->submatches->refcount > 1) {
+ --t->submatches->refcount;
+ t->submatches = alloc_submatches(t->submatches->saves);
+ }
+ t->submatches->saves[n] = offset;
+}
+
+
+static char **exec_threadlist(struct threadlist *clist,
+ struct threadlist *nlist, char c, char *offset)
+{
+ struct thread tmp;
+ while (threadlist_getfront(clist, &tmp)) {
+start:
+ switch(tmp.pc->opcode) {
+ case op_match:
+ if (c == 0)
+ return tmp.submatches->saves;
+ break;
+ case op_char:
+ if (tmp.pc->c == c) {
+ ++tmp.pc;
+ threadlist_addback(nlist, tmp);
+ }
+ break;
+ case op_any:
+ if (c != 0) {
+ ++tmp.pc;
+ threadlist_addback(nlist, tmp);
+ }
+ break;
+ case op_range:
+ if (tmp.pc->table[c / 64] & (1ull << (c % 64))) {
+ ++tmp.pc;
+ threadlist_addback(nlist, tmp);
+ }
+ break;
+ case op_jmp:
+ tmp.pc = tmp.pc->arg_labels[0];
+ goto start;
+ case op_split:
+ struct thread tmp2 = thread_copy(&tmp);
+ tmp2.pc = tmp.pc->arg_labels[1];
+ tmp.pc = tmp.pc->arg_labels[0];
+ threadlist_addfront(clist, tmp2);
+ threadlist_addfront(clist, tmp);
+ break;
+ case op_save:
+ save_offset(&tmp, tmp.pc->n, offset);
+ ++tmp.pc;
+ goto start;
+ }
+ }
+ return NULL;
+}
+
+static char **__regex_exec(struct regex_prog *prog, struct str *str)
+{
+ struct threadlist *clist = threadlist_init(prog);
+ struct threadlist *nlist = threadlist_init(prog);
+ struct threadlist *tmpl;
+ char *tmps = str->start;
+
+ threadlist_addback(clist, thread(prog->text));
+
+ for (; !is_threadlist_empty(clist); ++tmps) {
+ char c = tmps < str->end ? *tmps: 0;
+ char **ret = exec_threadlist(clist, nlist, c, tmps);
+ if (ret != NULL)
+ return ret;
+ if (tmps == str->end)
+ return NULL;
+ tmpl = nlist;
+ nlist = clist;
+ clist = tmpl;
+ }
+ return NULL;
+}
+
+static char **regex_exec(struct regex_prog *prog, struct str *str)
+{
+ char **ret;
+ save_heap_offset();
+ ret = __regex_exec(prog, str);
+ restore_heap_offset();
+ return ret;
+}
+
+static void regex_compile_wrapper(struct regex_prog *prog,
+ struct str *regex)
+{
+ struct regex *r = regex_parse(*regex);
+ /*regex_print(r);*/
+ regex_compile(prog, r);
+ /*regex_prog_print(prog);*/
+}
+
+static void print_saved(char **saves)
+{
+ int i;
+ if (saves == NULL) {
+ printf("No match\n");
+ return;
+ }
+ for (i = 0; i < 10; ++i) {
+ struct str tmp = { saves[i * 2], saves[i * 2 + 1] };
+ printf("%d: %S\n", i, &tmp);
+ }
+}
+
+static void read_part(struct str *part, struct str *s, char *err)
+{
+ char *tmp = s->start;
+ for (; tmp != s->end && *tmp != '/'; ++tmp)
+ ;
+ if (tmp == s->end)
+ error("%s", err);
+ part->start = s->start;
+ part->end = tmp;
+ s->start = tmp + 1;
+}
+
+static int is_space(char c)
+{
+ return c == ' ' || c == '\t' || c == '\n';
+}
+
+static void skip_spaces(struct str *s)
+{
+ for (; s->start != s->end && is_space(*s->start); ++s->start)
+ ;
+}
+
+static int parse_command(struct str *s)
+{
+ struct str cmdname;
+
+ struct compiled_command *ccmd = command_buf + command_num;
+
+ if (command_num == max_commands)
+ error("too many commands");
+
+ skip_spaces(s);
+ if (s->end - s->start == 0)
+ return 0;
+
+ if (s->start[0] == '/') {
+ ++s->start;
+ read_part(&ccmd->str_condition, s,
+ "command: unfinished condition");
+ regex_compile_wrapper(&ccmd->condition,
+ &ccmd->str_condition);
+ } else {
+ ccmd->str_condition.start = NULL;
+ ccmd->str_condition.end = NULL;
+ }
+ read_part(&cmdname, s, "command: unfinished command type");
+ read_part(&ccmd->str_body, s, "command: unfinished body");
+ regex_compile_wrapper(&ccmd->body, &ccmd->str_body);
+
+ read_part(&ccmd->str_result, s, "command: unfinished result");
+ if (streq(&cmdname, &CSTR("s")) == 0)
+ error("%s: command not supported");
+
+ ++command_num;
+ return 1;
+}
+
+static void parse_file(char *fname)
+{
+ char *addr;
+ struct str contents;
+ struct stat st;
+ int fd = open(fname, O_RDONLY, 0);
+
+ if (fd == -1)
+ goto syserror;
+ if (fstat(fd, &st) == -1)
+ goto syserror;
+
+ addr = mmap(NULL, st.size, PROT_READ, MAP_PRIVATE, fd, 0);
+ if (addr == (void*)-1)
+ goto syserror;
+ if (close(fd) == -1)
+ goto syserror;
+
+ contents = (struct str) { addr, addr + st.size };
+ while (parse_command(&contents))
+ ;
+ return;
+syserror:
+ syscall_error(fname);
+}
+
+static void parse_cmdargs(int argc, char **argv)
+{
+ ++argv;
+ if (*argv == NULL) {
+ printf("No help for you\n");
+ hexit(1);
+ } else if (strzeq(*argv, "-f")) {
+ if (argv[1] == NULL)
+ error("-f: missing file");
+ parse_file(argv[1]);
+ argv += 2;
+ if (*argv != NULL)
+ goto unexpected_arg;
+ } else {
+ parse_command(&STR(*argv));
+ ++argv;
+ if (*argv != NULL)
+ goto unexpected_arg;
+
+ }
+ return;
+unexpected_arg:
+ error("%s: unexpected argument", *argv);
+}
+
+static void construct_result(struct str *saves, struct str *result)
+{
+ char *tmp;
+
+ printf("%S", saves + 8);
+ for (tmp = result->start; tmp != result->end; ++tmp) {
+ if (*tmp != '\\') {
+ printf("%c", *tmp);
+ } else {
+ if (tmp + 1 == result->end)
+ error("stray '\\' in input");
+ ++tmp;
+ switch(*tmp) {
+ case '0' ... '7':
+ printf("%S", saves + (*tmp - '0'));
+ break;
+ default:
+ printf("%c", *tmp);
+ }
+ }
+ }
+ printf("%S\n", saves + 9);
+}
+
+static void process_line(struct str *line)
+{
+ int i;
+ for (i = 0; i < command_num; ++i) {
+ struct compiled_command *cmd = command_buf + i;
+ char **res;
+ if (cmd->str_condition.start != cmd->str_condition.end) {
+ res = regex_exec(&cmd->condition, line);
+ if (res == NULL)
+ continue;
+ }
+ res = regex_exec(&cmd->body, line);
+ if (res == NULL)
+ continue;
+ /* printf("Found: %S|%S\n", &cmd->str_condition,
+ &cmd->str_body); */
+ construct_result((struct str*)res, &cmd->str_result);
+ return;
+ }
+ printf("%S\n", line);
+}
+
+static void process()
+{
+ struct str line;
+
+ while (readline(&line))
+ process_line(&line);
+}
+
+int main(int argc, char **argv, char **envp)
+{
+ parse_cmdargs(argc, argv);
+ inbuf_init();
+ process();
+
+ return 0;
+}