summaryrefslogtreecommitdiff
path: root/test/benchmarks/search-kmp.c
blob: 79ba9c807ae747981ca43111b32ed166d18cc957 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
/*
 * Global store for search results.  kmp() appends the text offset of
 * every match it finds and main() adds the recorded offsets together.
 */
struct result_buf {
	/* Match offsets; room for 24*1024*1024 entries. */
	unsigned long array[24*1024*1024];
	/* Number of entries of array currently in use. */
	int len;
};

/*
 * The single result buffer.  As a file-scope object it starts out
 * zeroed, so len is 0 before the first search.
 */
struct result_buf rbuf;

int read(int fd, void *buf, unsigned long len);
void *malloc(unsigned long);
void free(void *);
long strlen(char *);
int open(char *, int flags, long mode);
int printf(char *, ...);
void exit(int);

/*
 * Fill pibuf[0..len-1] with the KMP prefix function of pattern p:
 * pibuf[q] is the length of the longest proper prefix of p[0..q] that
 * is also a suffix of p[0..q].
 *
 * p     - pattern bytes; only p[0..len-1] are read
 * len   - number of pattern bytes; nothing is written when len <= 0
 * pibuf - caller-provided output array with room for len ints
 */
static void compute_pi(char *p, int len, int *pibuf)
{
	int k = 0;
	int q;

	/* An empty pattern has no entries; do not touch pibuf[0]. */
	if (len <= 0)
		return;

	pibuf[0] = 0;
	for (q = 1; q < len; ++q) {
		/*
		 * k is the length of the border currently being extended, so
		 * the next shorter candidate is the border of p[0..k-1], which
		 * is stored in pibuf[k-1].  Indexing with pibuf[k] reads the
		 * wrong entry and can leave k unchanged (e.g. for "aab"),
		 * which would loop forever.
		 */
		while (k > 0 && p[k] != p[q])
			k = pibuf[k - 1];
		if (p[k] == p[q])
			++k;
		pibuf[q] = k;
	}
}

/*
 * Knuth-Morris-Pratt search: find every (possibly overlapping)
 * occurrence of pattern p in text t and append the starting offset of
 * each one to the global rbuf.
 *
 * p    - pattern bytes, p[0..plen-1]
 * plen - pattern length; an empty pattern (plen <= 0) records nothing
 * t    - text bytes, t[0..tlen-1]
 * tlen - text length
 *
 * Exits the process with status 2 if the prefix table cannot be
 * allocated or if rbuf has no room left for another match.
 */
void kmp(char *p, int plen, char *t, int tlen)
{
	int capacity = sizeof(rbuf.array) / sizeof(rbuf.array[0]);
	int i, q = 0;
	int *pibuf;

	/* Without this guard an empty pattern would index pibuf[-1] below. */
	if (plen <= 0)
		return;

	pibuf = malloc(sizeof(int) * plen);
	if (!pibuf)
		exit(2);

	compute_pi(p, plen, pibuf);

	/* q is the number of pattern bytes matched so far. */
	for (i = 0; i < tlen; ++i) {
		/* On a mismatch fall back to the next shorter border. */
		while (q > 0 && p[q] != t[i])
			q = pibuf[q-1];
		if (p[q] == t[i])
			++q;
		if (q == plen) {
			/* Fail loudly rather than write past the end of rbuf.array. */
			if (rbuf.len >= capacity)
				exit(2);
			/* The match ends at i, so it starts at i + 1 - plen. */
			rbuf.array[rbuf.len] = i + 1 - plen;
			++rbuf.len;
			/* Keep the longest border so overlapping matches are found. */
			q = pibuf[q-1];
		}
	}

	free(pibuf);
}

/*
 * Read from fd until end-of-file into a freshly allocated buffer.
 *
 * fd  - descriptor to read from
 * len - out: number of bytes actually read
 *
 * Returns the buffer; it is never freed by this function, so the
 * caller owns it.  The buffer has a fixed size of 1500 MiB: once it is
 * full, read() is called with a zero count, which normally returns 0
 * and ends the loop, so longer input is silently truncated.
 *
 * Exits the process with status 2 if the buffer cannot be allocated or
 * if read() reports an error.
 */
void *read_file(int fd, int *len)
{
	int size = 1500 * 1024 * 1024;
	unsigned char *arr = malloc(size);
	int i = 0;

	/* Stop here instead of doing pointer arithmetic on a null buffer. */
	if (!arr)
		exit(2);

	for (;;) {
		/* read() may return fewer bytes than requested; keep going. */
		int rd = read(fd, arr + i, size - i);
		if (rd < 0)
			exit(2);
		if (rd == 0)
			break;
		i += rd;
	}

	*len = i;
	return arr;
}

/*
 * Usage: <program> FILE NEEDLE
 *
 * Loads FILE into memory, prints its length, searches it for NEEDLE
 * with kmp(), and prints the sum of all match offsets.
 *
 * Returns 1 when an argument is missing or FILE cannot be opened,
 * 0 otherwise.
 */
int main(int argc, char **argv)
{
	int i, slen;
	long sum = 0;
	char *needle, *buf;
	int fd;

	/* argv[1] and argv[2] are both required; do not dereference them blindly. */
	if (argc < 3)
		return 1;

	/* Flags 0 is O_RDONLY on Linux and most Unix systems. */
	fd = open(argv[1], 0, 0);
	if (fd == -1)
		return 1;

	needle = argv[2];
	buf = read_file(fd, &slen);
	printf("len: %d\n", slen);

	kmp(needle, strlen(needle), buf, slen);

	/* The sum of match offsets serves as the program's printed result. */
	for (i = 0; i < rbuf.len; ++i)
		sum += rbuf.array[i];
	printf("%ld\n", sum);

	return 0;
}