diff options
Diffstat (limited to 'src/string/x86_64')
-rw-r--r-- | src/string/x86_64/memcpy.s | 25 | ||||
-rw-r--r-- | src/string/x86_64/memmove.s | 16 | ||||
-rw-r--r-- | src/string/x86_64/memset.s | 72 |
3 files changed, 113 insertions, 0 deletions
diff --git a/src/string/x86_64/memcpy.s b/src/string/x86_64/memcpy.s
new file mode 100644
index 0000000..3d960ef
--- /dev/null
+++ b/src/string/x86_64/memcpy.s
@@ -0,0 +1,25 @@
+.global memcpy			# memcpy: copy %rdx bytes from (%rsi) to (%rdi), low to high; returns the original %rdi in %rax
+.global __memcpy_fwd		# second label on the same entry point; memmove.s jumps to it
+.hidden __memcpy_fwd
+.type memcpy,@function
+memcpy:
+__memcpy_fwd:
+	mov %rdi,%rax		# rax = destination as passed in (the return value)
+	cmp $8,%rdx
+	jc 1f			# fewer than 8 bytes: skip the alignment loop
+	test $7,%edi
+	jz 1f			# destination already 8-byte aligned
+2:	movsb			# copy single bytes until the destination is 8-byte aligned
+	dec %rdx		# cannot underflow: at most 7 iterations and rdx >= 8 on entry to the loop
+	test $7,%edi
+	jnz 2b
+1:	mov %rdx,%rcx
+	shr $3,%rcx		# rcx = number of whole 8-byte words left
+	rep
+	movsq			# bulk copy, 8 bytes per iteration
+	and $7,%edx		# edx = leftover byte count (0..7)
+	jz 1f
+2:	movsb			# copy the remaining tail one byte at a time
+	dec %edx
+	jnz 2b
+1:	ret
diff --git a/src/string/x86_64/memmove.s b/src/string/x86_64/memmove.s
new file mode 100644
index 0000000..172c025
--- /dev/null
+++ b/src/string/x86_64/memmove.s
@@ -0,0 +1,16 @@
+.global memmove			# memmove: copy %rdx bytes from (%rsi) to (%rdi), choosing a direction that is safe for overlap; returns the destination in %rax
+.type memmove,@function
+memmove:
+	mov %rdi,%rax
+	sub %rsi,%rax		# rax = dest - src (wraps to a large unsigned value when dest < src)
+	cmp %rdx,%rax
+.hidden __memcpy_fwd
+	jae __memcpy_fwd	# (dest - src) >= n unsigned: a forward copy cannot overwrite unread source bytes, so tail-jump to it
+	mov %rdx,%rcx		# rcx = byte count for the backward copy
+	lea -1(%rdi,%rdx),%rdi	# rdi = address of the last destination byte
+	lea -1(%rsi,%rdx),%rsi	# rsi = address of the last source byte
+	std			# set the direction flag so the string op walks downward
+	rep movsb		# copy rcx bytes from high addresses to low
+	cld			# clear the direction flag again before returning
+	lea 1(%rdi),%rax	# rdi ended one byte below the destination start; rax = destination
+	ret
diff --git a/src/string/x86_64/memset.s b/src/string/x86_64/memset.s
new file mode 100644
index 0000000..2d3f5e5
--- /dev/null
+++ b/src/string/x86_64/memset.s
@@ -0,0 +1,72 @@
+.global memset			# memset: store the low byte of %esi into %rdx bytes starting at (%rdi); returns the destination in %rax
+.type memset,@function
+memset:
+	movzbq %sil,%rax	# rax = fill byte, zero-extended
+	mov $0x101010101010101,%r8
+	imul %r8,%rax		# rax = fill byte replicated into all 8 byte lanes
+	# Sizes above 126 take the rep stosq path at the second "2:" label below.
+	cmp $126,%rdx
+	ja 2f
+	# Small sizes (n <= 126, so the 32-bit %edx holds all of n): nothing to do when n == 0.
+	test %edx,%edx
+	jz 1f
+	# Each step below writes one chunk growing from the start and one ending at the last byte; the two may overlap.
+	mov %sil,(%rdi)		# byte 0
+	mov %sil,-1(%rdi,%rdx)	# byte n-1
+	cmp $2,%edx
+	jbe 1f			# n <= 2 is fully covered
+	# 2-byte stores cover bytes 1..2 and n-3..n-2.
+	mov %ax,1(%rdi)
+	mov %ax,(-1-2)(%rdi,%rdx)
+	cmp $6,%edx
+	jbe 1f			# n <= 6 is fully covered
+	# 4-byte stores cover bytes 3..6 and n-7..n-4.
+	mov %eax,(1+2)(%rdi)
+	mov %eax,(-1-2-4)(%rdi,%rdx)
+	cmp $14,%edx
+	jbe 1f			# n <= 14 is fully covered
+	# 8-byte stores cover bytes 7..14 and n-15..n-8.
+	mov %rax,(1+2+4)(%rdi)
+	mov %rax,(-1-2-4-8)(%rdi,%rdx)
+	cmp $30,%edx
+	jbe 1f			# n <= 30 is fully covered
+	# Two 8-byte stores per side cover bytes 15..30 and n-31..n-16.
+	mov %rax,(1+2+4+8)(%rdi)
+	mov %rax,(1+2+4+8+8)(%rdi)
+	mov %rax,(-1-2-4-8-16)(%rdi,%rdx)
+	mov %rax,(-1-2-4-8-8)(%rdi,%rdx)
+	cmp $62,%edx
+	jbe 1f			# n <= 62 is fully covered
+	# Four 8-byte stores per side cover bytes 31..62 and n-63..n-32, which completes every n up to 126.
+	mov %rax,(1+2+4+8+16)(%rdi)
+	mov %rax,(1+2+4+8+16+8)(%rdi)
+	mov %rax,(1+2+4+8+16+16)(%rdi)
+	mov %rax,(1+2+4+8+16+24)(%rdi)
+	mov %rax,(-1-2-4-8-16-32)(%rdi,%rdx)
+	mov %rax,(-1-2-4-8-16-24)(%rdi,%rdx)
+	mov %rax,(-1-2-4-8-16-16)(%rdi,%rdx)
+	mov %rax,(-1-2-4-8-16-8)(%rdi,%rdx)
+	# Common exit for the small-size path (%rdi was never modified on this path).
+1:	mov %rdi,%rax		# return the destination
+	ret
+	# Large-size path (n > 126).
+2:	test $15,%edi		# ZF = destination is 16-byte aligned; the movs below leave flags intact for the jnz
+	mov %rdi,%r8		# save the destination for the return value (stosq advances %rdi)
+	mov %rax,-8(%rdi,%rdx)	# pre-fill the last 8 bytes so the n%8 remainder needs no separate loop
+	mov %rdx,%rcx
+	jnz 2f			# misaligned destination: fix up the head first
+	# Bulk fill: %rcx holds the byte count remaining from %rdi.
+1:	shr $3,%rcx		# rcx = whole 8-byte words to store
+	rep
+	stosq
+	mov %r8,%rax		# return the saved destination
+	ret
+	# Head fix-up for a destination that is not 16-byte aligned.
+2:	xor %edx,%edx
+	sub %edi,%edx
+	and $15,%edx		# edx = (-dest) & 15 = bytes up to the next 16-byte boundary (1..15 here)
+	mov %rax,(%rdi)		# fill the first 16 bytes unaligned; this covers the skipped head
+	mov %rax,8(%rdi)
+	sub %rdx,%rcx		# drop the head bytes from the remaining count
+	add %rdx,%rdi		# advance to the 16-byte boundary, then rejoin the bulk fill via the jump below
+	jmp 1b |