mirror of https://gitee.com/openkylin/linux.git
amd64: switch csum_partial_copy_generic() to new calling conventions
... and fold handling of misaligned case into it.

Implementation note: we stash the "will we need to rol8 the sum in the end" flag into the MSB of %rcx (the lower 32 bits are used for length); the rest is pretty straightforward.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
commit daf52375c1
parent fdf8bee96f
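For readers who want the %rcx trick spelled out, the sketch below is a plain-C model of the encoding used in the assembly further down. It is only an illustration: the helper names are invented, not part of the patch, and it assumes the situation of the .Lodd path (flag not yet set, length >= 1). The low bits of the register carry the remaining length; bit 63 carries the "rotate the final 32-bit sum left by 8" flag, set when the copy has to consume one leading byte from an odd source address.

    #include <stdint.h>
    #include <stdbool.h>

    /*
     * Illustrative model only (hypothetical helper names): length lives in
     * the low bits of a 64-bit register, the "rol8 the result at the end"
     * flag in bit 63.
     */

    /* mirrors "leaq -1(%rcx, %rcx), %rcx; rorq $1, %rcx":
     * (2*len - 1) rotated right by one bit == (len - 1) with bit 63 set */
    static uint64_t consume_odd_byte(uint64_t lenreg)
    {
            uint64_t t = 2 * lenreg - 1;

            return (t >> 1) | (t << 63);
    }

    /* mirrors "testq %r10, %r10; js .Lwas_odd" plus ".Lwas_odd: roll $8, %eax" */
    static uint32_t finish_sum(uint32_t sum, uint64_t lenreg)
    {
            bool was_odd = (int64_t)lenreg < 0;     /* sign bit == flag */

            return was_odd ? (sum << 8) | (sum >> 24) : sum;
    }

    int main(void)
    {
            uint64_t r = consume_odd_byte(100);     /* length 100, odd start */

            /* low bits now hold 99; bit 63 remembers that rol8 is needed */
            return finish_sum(0x12345678u, r) == 0x34567812u ? 0 : 1;
    }

This is only meant to make the leaq/rorq idiom and the final js/roll pair easier to read; the patch itself keeps everything in registers.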
arch/x86/include/asm/checksum_64.h

@@ -130,10 +130,7 @@ static inline __sum16 csum_tcpudp_magic(__be32 saddr, __be32 daddr,
 extern __wsum csum_partial(const void *buff, int len, __wsum sum);
 
 /* Do not call this directly. Use the wrappers below */
-extern __visible __wsum csum_partial_copy_generic(const void *src, const void *dst,
-					int len, __wsum sum,
-					int *src_err_ptr, int *dst_err_ptr);
-
+extern __visible __wsum csum_partial_copy_generic(const void *src, void *dst, int len);
 
 extern __wsum csum_and_copy_from_user(const void __user *src, void *dst, int len);
 extern __wsum csum_and_copy_to_user(const void *src, void __user *dst, int len);
arch/x86/lib/csum-copy_64.S

@@ -18,9 +18,6 @@
 * rdi  source
 * rsi  destination
 * edx  len (32bit)
-* ecx  sum (32bit)
-* r8   src_err_ptr (int)
-* r9   dst_err_ptr (int)
 *
 * Output
 * eax  64bit sum. undefined in case of exception.
@@ -31,44 +28,32 @@
 
 .macro source
 10:
-	_ASM_EXTABLE_UA(10b, .Lbad_source)
+	_ASM_EXTABLE_UA(10b, .Lfault)
 .endm
 
 .macro dest
 20:
-	_ASM_EXTABLE_UA(20b, .Lbad_dest)
+	_ASM_EXTABLE_UA(20b, .Lfault)
 .endm
 
-/*
- * No _ASM_EXTABLE_UA; this is used for intentional prefetch on a
- * potentially unmapped kernel address.
- */
-	.macro ignore L=.Lignore
-30:
-	_ASM_EXTABLE(30b, \L)
-	.endm
-
-
 SYM_FUNC_START(csum_partial_copy_generic)
-	cmpl	$3*64, %edx
-	jle	.Lignore
-
-.Lignore:
-	subq	$7*8, %rsp
-	movq	%rbx, 2*8(%rsp)
-	movq	%r12, 3*8(%rsp)
-	movq	%r14, 4*8(%rsp)
-	movq	%r13, 5*8(%rsp)
-	movq	%r15, 6*8(%rsp)
+	subq	$5*8, %rsp
+	movq	%rbx, 0*8(%rsp)
+	movq	%r12, 1*8(%rsp)
+	movq	%r14, 2*8(%rsp)
+	movq	%r13, 3*8(%rsp)
+	movq	%r15, 4*8(%rsp)
 
-	movq	%r8, (%rsp)
-	movq	%r9, 1*8(%rsp)
-
-	movl	%ecx, %eax
-	movl	%edx, %ecx
-
+	movl	$-1, %eax
 	xorl	%r9d, %r9d
-	movq	%rcx, %r12
+	movl	%edx, %ecx
+	cmpl	$8, %ecx
+	jb	.Lshort
 
+	testb	$7, %sil
+	jne	.Lunaligned
+.Laligned:
+	movl	%ecx, %r12d
+
 	shrq	$6, %r12
 	jz	.Lhandle_tail		/* < 64 */
@@ -99,7 +84,12 @@ SYM_FUNC_START(csum_partial_copy_generic)
 	source
 	movq	56(%rdi), %r13
 
-	ignore 2f
+30:
+	/*
+	 * No _ASM_EXTABLE_UA; this is used for intentional prefetch on a
+	 * potentially unmapped kernel address.
+	 */
+	_ASM_EXTABLE(30b, 2f)
 	prefetcht0 5*64(%rdi)
 2:
 	adcq	%rbx, %rax
@@ -131,8 +121,6 @@
 	dest
 	movq	%r13, 56(%rsi)
 
-3:
-
 	leaq	64(%rdi), %rdi
 	leaq	64(%rsi), %rsi
 
@@ -142,8 +130,8 @@
 
 	/* do last up to 56 bytes */
 .Lhandle_tail:
-	/* ecx:	count */
-	movl	%ecx, %r10d
+	/* ecx:	count, rcx.63: the end result needs to be rol8 */
+	movq	%rcx, %r10
 	andl	$63, %ecx
 	shrl	$3, %ecx
 	jz	.Lfold
@@ -172,6 +160,7 @@
 .Lhandle_7:
 	movl	%r10d, %ecx
 	andl	$7, %ecx
+.L1:				/* .Lshort rejoins the common path here */
 	shrl	$1, %ecx
 	jz	.Lhandle_1
 	movl	$2, %edx
@@ -203,26 +192,65 @@
 	adcl	%r9d, %eax	/* carry */
 
 .Lende:
-	movq	2*8(%rsp), %rbx
-	movq	3*8(%rsp), %r12
-	movq	4*8(%rsp), %r14
-	movq	5*8(%rsp), %r13
-	movq	6*8(%rsp), %r15
-	addq	$7*8, %rsp
+	testq	%r10, %r10
+	js	.Lwas_odd
+.Lout:
+	movq	0*8(%rsp), %rbx
+	movq	1*8(%rsp), %r12
+	movq	2*8(%rsp), %r14
+	movq	3*8(%rsp), %r13
+	movq	4*8(%rsp), %r15
+	addq	$5*8, %rsp
 	ret
+.Lshort:
+	movl	%ecx, %r10d
+	jmp	.L1
+.Lunaligned:
+	xorl	%ebx, %ebx
+	testb	$1, %sil
+	jne	.Lodd
+1:	testb	$2, %sil
+	je	2f
+	source
+	movw	(%rdi), %bx
+	dest
+	movw	%bx, (%rsi)
+	leaq	2(%rdi), %rdi
+	subq	$2, %rcx
+	leaq	2(%rsi), %rsi
+	addq	%rbx, %rax
+2:	testb	$4, %sil
+	je	.Laligned
+	source
+	movl	(%rdi), %ebx
+	dest
+	movl	%ebx, (%rsi)
+	leaq	4(%rdi), %rdi
+	subq	$4, %rcx
+	leaq	4(%rsi), %rsi
+	addq	%rbx, %rax
+	jmp	.Laligned
 
-	/* Exception handlers. Very simple, zeroing is done in the wrappers */
-.Lbad_source:
-	movq	(%rsp), %rax
-	testq	%rax, %rax
-	jz	.Lende
-	movl	$-EFAULT, (%rax)
-	jmp	.Lende
+.Lodd:
+	source
+	movb	(%rdi), %bl
+	dest
+	movb	%bl, (%rsi)
+	leaq	1(%rdi), %rdi
+	leaq	1(%rsi), %rsi
+	/* decrement, set MSB */
+	leaq	-1(%rcx, %rcx), %rcx
+	rorq	$1, %rcx
+	shll	$8, %ebx
+	addq	%rbx, %rax
+	jmp	1b
 
-.Lbad_dest:
-	movq	8(%rsp), %rax
-	testq	%rax, %rax
-	jz	.Lende
-	movl	$-EFAULT, (%rax)
-	jmp	.Lende
+.Lwas_odd:
+	roll	$8, %eax
+	jmp	.Lout
+
+	/* Exception: just return 0 */
+.Lfault:
+	xorl	%eax, %eax
+	jmp	.Lout
 SYM_FUNC_END(csum_partial_copy_generic)
arch/x86/lib/csum-wrappers_64.c

@@ -21,49 +21,16 @@
  * src and dst are best aligned to 64bits.
  */
 __wsum
-csum_and_copy_from_user(const void __user *src, void *dst,
-			  int len)
+csum_and_copy_from_user(const void __user *src, void *dst, int len)
 {
-	int err = 0;
-	__wsum isum = ~0U;
+	__wsum sum;
 
 	might_sleep();
 
 	if (!user_access_begin(src, len))
 		return 0;
-
-	/*
-	 * Why 6, not 7? To handle odd addresses aligned we
-	 * would need to do considerable complications to fix the
-	 * checksum which is defined as an 16bit accumulator. The
-	 * fix alignment code is primarily for performance
-	 * compatibility with 32bit and that will handle odd
-	 * addresses slowly too.
-	 */
-	if (unlikely((unsigned long)src & 6)) {
-		while (((unsigned long)src & 6) && len >= 2) {
-			__u16 val16;
-
-			unsafe_get_user(val16, (const __u16 __user *)src, out);
-
-			*(__u16 *)dst = val16;
-			isum = (__force __wsum)add32_with_carry(
-					(__force unsigned)isum, val16);
-			src += 2;
-			dst += 2;
-			len -= 2;
-		}
-	}
-	isum = csum_partial_copy_generic((__force const void *)src,
-				dst, len, isum, &err, NULL);
+	sum = csum_partial_copy_generic((__force const void *)src, dst, len);
 	user_access_end();
-	if (unlikely(err))
-		isum = 0;
-	return isum;
-
-out:
-	user_access_end();
-	return 0;
+	return sum;
 }
 EXPORT_SYMBOL(csum_and_copy_from_user);
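One consequence of the new convention, visible in the wrapper above: faults are no longer reported through src_err_ptr/dst_err_ptr. The assembly seeds the accumulator with all-ones (the movl $-1, %eax at function entry) and the exception path (.Lfault) returns 0; since a ones' complement accumulation seeded with ~0U does not collapse to 0, a zero return can only mean "the copy faulted". Below is a minimal caller-side sketch of that contract; the helper name and surrounding code are assumptions for illustration, not part of this patch.

    #include <linux/uaccess.h>
    #include <linux/errno.h>
    #include <net/checksum.h>

    /* hypothetical caller, shown only to illustrate the 0-means-fault contract */
    static int copy_and_csum(const void __user *src, void *dst, int len,
                             __wsum *csum)
    {
            __wsum sum = csum_and_copy_from_user(src, dst, len);

            if (!sum)
                    return -EFAULT; /* fault while copying; dst contents unspecified */
            *csum = sum;
            return 0;
    }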
@@ -79,37 +46,16 @@ EXPORT_SYMBOL(csum_and_copy_from_user);
  * src and dst are best aligned to 64bits.
  */
 __wsum
-csum_and_copy_to_user(const void *src, void __user *dst,
-			  int len)
+csum_and_copy_to_user(const void *src, void __user *dst, int len)
 {
-	__wsum ret, isum = ~0U;
-	int err = 0;
+	__wsum sum;
 
 	might_sleep();
 
 	if (!user_access_begin(dst, len))
 		return 0;
-
-	if (unlikely((unsigned long)dst & 6)) {
-		while (((unsigned long)dst & 6) && len >= 2) {
-			__u16 val16 = *(__u16 *)src;
-
-			isum = (__force __wsum)add32_with_carry(
-					(__force unsigned)isum, val16);
-			unsafe_put_user(val16, (__u16 __user *)dst, out);
-			src += 2;
-			dst += 2;
-			len -= 2;
-		}
-	}
-
-	ret = csum_partial_copy_generic(src, (void __force *)dst,
-				len, isum, NULL, &err);
+	sum = csum_partial_copy_generic(src, (void __force *)dst, len);
 	user_access_end();
-	return err ? 0 : ret;
-
-out:
-	user_access_end();
-	return 0;
+	return sum;
 }
 EXPORT_SYMBOL(csum_and_copy_to_user);
@@ -125,7 +71,7 @@ EXPORT_SYMBOL(csum_and_copy_to_user);
 __wsum
 csum_partial_copy_nocheck(const void *src, void *dst, int len)
 {
-	return csum_partial_copy_generic(src, dst, len, 0, NULL, NULL);
+	return csum_partial_copy_generic(src, dst, len);
 }
 EXPORT_SYMBOL(csum_partial_copy_nocheck);