2019-05-27 14:55:01 +08:00
|
|
|
/* SPDX-License-Identifier: GPL-2.0-or-later */
|
2005-10-21 00:44:46 +08:00
|
|
|
#ifndef _ASM_POWERPC_CHECKSUM_H
|
|
|
|
#define _ASM_POWERPC_CHECKSUM_H
|
2005-12-17 05:43:46 +08:00
|
|
|
#ifdef __KERNEL__
|
2005-04-17 06:20:36 +08:00
|
|
|
|
|
|
|
/*
|
|
|
|
*/
|
|
|
|
|
powerpc/64: optimises from64to32()
The current implementation of from64to32() gives a poor result:
0000000000000270 <.from64to32>:
270: 38 00 ff ff li r0,-1
274: 78 69 00 22 rldicl r9,r3,32,32
278: 78 00 00 20 clrldi r0,r0,32
27c: 7c 60 00 38 and r0,r3,r0
280: 7c 09 02 14 add r0,r9,r0
284: 78 09 00 22 rldicl r9,r0,32,32
288: 7c 00 4a 14 add r0,r0,r9
28c: 78 03 00 20 clrldi r3,r0,32
290: 4e 80 00 20 blr
This patch modifies from64to32() to operate in the same
spirit as csum_fold()
It swaps the two 32-bit halves of sum then it adds it with the
unswapped sum. If there is a carry from adding the two 32-bit halves,
it will carry from the lower half into the upper half, giving us the
correct sum in the upper half.
The resulting code is:
0000000000000260 <.from64to32>:
260: 78 60 00 02 rotldi r0,r3,32
264: 7c 60 1a 14 add r3,r0,r3
268: 78 63 00 22 rldicl r3,r3,32,32
26c: 4e 80 00 20 blr
Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
2018-04-10 14:34:35 +08:00
|
|
|
#include <linux/bitops.h>
|
powerpc: Implement csum_ipv6_magic in assembly
The generic csum_ipv6_magic() generates a pretty bad result
00000000 <csum_ipv6_magic>: (PPC32)
0: 81 23 00 00 lwz r9,0(r3)
4: 81 03 00 04 lwz r8,4(r3)
8: 7c e7 4a 14 add r7,r7,r9
c: 7d 29 38 10 subfc r9,r9,r7
10: 7d 4a 51 10 subfe r10,r10,r10
14: 7d 27 42 14 add r9,r7,r8
18: 7d 2a 48 50 subf r9,r10,r9
1c: 80 e3 00 08 lwz r7,8(r3)
20: 7d 08 48 10 subfc r8,r8,r9
24: 7d 4a 51 10 subfe r10,r10,r10
28: 7d 29 3a 14 add r9,r9,r7
2c: 81 03 00 0c lwz r8,12(r3)
30: 7d 2a 48 50 subf r9,r10,r9
34: 7c e7 48 10 subfc r7,r7,r9
38: 7d 4a 51 10 subfe r10,r10,r10
3c: 7d 29 42 14 add r9,r9,r8
40: 7d 2a 48 50 subf r9,r10,r9
44: 80 e4 00 00 lwz r7,0(r4)
48: 7d 08 48 10 subfc r8,r8,r9
4c: 7d 4a 51 10 subfe r10,r10,r10
50: 7d 29 3a 14 add r9,r9,r7
54: 7d 2a 48 50 subf r9,r10,r9
58: 81 04 00 04 lwz r8,4(r4)
5c: 7c e7 48 10 subfc r7,r7,r9
60: 7d 4a 51 10 subfe r10,r10,r10
64: 7d 29 42 14 add r9,r9,r8
68: 7d 2a 48 50 subf r9,r10,r9
6c: 80 e4 00 08 lwz r7,8(r4)
70: 7d 08 48 10 subfc r8,r8,r9
74: 7d 4a 51 10 subfe r10,r10,r10
78: 7d 29 3a 14 add r9,r9,r7
7c: 7d 2a 48 50 subf r9,r10,r9
80: 81 04 00 0c lwz r8,12(r4)
84: 7c e7 48 10 subfc r7,r7,r9
88: 7d 4a 51 10 subfe r10,r10,r10
8c: 7d 29 42 14 add r9,r9,r8
90: 7d 2a 48 50 subf r9,r10,r9
94: 7d 08 48 10 subfc r8,r8,r9
98: 7d 4a 51 10 subfe r10,r10,r10
9c: 7d 29 2a 14 add r9,r9,r5
a0: 7d 2a 48 50 subf r9,r10,r9
a4: 7c a5 48 10 subfc r5,r5,r9
a8: 7c 63 19 10 subfe r3,r3,r3
ac: 7d 29 32 14 add r9,r9,r6
b0: 7d 23 48 50 subf r9,r3,r9
b4: 7c c6 48 10 subfc r6,r6,r9
b8: 7c 63 19 10 subfe r3,r3,r3
bc: 7c 63 48 50 subf r3,r3,r9
c0: 54 6a 80 3e rotlwi r10,r3,16
c4: 7c 63 52 14 add r3,r3,r10
c8: 7c 63 18 f8 not r3,r3
cc: 54 63 84 3e rlwinm r3,r3,16,16,31
d0: 4e 80 00 20 blr
0000000000000000 <.csum_ipv6_magic>: (PPC64)
0: 81 23 00 00 lwz r9,0(r3)
4: 80 03 00 04 lwz r0,4(r3)
8: 81 63 00 08 lwz r11,8(r3)
c: 7c e7 4a 14 add r7,r7,r9
10: 7f 89 38 40 cmplw cr7,r9,r7
14: 7d 47 02 14 add r10,r7,r0
18: 7d 30 10 26 mfocrf r9,1
1c: 55 29 f7 fe rlwinm r9,r9,30,31,31
20: 7d 4a 4a 14 add r10,r10,r9
24: 7f 80 50 40 cmplw cr7,r0,r10
28: 7d 2a 5a 14 add r9,r10,r11
2c: 80 03 00 0c lwz r0,12(r3)
30: 81 44 00 00 lwz r10,0(r4)
34: 7d 10 10 26 mfocrf r8,1
38: 55 08 f7 fe rlwinm r8,r8,30,31,31
3c: 7d 29 42 14 add r9,r9,r8
40: 81 04 00 04 lwz r8,4(r4)
44: 7f 8b 48 40 cmplw cr7,r11,r9
48: 7d 29 02 14 add r9,r9,r0
4c: 7d 70 10 26 mfocrf r11,1
50: 55 6b f7 fe rlwinm r11,r11,30,31,31
54: 7d 29 5a 14 add r9,r9,r11
58: 7f 80 48 40 cmplw cr7,r0,r9
5c: 7d 29 52 14 add r9,r9,r10
60: 7c 10 10 26 mfocrf r0,1
64: 54 00 f7 fe rlwinm r0,r0,30,31,31
68: 7d 69 02 14 add r11,r9,r0
6c: 7f 8a 58 40 cmplw cr7,r10,r11
70: 7c 0b 42 14 add r0,r11,r8
74: 81 44 00 08 lwz r10,8(r4)
78: 7c f0 10 26 mfocrf r7,1
7c: 54 e7 f7 fe rlwinm r7,r7,30,31,31
80: 7c 00 3a 14 add r0,r0,r7
84: 7f 88 00 40 cmplw cr7,r8,r0
88: 7d 20 52 14 add r9,r0,r10
8c: 80 04 00 0c lwz r0,12(r4)
90: 7d 70 10 26 mfocrf r11,1
94: 55 6b f7 fe rlwinm r11,r11,30,31,31
98: 7d 29 5a 14 add r9,r9,r11
9c: 7f 8a 48 40 cmplw cr7,r10,r9
a0: 7d 29 02 14 add r9,r9,r0
a4: 7d 70 10 26 mfocrf r11,1
a8: 55 6b f7 fe rlwinm r11,r11,30,31,31
ac: 7d 29 5a 14 add r9,r9,r11
b0: 7f 80 48 40 cmplw cr7,r0,r9
b4: 7d 29 2a 14 add r9,r9,r5
b8: 7c 10 10 26 mfocrf r0,1
bc: 54 00 f7 fe rlwinm r0,r0,30,31,31
c0: 7d 29 02 14 add r9,r9,r0
c4: 7f 85 48 40 cmplw cr7,r5,r9
c8: 7c 09 32 14 add r0,r9,r6
cc: 7d 50 10 26 mfocrf r10,1
d0: 55 4a f7 fe rlwinm r10,r10,30,31,31
d4: 7c 00 52 14 add r0,r0,r10
d8: 7f 80 30 40 cmplw cr7,r0,r6
dc: 7d 30 10 26 mfocrf r9,1
e0: 55 29 ef fe rlwinm r9,r9,29,31,31
e4: 7c 09 02 14 add r0,r9,r0
e8: 54 03 80 3e rotlwi r3,r0,16
ec: 7c 03 02 14 add r0,r3,r0
f0: 7c 03 00 f8 not r3,r0
f4: 78 63 84 22 rldicl r3,r3,48,48
f8: 4e 80 00 20 blr
This patch implements it in assembly for both PPC32 and PPC64
Link: https://github.com/linuxppc/linux/issues/9
Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
Reviewed-by: Segher Boessenkool <segher@kernel.crashing.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
2018-05-24 19:33:18 +08:00
|
|
|
#include <linux/in6.h>
|
2005-04-17 06:20:36 +08:00
|
|
|
/*
|
2005-10-21 00:44:46 +08:00
|
|
|
* Computes the checksum of a memory block at src, length len,
|
|
|
|
* and adds in "sum" (32-bit), while copying the block to dst.
|
|
|
|
* If an access exception occurs on src or dst, it stores -EFAULT
|
|
|
|
* to *src_err or *dst_err respectively (if that pointer is not
|
|
|
|
* NULL), and, for an error on src, zeroes the rest of dst.
|
|
|
|
*
|
|
|
|
* Like csum_partial, this must be called with even lengths,
|
|
|
|
* except for the last fragment.
|
2005-04-17 06:20:36 +08:00
|
|
|
*/
|
2006-11-15 13:21:58 +08:00
|
|
|
extern __wsum csum_partial_copy_generic(const void *src, void *dst,
|
|
|
|
int len, __wsum sum,
|
2005-04-17 06:20:36 +08:00
|
|
|
int *src_err, int *dst_err);
|
2010-08-03 04:09:52 +08:00
|
|
|
|
|
|
|
#define _HAVE_ARCH_COPY_AND_CSUM_FROM_USER
|
|
|
|
extern __wsum csum_and_copy_from_user(const void __user *src, void *dst,
|
|
|
|
int len, __wsum sum, int *err_ptr);
|
2010-08-03 04:11:36 +08:00
|
|
|
#define HAVE_CSUM_COPY_USER
|
|
|
|
extern __wsum csum_and_copy_to_user(const void *src, void __user *dst,
|
|
|
|
int len, __wsum sum, int *err_ptr);
|
2005-10-21 00:44:46 +08:00
|
|
|
|
|
|
|
/*
 * Copy-and-checksum without fault reporting: forwards to
 * csum_partial_copy_generic() with both error pointers set to NULL.
 */
#define csum_partial_copy_nocheck(src, dst, len, sum)			\
	csum_partial_copy_generic((src), (dst), (len), (sum), NULL, NULL)
|
|
|
|
|
|
|
|
|
2005-04-17 06:20:36 +08:00
|
|
|
/*
|
|
|
|
* turns a 32-bit partial checksum (e.g. from csum_partial) into a
|
|
|
|
* 1's complement 16-bit checksum.
|
|
|
|
*/
|
2006-11-15 13:21:58 +08:00
|
|
|
static inline __sum16 csum_fold(__wsum sum)
|
2005-04-17 06:20:36 +08:00
|
|
|
{
|
|
|
|
unsigned int tmp;
|
|
|
|
|
|
|
|
/* swap the two 16-bit halves of sum */
|
|
|
|
__asm__("rlwinm %0,%1,16,0,31" : "=r" (tmp) : "r" (sum));
|
|
|
|
/* if there is a carry from adding the two 16-bit halves,
|
|
|
|
it will carry from the lower half into the upper half,
|
|
|
|
giving us the correct sum in the upper half. */
|
2006-11-15 13:21:58 +08:00
|
|
|
return (__force __sum16)(~((__force u32)sum + tmp) >> 16);
|
2005-04-17 06:20:36 +08:00
|
|
|
}
|
|
|
|
|
2016-11-03 13:10:55 +08:00
|
|
|
/*
 * Fold a 64-bit checksum accumulator down to 32 bits.
 *
 * Works in the same spirit as csum_fold(): the two 32-bit halves of x
 * are swapped and the result is added to the unswapped value.  A carry
 * from adding the two halves moves from the lower half into the upper
 * half, so the upper half holds the correct sum and is returned.
 */
static inline u32 from64to32(u64 x)
{
	u64 swapped = ror64(x, 32);

	return (x + swapped) >> 32;
}
|
|
|
|
|
2016-10-27 22:30:06 +08:00
|
|
|
static inline __wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr, __u32 len,
|
|
|
|
__u8 proto, __wsum sum)
|
2005-04-17 06:20:36 +08:00
|
|
|
{
|
2006-11-15 13:21:58 +08:00
|
|
|
#ifdef __powerpc64__
|
2016-11-03 13:10:55 +08:00
|
|
|
u64 s = (__force u32)sum;
|
2005-04-17 06:20:36 +08:00
|
|
|
|
2006-11-15 13:21:58 +08:00
|
|
|
s += (__force u32)saddr;
|
|
|
|
s += (__force u32)daddr;
|
2016-11-03 13:15:42 +08:00
|
|
|
#ifdef __BIG_ENDIAN__
|
2006-11-15 13:21:58 +08:00
|
|
|
s += proto + len;
|
2016-11-03 13:15:42 +08:00
|
|
|
#else
|
|
|
|
s += (proto + len) << 8;
|
|
|
|
#endif
|
2016-11-03 13:10:55 +08:00
|
|
|
return (__force __wsum) from64to32(s);
|
2005-10-21 00:44:46 +08:00
|
|
|
#else
|
|
|
|
__asm__("\n\
|
|
|
|
addc %0,%0,%1 \n\
|
|
|
|
adde %0,%0,%2 \n\
|
|
|
|
adde %0,%0,%3 \n\
|
|
|
|
addze %0,%0 \n\
|
|
|
|
"
|
|
|
|
: "=r" (sum)
|
2006-11-15 13:21:58 +08:00
|
|
|
: "r" (daddr), "r"(saddr), "r"(proto + len), "0"(sum));
|
|
|
|
return sum;
|
2005-04-17 06:20:36 +08:00
|
|
|
#endif
|
2006-11-15 13:21:58 +08:00
|
|
|
}
|
2013-09-23 10:04:51 +08:00
|
|
|
|
2015-05-19 23:18:55 +08:00
|
|
|
/*
|
|
|
|
* computes the checksum of the TCP/UDP pseudo-header
|
|
|
|
* returns a 16-bit checksum, already complemented
|
|
|
|
*/
|
2016-10-27 22:30:06 +08:00
|
|
|
static inline __sum16 csum_tcpudp_magic(__be32 saddr, __be32 daddr, __u32 len,
|
|
|
|
__u8 proto, __wsum sum)
|
2015-05-19 23:18:55 +08:00
|
|
|
{
|
|
|
|
return csum_fold(csum_tcpudp_nofold(saddr, daddr, len, proto, sum));
|
|
|
|
}
|
|
|
|
|
powerpc: add support for csum_add()
The C version of csum_add() as defined in include/net/checksum.h gives
the following assembly in ppc32:
0: 7c 04 1a 14 add r0,r4,r3
4: 7c 64 00 10 subfc r3,r4,r0
8: 7c 63 19 10 subfe r3,r3,r3
c: 7c 63 00 50 subf r3,r3,r0
and the following in ppc64:
0xc000000000001af8 <+0>: add r3,r3,r4
0xc000000000001afc <+4>: cmplw cr7,r3,r4
0xc000000000001b00 <+8>: mfcr r4
0xc000000000001b04 <+12>: rlwinm r4,r4,29,31,31
0xc000000000001b08 <+16>: add r3,r4,r3
0xc000000000001b0c <+20>: clrldi r3,r3,32
0xc000000000001b10 <+24>: blr
include/net/checksum.h also offers the possibility to define an arch
specific function. This patch provides a specific csum_add() inline
function.
Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
Signed-off-by: Scott Wood <scottwood@freescale.com>
2015-05-19 23:18:57 +08:00
|
|
|
#define HAVE_ARCH_CSUM_ADD
|
|
|
|
static inline __wsum csum_add(__wsum csum, __wsum addend)
|
|
|
|
{
|
|
|
|
#ifdef __powerpc64__
|
|
|
|
u64 res = (__force u64)csum;
|
2015-09-22 22:34:34 +08:00
|
|
|
#endif
|
|
|
|
if (__builtin_constant_p(csum) && csum == 0)
|
|
|
|
return addend;
|
|
|
|
if (__builtin_constant_p(addend) && addend == 0)
|
|
|
|
return csum;
|
powerpc: add support for csum_add()
The C version of csum_add() as defined in include/net/checksum.h gives
the following assembly in ppc32:
0: 7c 04 1a 14 add r0,r4,r3
4: 7c 64 00 10 subfc r3,r4,r0
8: 7c 63 19 10 subfe r3,r3,r3
c: 7c 63 00 50 subf r3,r3,r0
and the following in ppc64:
0xc000000000001af8 <+0>: add r3,r3,r4
0xc000000000001afc <+4>: cmplw cr7,r3,r4
0xc000000000001b00 <+8>: mfcr r4
0xc000000000001b04 <+12>: rlwinm r4,r4,29,31,31
0xc000000000001b08 <+16>: add r3,r4,r3
0xc000000000001b0c <+20>: clrldi r3,r3,32
0xc000000000001b10 <+24>: blr
include/net/checksum.h also offers the possibility to define an arch
specific function. This patch provides a specific csum_add() inline
function.
Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
Signed-off-by: Scott Wood <scottwood@freescale.com>
2015-05-19 23:18:57 +08:00
|
|
|
|
2015-09-22 22:34:34 +08:00
|
|
|
#ifdef __powerpc64__
|
powerpc: add support for csum_add()
The C version of csum_add() as defined in include/net/checksum.h gives
the following assembly in ppc32:
0: 7c 04 1a 14 add r0,r4,r3
4: 7c 64 00 10 subfc r3,r4,r0
8: 7c 63 19 10 subfe r3,r3,r3
c: 7c 63 00 50 subf r3,r3,r0
and the following in ppc64:
0xc000000000001af8 <+0>: add r3,r3,r4
0xc000000000001afc <+4>: cmplw cr7,r3,r4
0xc000000000001b00 <+8>: mfcr r4
0xc000000000001b04 <+12>: rlwinm r4,r4,29,31,31
0xc000000000001b08 <+16>: add r3,r4,r3
0xc000000000001b0c <+20>: clrldi r3,r3,32
0xc000000000001b10 <+24>: blr
include/net/checksum.h also offers the possibility to define an arch
specific function. This patch provides a specific csum_add() inline
function.
Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
Signed-off-by: Scott Wood <scottwood@freescale.com>
2015-05-19 23:18:57 +08:00
|
|
|
res += (__force u64)addend;
|
Revert "powerpc/64: Fix checksum folding in csum_add()"
This reverts commit 6ad966d7303b70165228dba1ee8da1a05c10eefe.
That commit was pointless, because csum_add() sums two 32 bits
values, so the sum is 0x1fffffffe at the maximum.
And then when adding upper part (1) and lower part (0xfffffffe),
the result is 0xffffffff which doesn't carry.
Any lower value will not carry either.
And behind the fact that this commit is useless, it also kills the
whole purpose of having an arch specific inline csum_add()
because the resulting code gets even worse than what is obtained
with the generic implementation of csum_add()
0000000000000240 <.csum_add>:
240: 38 00 ff ff li r0,-1
244: 7c 84 1a 14 add r4,r4,r3
248: 78 00 00 20 clrldi r0,r0,32
24c: 78 89 00 22 rldicl r9,r4,32,32
250: 7c 80 00 38 and r0,r4,r0
254: 7c 09 02 14 add r0,r9,r0
258: 78 09 00 22 rldicl r9,r0,32,32
25c: 7c 00 4a 14 add r0,r0,r9
260: 78 03 00 20 clrldi r3,r0,32
264: 4e 80 00 20 blr
In comparison, the generic implementation of csum_add() gives:
0000000000000290 <.csum_add>:
290: 7c 63 22 14 add r3,r3,r4
294: 7f 83 20 40 cmplw cr7,r3,r4
298: 7c 10 10 26 mfocrf r0,1
29c: 54 00 ef fe rlwinm r0,r0,29,31,31
2a0: 7c 60 1a 14 add r3,r0,r3
2a4: 78 63 00 20 clrldi r3,r3,32
2a8: 4e 80 00 20 blr
And the reverted implementation for PPC64 gives:
0000000000000240 <.csum_add>:
240: 7c 84 1a 14 add r4,r4,r3
244: 78 80 00 22 rldicl r0,r4,32,32
248: 7c 80 22 14 add r4,r0,r4
24c: 78 83 00 20 clrldi r3,r4,32
250: 4e 80 00 20 blr
Fixes: 6ad966d7303b7 ("powerpc/64: Fix checksum folding in csum_add()")
Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
Acked-by: Paul Mackerras <paulus@ozlabs.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
2018-04-10 14:34:37 +08:00
|
|
|
return (__force __wsum)((u32)res + (res >> 32));
|
powerpc: add support for csum_add()
The C version of csum_add() as defined in include/net/checksum.h gives
the following assembly in ppc32:
0: 7c 04 1a 14 add r0,r4,r3
4: 7c 64 00 10 subfc r3,r4,r0
8: 7c 63 19 10 subfe r3,r3,r3
c: 7c 63 00 50 subf r3,r3,r0
and the following in ppc64:
0xc000000000001af8 <+0>: add r3,r3,r4
0xc000000000001afc <+4>: cmplw cr7,r3,r4
0xc000000000001b00 <+8>: mfcr r4
0xc000000000001b04 <+12>: rlwinm r4,r4,29,31,31
0xc000000000001b08 <+16>: add r3,r4,r3
0xc000000000001b0c <+20>: clrldi r3,r3,32
0xc000000000001b10 <+24>: blr
include/net/checksum.h also offers the possibility to define an arch
specific function. This patch provides a specific csum_add() inline
function.
Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
Signed-off-by: Scott Wood <scottwood@freescale.com>
2015-05-19 23:18:57 +08:00
|
|
|
#else
|
|
|
|
asm("addc %0,%0,%1;"
|
|
|
|
"addze %0,%0;"
|
2015-09-22 22:34:21 +08:00
|
|
|
: "+r" (csum) : "r" (addend) : "xer");
|
powerpc: add support for csum_add()
The C version of csum_add() as defined in include/net/checksum.h gives
the following assembly in ppc32:
0: 7c 04 1a 14 add r0,r4,r3
4: 7c 64 00 10 subfc r3,r4,r0
8: 7c 63 19 10 subfe r3,r3,r3
c: 7c 63 00 50 subf r3,r3,r0
and the following in ppc64:
0xc000000000001af8 <+0>: add r3,r3,r4
0xc000000000001afc <+4>: cmplw cr7,r3,r4
0xc000000000001b00 <+8>: mfcr r4
0xc000000000001b04 <+12>: rlwinm r4,r4,29,31,31
0xc000000000001b08 <+16>: add r3,r4,r3
0xc000000000001b0c <+20>: clrldi r3,r3,32
0xc000000000001b10 <+24>: blr
include/net/checksum.h also offers the possibility to define an arch
specific function. This patch provides a specific csum_add() inline
function.
Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
Signed-off-by: Scott Wood <scottwood@freescale.com>
2015-05-19 23:18:57 +08:00
|
|
|
return csum;
|
|
|
|
#endif
|
|
|
|
}
|
|
|
|
|
2015-09-22 22:34:25 +08:00
|
|
|
/*
|
|
|
|
* This is a version of ip_compute_csum() optimized for IP headers,
|
|
|
|
* which always checksum on 4 octet boundaries. ihl is the number
|
|
|
|
* of 32-bit words and is always >= 5.
|
|
|
|
*/
|
|
|
|
static inline __wsum ip_fast_csum_nofold(const void *iph, unsigned int ihl)
|
|
|
|
{
|
|
|
|
const u32 *ptr = (const u32 *)iph + 1;
|
|
|
|
#ifdef __powerpc64__
|
|
|
|
unsigned int i;
|
|
|
|
u64 s = *(const u32 *)iph;
|
|
|
|
|
|
|
|
for (i = 0; i < ihl - 1; i++, ptr++)
|
|
|
|
s += *ptr;
|
2016-11-03 13:10:55 +08:00
|
|
|
return (__force __wsum)from64to32(s);
|
2015-09-22 22:34:25 +08:00
|
|
|
#else
|
|
|
|
__wsum sum, tmp;
|
|
|
|
|
|
|
|
asm("mtctr %3;"
|
|
|
|
"addc %0,%4,%5;"
|
|
|
|
"1: lwzu %1, 4(%2);"
|
|
|
|
"adde %0,%0,%1;"
|
|
|
|
"bdnz 1b;"
|
|
|
|
"addze %0,%0;"
|
|
|
|
: "=r" (sum), "=r" (tmp), "+b" (ptr)
|
|
|
|
: "r" (ihl - 2), "r" (*(const u32 *)iph), "r" (*ptr)
|
|
|
|
: "ctr", "xer", "memory");
|
|
|
|
|
|
|
|
return sum;
|
|
|
|
#endif
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline __sum16 ip_fast_csum(const void *iph, unsigned int ihl)
|
|
|
|
{
|
|
|
|
return csum_fold(ip_fast_csum_nofold(iph, ihl));
|
|
|
|
}
|
|
|
|
|
2016-03-08 01:44:37 +08:00
|
|
|
/*
|
|
|
|
* computes the checksum of a memory block at buff, length len,
|
|
|
|
* and adds in "sum" (32-bit)
|
|
|
|
*
|
|
|
|
* returns a 32-bit number suitable for feeding into itself
|
|
|
|
* or csum_tcpudp_magic
|
|
|
|
*
|
|
|
|
* this function must be called with even lengths, except
|
|
|
|
* for the last fragment, which may be odd
|
|
|
|
*
|
|
|
|
* it's best to have buff aligned on a 32-bit boundary
|
|
|
|
*/
|
|
|
|
__wsum __csum_partial(const void *buff, int len, __wsum sum);
|
|
|
|
|
|
|
|
/*
 * Inline front end for __csum_partial().
 *
 * Compile-time constant lengths are expanded in place: even lengths up
 * to 16 bytes become a short run of csum_add() calls, other multiples
 * of 4 bytes go through ip_fast_csum_nofold().  Everything else is
 * handed to __csum_partial().
 */
static inline __wsum csum_partial(const void *buff, int len, __wsum sum)
{
	/* a length unknown at compile time cannot be expanded inline */
	if (!__builtin_constant_p(len))
		return __csum_partial(buff, len, sum);

	if (len > 16 || (len & 1) != 0) {
		/* constant multiple of 4 bytes: sum it word by word */
		if ((len & 3) == 0)
			return csum_add(sum,
					ip_fast_csum_nofold(buff, len >> 2));

		return __csum_partial(buff, len, sum);
	}

	/*
	 * Even constant length of at most 16 bytes: one 32-bit add per
	 * complete word, plus one 16-bit add for a trailing half word.
	 */
	if (len == 2)
		sum = csum_add(sum, (__force __wsum)*(const u16 *)buff);
	if (len >= 4)
		sum = csum_add(sum, (__force __wsum)*(const u32 *)buff);

	if (len == 6)
		sum = csum_add(sum,
			       (__force __wsum)*(const u16 *)(buff + 4));
	if (len >= 8)
		sum = csum_add(sum,
			       (__force __wsum)*(const u32 *)(buff + 4));

	if (len == 10)
		sum = csum_add(sum,
			       (__force __wsum)*(const u16 *)(buff + 8));
	if (len >= 12)
		sum = csum_add(sum,
			       (__force __wsum)*(const u32 *)(buff + 8));

	if (len == 14)
		sum = csum_add(sum,
			       (__force __wsum)*(const u16 *)(buff + 12));
	if (len >= 16)
		sum = csum_add(sum,
			       (__force __wsum)*(const u32 *)(buff + 12));

	return sum;
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* this routine is used for miscellaneous IP-like checksums, mainly
|
|
|
|
* in icmp.c
|
|
|
|
*/
|
|
|
|
static inline __sum16 ip_compute_csum(const void *buff, int len)
|
|
|
|
{
|
|
|
|
return csum_fold(csum_partial(buff, len, 0));
|
|
|
|
}
|
|
|
|
|
powerpc: Implement csum_ipv6_magic in assembly
The generic csum_ipv6_magic() generates a pretty bad result
00000000 <csum_ipv6_magic>: (PPC32)
0: 81 23 00 00 lwz r9,0(r3)
4: 81 03 00 04 lwz r8,4(r3)
8: 7c e7 4a 14 add r7,r7,r9
c: 7d 29 38 10 subfc r9,r9,r7
10: 7d 4a 51 10 subfe r10,r10,r10
14: 7d 27 42 14 add r9,r7,r8
18: 7d 2a 48 50 subf r9,r10,r9
1c: 80 e3 00 08 lwz r7,8(r3)
20: 7d 08 48 10 subfc r8,r8,r9
24: 7d 4a 51 10 subfe r10,r10,r10
28: 7d 29 3a 14 add r9,r9,r7
2c: 81 03 00 0c lwz r8,12(r3)
30: 7d 2a 48 50 subf r9,r10,r9
34: 7c e7 48 10 subfc r7,r7,r9
38: 7d 4a 51 10 subfe r10,r10,r10
3c: 7d 29 42 14 add r9,r9,r8
40: 7d 2a 48 50 subf r9,r10,r9
44: 80 e4 00 00 lwz r7,0(r4)
48: 7d 08 48 10 subfc r8,r8,r9
4c: 7d 4a 51 10 subfe r10,r10,r10
50: 7d 29 3a 14 add r9,r9,r7
54: 7d 2a 48 50 subf r9,r10,r9
58: 81 04 00 04 lwz r8,4(r4)
5c: 7c e7 48 10 subfc r7,r7,r9
60: 7d 4a 51 10 subfe r10,r10,r10
64: 7d 29 42 14 add r9,r9,r8
68: 7d 2a 48 50 subf r9,r10,r9
6c: 80 e4 00 08 lwz r7,8(r4)
70: 7d 08 48 10 subfc r8,r8,r9
74: 7d 4a 51 10 subfe r10,r10,r10
78: 7d 29 3a 14 add r9,r9,r7
7c: 7d 2a 48 50 subf r9,r10,r9
80: 81 04 00 0c lwz r8,12(r4)
84: 7c e7 48 10 subfc r7,r7,r9
88: 7d 4a 51 10 subfe r10,r10,r10
8c: 7d 29 42 14 add r9,r9,r8
90: 7d 2a 48 50 subf r9,r10,r9
94: 7d 08 48 10 subfc r8,r8,r9
98: 7d 4a 51 10 subfe r10,r10,r10
9c: 7d 29 2a 14 add r9,r9,r5
a0: 7d 2a 48 50 subf r9,r10,r9
a4: 7c a5 48 10 subfc r5,r5,r9
a8: 7c 63 19 10 subfe r3,r3,r3
ac: 7d 29 32 14 add r9,r9,r6
b0: 7d 23 48 50 subf r9,r3,r9
b4: 7c c6 48 10 subfc r6,r6,r9
b8: 7c 63 19 10 subfe r3,r3,r3
bc: 7c 63 48 50 subf r3,r3,r9
c0: 54 6a 80 3e rotlwi r10,r3,16
c4: 7c 63 52 14 add r3,r3,r10
c8: 7c 63 18 f8 not r3,r3
cc: 54 63 84 3e rlwinm r3,r3,16,16,31
d0: 4e 80 00 20 blr
0000000000000000 <.csum_ipv6_magic>: (PPC64)
0: 81 23 00 00 lwz r9,0(r3)
4: 80 03 00 04 lwz r0,4(r3)
8: 81 63 00 08 lwz r11,8(r3)
c: 7c e7 4a 14 add r7,r7,r9
10: 7f 89 38 40 cmplw cr7,r9,r7
14: 7d 47 02 14 add r10,r7,r0
18: 7d 30 10 26 mfocrf r9,1
1c: 55 29 f7 fe rlwinm r9,r9,30,31,31
20: 7d 4a 4a 14 add r10,r10,r9
24: 7f 80 50 40 cmplw cr7,r0,r10
28: 7d 2a 5a 14 add r9,r10,r11
2c: 80 03 00 0c lwz r0,12(r3)
30: 81 44 00 00 lwz r10,0(r4)
34: 7d 10 10 26 mfocrf r8,1
38: 55 08 f7 fe rlwinm r8,r8,30,31,31
3c: 7d 29 42 14 add r9,r9,r8
40: 81 04 00 04 lwz r8,4(r4)
44: 7f 8b 48 40 cmplw cr7,r11,r9
48: 7d 29 02 14 add r9,r9,r0
4c: 7d 70 10 26 mfocrf r11,1
50: 55 6b f7 fe rlwinm r11,r11,30,31,31
54: 7d 29 5a 14 add r9,r9,r11
58: 7f 80 48 40 cmplw cr7,r0,r9
5c: 7d 29 52 14 add r9,r9,r10
60: 7c 10 10 26 mfocrf r0,1
64: 54 00 f7 fe rlwinm r0,r0,30,31,31
68: 7d 69 02 14 add r11,r9,r0
6c: 7f 8a 58 40 cmplw cr7,r10,r11
70: 7c 0b 42 14 add r0,r11,r8
74: 81 44 00 08 lwz r10,8(r4)
78: 7c f0 10 26 mfocrf r7,1
7c: 54 e7 f7 fe rlwinm r7,r7,30,31,31
80: 7c 00 3a 14 add r0,r0,r7
84: 7f 88 00 40 cmplw cr7,r8,r0
88: 7d 20 52 14 add r9,r0,r10
8c: 80 04 00 0c lwz r0,12(r4)
90: 7d 70 10 26 mfocrf r11,1
94: 55 6b f7 fe rlwinm r11,r11,30,31,31
98: 7d 29 5a 14 add r9,r9,r11
9c: 7f 8a 48 40 cmplw cr7,r10,r9
a0: 7d 29 02 14 add r9,r9,r0
a4: 7d 70 10 26 mfocrf r11,1
a8: 55 6b f7 fe rlwinm r11,r11,30,31,31
ac: 7d 29 5a 14 add r9,r9,r11
b0: 7f 80 48 40 cmplw cr7,r0,r9
b4: 7d 29 2a 14 add r9,r9,r5
b8: 7c 10 10 26 mfocrf r0,1
bc: 54 00 f7 fe rlwinm r0,r0,30,31,31
c0: 7d 29 02 14 add r9,r9,r0
c4: 7f 85 48 40 cmplw cr7,r5,r9
c8: 7c 09 32 14 add r0,r9,r6
cc: 7d 50 10 26 mfocrf r10,1
d0: 55 4a f7 fe rlwinm r10,r10,30,31,31
d4: 7c 00 52 14 add r0,r0,r10
d8: 7f 80 30 40 cmplw cr7,r0,r6
dc: 7d 30 10 26 mfocrf r9,1
e0: 55 29 ef fe rlwinm r9,r9,29,31,31
e4: 7c 09 02 14 add r0,r9,r0
e8: 54 03 80 3e rotlwi r3,r0,16
ec: 7c 03 02 14 add r0,r3,r0
f0: 7c 03 00 f8 not r3,r0
f4: 78 63 84 22 rldicl r3,r3,48,48
f8: 4e 80 00 20 blr
This patch implements it in assembly for both PPC32 and PPC64
Link: https://github.com/linuxppc/linux/issues/9
Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
Reviewed-by: Segher Boessenkool <segher@kernel.crashing.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
2018-05-24 19:33:18 +08:00
|
|
|
#define _HAVE_ARCH_IPV6_CSUM
|
|
|
|
__sum16 csum_ipv6_magic(const struct in6_addr *saddr,
|
|
|
|
const struct in6_addr *daddr,
|
|
|
|
__u32 len, __u8 proto, __wsum sum);
|
|
|
|
|
2005-12-17 05:43:46 +08:00
|
|
|
#endif /* __KERNEL__ */
|
2005-10-21 00:44:46 +08:00
|
|
|
#endif
|