From 94850257cf0f88b20db7644f28bfedc7d284de15 Mon Sep 17 00:00:00 2001 From: Boris Pismenny Date: Wed, 27 Feb 2019 17:38:03 +0200 Subject: [PATCH 1/4] tls: Fix tls_device handling of partial records Cleanup the handling of partial records while fixing a bug where the tls_push_pending_closed_record function is using the software tls context instead of the hardware context. The bug resulted in the following crash: [ 88.791229] BUG: unable to handle kernel NULL pointer dereference at 0000000000000000 [ 88.793271] #PF error: [normal kernel read fault] [ 88.794449] PGD 800000022a426067 P4D 800000022a426067 PUD 22a156067 PMD 0 [ 88.795958] Oops: 0000 [#1] SMP PTI [ 88.796884] CPU: 2 PID: 4973 Comm: openssl Not tainted 5.0.0-rc4+ #3 [ 88.798314] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 [ 88.800067] RIP: 0010:tls_tx_records+0xef/0x1d0 [tls] [ 88.801256] Code: 00 02 48 89 43 08 e8 a0 0b 96 d9 48 89 df e8 48 dd 4d d9 4c 89 f8 4d 8b bf 98 00 00 00 48 05 98 00 00 00 48 89 04 24 49 39 c7 <49> 8b 1f 4d 89 fd 0f 84 af 00 00 00 41 8b 47 10 85 c0 0f 85 8d 00 [ 88.805179] RSP: 0018:ffffbd888186fca8 EFLAGS: 00010213 [ 88.806458] RAX: ffff9af1ed657c98 RBX: ffff9af1e88a1980 RCX: 0000000000000000 [ 88.808050] RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffff9af1e88a1980 [ 88.809724] RBP: ffff9af1e88a1980 R08: 0000000000000017 R09: ffff9af1ebeeb700 [ 88.811294] R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000 [ 88.812917] R13: ffff9af1e88a1980 R14: ffff9af1ec13f800 R15: 0000000000000000 [ 88.814506] FS: 00007fcad2240740(0000) GS:ffff9af1f7880000(0000) knlGS:0000000000000000 [ 88.816337] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 88.817717] CR2: 0000000000000000 CR3: 0000000228b3e000 CR4: 00000000001406e0 [ 88.819328] Call Trace: [ 88.820123] tls_push_data+0x628/0x6a0 [tls] [ 88.821283] ? remove_wait_queue+0x20/0x60 [ 88.822383] ? n_tty_read+0x683/0x910 [ 88.823363] tls_device_sendmsg+0x53/0xa0 [tls] [ 88.824505] sock_sendmsg+0x36/0x50 [ 88.825492] sock_write_iter+0x87/0x100 [ 88.826521] __vfs_write+0x127/0x1b0 [ 88.827499] vfs_write+0xad/0x1b0 [ 88.828454] ksys_write+0x52/0xc0 [ 88.829378] do_syscall_64+0x5b/0x180 [ 88.830369] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 88.831603] RIP: 0033:0x7fcad1451680 [ 1248.470626] BUG: unable to handle kernel NULL pointer dereference at 0000000000000000 [ 1248.472564] #PF error: [normal kernel read fault] [ 1248.473790] PGD 0 P4D 0 [ 1248.474642] Oops: 0000 [#1] SMP PTI [ 1248.475651] CPU: 3 PID: 7197 Comm: openssl Tainted: G OE 5.0.0-rc4+ #3 [ 1248.477426] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 [ 1248.479310] RIP: 0010:tls_tx_records+0x110/0x1f0 [tls] [ 1248.480644] Code: 00 02 48 89 43 08 e8 4f cb 63 d7 48 89 df e8 f7 9c 1b d7 4c 89 f8 4d 8b bf 98 00 00 00 48 05 98 00 00 00 48 89 04 24 49 39 c7 <49> 8b 1f 4d 89 fd 0f 84 af 00 00 00 41 8b 47 10 85 c0 0f 85 8d 00 [ 1248.484825] RSP: 0018:ffffaa0a41543c08 EFLAGS: 00010213 [ 1248.486154] RAX: ffff955a2755dc98 RBX: ffff955a36031980 RCX: 0000000000000006 [ 1248.487855] RDX: 0000000000000000 RSI: 000000000000002b RDI: 0000000000000286 [ 1248.489524] RBP: ffff955a36031980 R08: 0000000000000000 R09: 00000000000002b1 [ 1248.491394] R10: 0000000000000003 R11: 00000000ad55ad55 R12: 0000000000000000 [ 1248.493162] R13: 0000000000000000 R14: ffff955a2abe6c00 R15: 0000000000000000 [ 1248.494923] FS: 0000000000000000(0000) GS:ffff955a378c0000(0000) knlGS:0000000000000000 [ 1248.496847] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 1248.498357] CR2: 0000000000000000 CR3: 000000020c40e000 CR4: 00000000001406e0 [ 1248.500136] Call Trace: [ 1248.500998] ? tcp_check_oom+0xd0/0xd0 [ 1248.502106] tls_sk_proto_close+0x127/0x1e0 [tls] [ 1248.503411] inet_release+0x3c/0x60 [ 1248.504530] __sock_release+0x3d/0xb0 [ 1248.505611] sock_close+0x11/0x20 [ 1248.506612] __fput+0xb4/0x220 [ 1248.507559] task_work_run+0x88/0xa0 [ 1248.508617] do_exit+0x2cb/0xbc0 [ 1248.509597] ? core_sys_select+0x17a/0x280 [ 1248.510740] do_group_exit+0x39/0xb0 [ 1248.511789] get_signal+0x1d0/0x630 [ 1248.512823] do_signal+0x36/0x620 [ 1248.513822] exit_to_usermode_loop+0x5c/0xc6 [ 1248.515003] do_syscall_64+0x157/0x180 [ 1248.516094] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 1248.517456] RIP: 0033:0x7fb398bd3f53 [ 1248.518537] Code: Bad RIP value. Fixes: a42055e8d2c3 ("net/tls: Add support for async encryption of records for performance") Signed-off-by: Boris Pismenny Signed-off-by: Eran Ben Elisha Signed-off-by: David S. Miller --- include/net/tls.h | 20 ++++---------------- net/tls/tls_device.c | 9 +++++---- net/tls/tls_main.c | 13 ------------- 3 files changed, 9 insertions(+), 33 deletions(-) diff --git a/include/net/tls.h b/include/net/tls.h index 9f4117ae2297..a528a082da73 100644 --- a/include/net/tls.h +++ b/include/net/tls.h @@ -199,10 +199,6 @@ struct tls_offload_context_tx { (ALIGN(sizeof(struct tls_offload_context_tx), sizeof(void *)) + \ TLS_DRIVER_STATE_SIZE) -enum { - TLS_PENDING_CLOSED_RECORD -}; - struct cipher_context { char *iv; char *rec_seq; @@ -335,17 +331,14 @@ int tls_push_sg(struct sock *sk, struct tls_context *ctx, int tls_push_partial_record(struct sock *sk, struct tls_context *ctx, int flags); -int tls_push_pending_closed_record(struct sock *sk, struct tls_context *ctx, - int flags, long *timeo); - static inline struct tls_msg *tls_msg(struct sk_buff *skb) { return (struct tls_msg *)strp_msg(skb); } -static inline bool tls_is_pending_closed_record(struct tls_context *ctx) +static inline bool tls_is_partially_sent_record(struct tls_context *ctx) { - return test_bit(TLS_PENDING_CLOSED_RECORD, &ctx->flags); + return !!ctx->partially_sent_record; } static inline int tls_complete_pending_work(struct sock *sk, @@ -357,17 +350,12 @@ static inline int tls_complete_pending_work(struct sock *sk, if (unlikely(sk->sk_write_pending)) rc = wait_on_pending_writer(sk, timeo); - if (!rc && tls_is_pending_closed_record(ctx)) - rc = tls_push_pending_closed_record(sk, ctx, flags, timeo); + if (!rc && tls_is_partially_sent_record(ctx)) + rc = tls_push_partial_record(sk, ctx, flags); return rc; } -static inline bool tls_is_partially_sent_record(struct tls_context *ctx) -{ - return !!ctx->partially_sent_record; -} - static inline bool tls_is_pending_open_record(struct tls_context *tls_ctx) { return tls_ctx->pending_open_record_frags; diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c index a5c17c47d08a..3e5e8e021a87 100644 --- a/net/tls/tls_device.c +++ b/net/tls/tls_device.c @@ -271,7 +271,6 @@ static int tls_push_record(struct sock *sk, list_add_tail(&record->list, &offload_ctx->records_list); spin_unlock_irq(&offload_ctx->lock); offload_ctx->open_record = NULL; - set_bit(TLS_PENDING_CLOSED_RECORD, &ctx->flags); tls_advance_record_sn(sk, &ctx->tx, ctx->crypto_send.info.version); for (i = 0; i < record->num_frags; i++) { @@ -368,9 +367,11 @@ static int tls_push_data(struct sock *sk, return -sk->sk_err; timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT); - rc = tls_complete_pending_work(sk, tls_ctx, flags, &timeo); - if (rc < 0) - return rc; + if (tls_is_partially_sent_record(tls_ctx)) { + rc = tls_push_partial_record(sk, tls_ctx, flags); + if (rc < 0) + return rc; + } pfrag = sk_page_frag(sk); diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index caff15b2f9b2..7e05af75536d 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -209,19 +209,6 @@ int tls_push_partial_record(struct sock *sk, struct tls_context *ctx, return tls_push_sg(sk, ctx, sg, offset, flags); } -int tls_push_pending_closed_record(struct sock *sk, - struct tls_context *tls_ctx, - int flags, long *timeo) -{ - struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx); - - if (tls_is_partially_sent_record(tls_ctx) || - !list_empty(&ctx->tx_list)) - return tls_tx_records(sk, flags); - else - return tls_ctx->push_pending_record(sk, flags); -} - static void tls_write_space(struct sock *sk) { struct tls_context *ctx = tls_get_ctx(sk); From 7463d3a2db0efea3701aab5eeb310e0d8157aff7 Mon Sep 17 00:00:00 2001 From: Boris Pismenny Date: Wed, 27 Feb 2019 17:38:04 +0200 Subject: [PATCH 2/4] tls: Fix write space handling TLS device cannot use the sw context. This patch returns the original tls device write space handler and moves the sw/device specific portions to the relevant files. Also, we remove the write_space call for the tls_sw flow, because it handles partial records in its delayed tx work handler. Fixes: a42055e8d2c3 ("net/tls: Add support for async encryption of records for performance") Signed-off-by: Boris Pismenny Reviewed-by: Eran Ben Elisha Signed-off-by: David S. Miller --- include/net/tls.h | 3 +++ net/tls/tls_device.c | 17 +++++++++++++++++ net/tls/tls_main.c | 15 ++++++--------- net/tls/tls_sw.c | 13 +++++++++++++ 4 files changed, 39 insertions(+), 9 deletions(-) diff --git a/include/net/tls.h b/include/net/tls.h index a528a082da73..a5a938583295 100644 --- a/include/net/tls.h +++ b/include/net/tls.h @@ -519,6 +519,9 @@ static inline bool tls_sw_has_ctx_tx(const struct sock *sk) return !!tls_sw_ctx_tx(ctx); } +void tls_sw_write_space(struct sock *sk, struct tls_context *ctx); +void tls_device_write_space(struct sock *sk, struct tls_context *ctx); + static inline struct tls_offload_context_rx * tls_offload_ctx_rx(const struct tls_context *tls_ctx) { diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c index 3e5e8e021a87..4a1da837a733 100644 --- a/net/tls/tls_device.c +++ b/net/tls/tls_device.c @@ -546,6 +546,23 @@ static int tls_device_push_pending_record(struct sock *sk, int flags) return tls_push_data(sk, &msg_iter, 0, flags, TLS_RECORD_TYPE_DATA); } +void tls_device_write_space(struct sock *sk, struct tls_context *ctx) +{ + int rc = 0; + + if (!sk->sk_write_pending && tls_is_partially_sent_record(ctx)) { + gfp_t sk_allocation = sk->sk_allocation; + + sk->sk_allocation = GFP_ATOMIC; + rc = tls_push_partial_record(sk, ctx, + MSG_DONTWAIT | MSG_NOSIGNAL); + sk->sk_allocation = sk_allocation; + } + + if (!rc) + ctx->sk_write_space(sk); +} + void handle_device_resync(struct sock *sk, u32 seq, u64 rcd_sn) { struct tls_context *tls_ctx = tls_get_ctx(sk); diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index 7e05af75536d..17e8667917aa 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -212,7 +212,6 @@ int tls_push_partial_record(struct sock *sk, struct tls_context *ctx, static void tls_write_space(struct sock *sk) { struct tls_context *ctx = tls_get_ctx(sk); - struct tls_sw_context_tx *tx_ctx = tls_sw_ctx_tx(ctx); /* If in_tcp_sendpages call lower protocol write space handler * to ensure we wake up any waiting operations there. For example @@ -223,14 +222,12 @@ static void tls_write_space(struct sock *sk) return; } - /* Schedule the transmission if tx list is ready */ - if (is_tx_ready(tx_ctx) && !sk->sk_write_pending) { - /* Schedule the transmission */ - if (!test_and_set_bit(BIT_TX_SCHEDULED, &tx_ctx->tx_bitmask)) - schedule_delayed_work(&tx_ctx->tx_work.work, 0); - } - - ctx->sk_write_space(sk); +#ifdef CONFIG_TLS_DEVICE + if (ctx->tx_conf == TLS_HW) + tls_device_write_space(sk, ctx); + else +#endif + tls_sw_write_space(sk, ctx); } static void tls_ctx_free(struct tls_context *ctx) diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 1cc830582fa8..917caacd4d31 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -2126,6 +2126,19 @@ static void tx_work_handler(struct work_struct *work) release_sock(sk); } +void tls_sw_write_space(struct sock *sk, struct tls_context *ctx) +{ + struct tls_sw_context_tx *tx_ctx = tls_sw_ctx_tx(ctx); + + /* Schedule the transmission if tx list is ready */ + if (is_tx_ready(tx_ctx) && !sk->sk_write_pending) { + /* Schedule the transmission */ + if (!test_and_set_bit(BIT_TX_SCHEDULED, + &tx_ctx->tx_bitmask)) + schedule_delayed_work(&tx_ctx->tx_work.work, 0); + } +} + int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx) { struct tls_context *tls_ctx = tls_get_ctx(sk); From 7754bd63ed081fa7c0aedd79ae0e8003465b641b Mon Sep 17 00:00:00 2001 From: Eran Ben Elisha Date: Wed, 27 Feb 2019 17:38:05 +0200 Subject: [PATCH 3/4] tls: Fix mixing between async capable and async Today, tls_sw_recvmsg is capable of using asynchronous mode to handle application data TLS records. Moreover, it assumes that if the cipher can be handled asynchronously, then all packets will be processed asynchronously. However, this assumption is not always true. Specifically, for AES-GCM in TLS1.2, it causes data corruption, and breaks user applications. This patch fixes this problem by separating the async capability from the decryption operation result. Fixes: c0ab4732d4c6 ("net/tls: Do not use async crypto for non-data records") Signed-off-by: Eran Ben Elisha Reviewed-by: Boris Pismenny Signed-off-by: David S. Miller --- net/tls/tls_sw.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 917caacd4d31..68cd026fa57c 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -1693,7 +1693,8 @@ int tls_sw_recvmsg(struct sock *sk, bool zc = false; int to_decrypt; int chunk = 0; - bool async; + bool async_capable; + bool async = false; skb = tls_wait_data(sk, psock, flags, timeo, &err); if (!skb) { @@ -1727,21 +1728,23 @@ int tls_sw_recvmsg(struct sock *sk, /* Do not use async mode if record is non-data */ if (ctx->control == TLS_RECORD_TYPE_DATA) - async = ctx->async_capable; + async_capable = ctx->async_capable; else - async = false; + async_capable = false; err = decrypt_skb_update(sk, skb, &msg->msg_iter, - &chunk, &zc, async); + &chunk, &zc, async_capable); if (err < 0 && err != -EINPROGRESS) { tls_err_abort(sk, EBADMSG); goto recv_end; } - if (err == -EINPROGRESS) + if (err == -EINPROGRESS) { + async = true; num_async++; - else if (prot->version == TLS_1_3_VERSION) + } else if (prot->version == TLS_1_3_VERSION) { tlm->control = ctx->control; + } /* If the type of records being processed is not known yet, * set it to record type just dequeued. If it is already known, From d069b780e367149a42d92be85ab21ac8c0281aad Mon Sep 17 00:00:00 2001 From: Boris Pismenny Date: Wed, 27 Feb 2019 17:38:06 +0200 Subject: [PATCH 4/4] tls: Fix tls_device receive Currently, the receive function fails to handle records already decrypted by the device due to the commit mentioned below. This commit advances the TLS record sequence number and prepares the context to handle the next record. Fixes: fedf201e1296 ("net: tls: Refactor control message handling on recv") Signed-off-by: Boris Pismenny Reviewed-by: Eran Ben Elisha Signed-off-by: David S. Miller --- net/tls/tls_sw.c | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 68cd026fa57c..425351ac2a9b 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -1467,23 +1467,26 @@ static int decrypt_skb_update(struct sock *sk, struct sk_buff *skb, struct strp_msg *rxm = strp_msg(skb); int err = 0; -#ifdef CONFIG_TLS_DEVICE - err = tls_device_decrypted(sk, skb); - if (err < 0) - return err; -#endif if (!ctx->decrypted) { - err = decrypt_internal(sk, skb, dest, NULL, chunk, zc, async); - if (err < 0) { - if (err == -EINPROGRESS) - tls_advance_record_sn(sk, &tls_ctx->rx, - version); - +#ifdef CONFIG_TLS_DEVICE + err = tls_device_decrypted(sk, skb); + if (err < 0) return err; +#endif + /* Still not decrypted after tls_device */ + if (!ctx->decrypted) { + err = decrypt_internal(sk, skb, dest, NULL, chunk, zc, + async); + if (err < 0) { + if (err == -EINPROGRESS) + tls_advance_record_sn(sk, &tls_ctx->rx, + version); + + return err; + } } rxm->full_len -= padding_length(ctx, tls_ctx, skb); - rxm->offset += prot->prepend_size; rxm->full_len -= prot->overhead_size; tls_advance_record_sn(sk, &tls_ctx->rx, version);