diff --git a/redis.conf b/redis.conf
index 6688fdc2a..a1cbedd34 100644
--- a/redis.conf
+++ b/redis.conf
@@ -1291,38 +1291,27 @@ lazyfree-lazy-user-flush no
 # in different I/O threads. Since especially writing is so slow, normally
 # Redis users use pipelining in order to speed up the Redis performances per
 # core, and spawn multiple instances in order to scale more. Using I/O
-# threads it is possible to easily speedup two times Redis without resorting
+# threads it is possible to easily speed up Redis several times without resorting
 # to pipelining nor sharding of the instance.
 #
 # By default threading is disabled, we suggest enabling it only in machines
 # that have at least 4 or more cores, leaving at least one spare core.
-# Using more than 8 threads is unlikely to help much. We also recommend using
-# threaded I/O only if you actually have performance problems, with Redis
-# instances being able to use a quite big percentage of CPU time, otherwise
-# there is no point in using this feature.
+# We also recommend using threaded I/O only if you actually have performance
+# problems, with Redis instances being able to use a quite big percentage of
+# CPU time, otherwise there is no point in using this feature.
 #
-# So for instance if you have a four cores boxes, try to use 2 or 3 I/O
-# threads, if you have a 8 cores, try to use 6 threads. In order to
+# So for instance if you have a four core box, try to use 3 I/O
+# threads, if you have an 8 core box, try to use 7 threads. In order to
 # enable I/O threads use the following configuration directive:
 #
 # io-threads 4
 #
 # Setting io-threads to 1 will just use the main thread as usual.
-# When I/O threads are enabled, we only use threads for writes, that is
-# to thread the write(2) syscall and transfer the client buffers to the
-# socket. However it is also possible to enable threading of reads and
-# protocol parsing using the following configuration directive, by setting
-# it to yes:
+# When I/O threads are enabled, we use threads not only for writes, that
+# is to thread the write(2) syscall and transfer the client buffers to the
+# socket, but also for reads and protocol parsing.
 #
-# io-threads-do-reads no
-#
-# Usually threading reads doesn't help much.
-#
-# NOTE 1: This configuration directive cannot be changed at runtime via
-# CONFIG SET. Also, this feature currently does not work when SSL is
-# enabled.
-#
-# NOTE 2: If you want to test the Redis speedup using redis-benchmark, make
+# NOTE: If you want to test the Redis speedup using redis-benchmark, make
 # sure you also run the benchmark itself in threaded mode, using the
 # --threads option to match the number of Redis threads, otherwise you'll not
 # be able to notice the improvements.
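For example, applying the guidance above on an 8-core machine (illustrative values, not tuned recommendations):

    # redis.conf: leave one core spare for the main thread and the system
    io-threads 7

    # benchmark with a matching number of client threads
    redis-benchmark --threads 7 -t set,get -n 1000000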
diff --git a/src/Makefile b/src/Makefile index 8f245d19d..4f394782d 100644 --- a/src/Makefile +++ b/src/Makefile @@ -354,7 +354,7 @@ endif REDIS_SERVER_NAME=redis-server$(PROG_SUFFIX) REDIS_SENTINEL_NAME=redis-sentinel$(PROG_SUFFIX) -REDIS_SERVER_OBJ=threads_mngr.o adlist.o quicklist.o ae.o anet.o dict.o ebuckets.o mstr.o kvstore.o server.o sds.o zmalloc.o lzf_c.o lzf_d.o pqsort.o zipmap.o sha1.o ziplist.o release.o networking.o util.o object.o db.o replication.o rdb.o t_string.o t_list.o t_set.o t_zset.o t_hash.o config.o aof.o pubsub.o multi.o debug.o sort.o intset.o syncio.o cluster.o cluster_legacy.o crc16.o endianconv.o slowlog.o eval.o bio.o rio.o rand.o memtest.o syscheck.o crcspeed.o crccombine.o crc64.o bitops.o sentinel.o notify.o setproctitle.o blocked.o hyperloglog.o latency.o sparkline.o redis-check-rdb.o redis-check-aof.o geo.o lazyfree.o module.o evict.o expire.o geohash.o geohash_helper.o childinfo.o defrag.o siphash.o rax.o t_stream.o listpack.o localtime.o lolwut.o lolwut5.o lolwut6.o acl.o tracking.o socket.o tls.o sha256.o timeout.o setcpuaffinity.o monotonic.o mt19937-64.o resp_parser.o call_reply.o script_lua.o script.o functions.o function_lua.o commands.o strl.o connection.o unix.o logreqres.o +REDIS_SERVER_OBJ=threads_mngr.o adlist.o quicklist.o ae.o anet.o dict.o ebuckets.o eventnotifier.o iothread.o mstr.o kvstore.o server.o sds.o zmalloc.o lzf_c.o lzf_d.o pqsort.o zipmap.o sha1.o ziplist.o release.o networking.o util.o object.o db.o replication.o rdb.o t_string.o t_list.o t_set.o t_zset.o t_hash.o config.o aof.o pubsub.o multi.o debug.o sort.o intset.o syncio.o cluster.o cluster_legacy.o crc16.o endianconv.o slowlog.o eval.o bio.o rio.o rand.o memtest.o syscheck.o crcspeed.o crccombine.o crc64.o bitops.o sentinel.o notify.o setproctitle.o blocked.o hyperloglog.o latency.o sparkline.o redis-check-rdb.o redis-check-aof.o geo.o lazyfree.o module.o evict.o expire.o geohash.o geohash_helper.o childinfo.o defrag.o siphash.o rax.o t_stream.o listpack.o localtime.o lolwut.o lolwut5.o lolwut6.o acl.o tracking.o socket.o tls.o sha256.o timeout.o setcpuaffinity.o monotonic.o mt19937-64.o resp_parser.o call_reply.o script_lua.o script.o functions.o function_lua.o commands.o strl.o connection.o unix.o logreqres.o REDIS_CLI_NAME=redis-cli$(PROG_SUFFIX) REDIS_CLI_OBJ=anet.o adlist.o dict.o redis-cli.o zmalloc.o release.o ae.o redisassert.o crcspeed.o crccombine.o crc64.o siphash.o crc16.o monotonic.o cli_common.o mt19937-64.o strl.o cli_commands.o REDIS_BENCHMARK_NAME=redis-benchmark$(PROG_SUFFIX) diff --git a/src/ae.c b/src/ae.c index 3d3569865..ac4422398 100644 --- a/src/ae.c +++ b/src/ae.c @@ -42,7 +42,7 @@ #endif #endif - +#define INITIAL_EVENT 1024 aeEventLoop *aeCreateEventLoop(int setsize) { aeEventLoop *eventLoop; int i; @@ -50,8 +50,9 @@ aeEventLoop *aeCreateEventLoop(int setsize) { monotonicInit(); /* just in case the calling app didn't initialize */ if ((eventLoop = zmalloc(sizeof(*eventLoop))) == NULL) goto err; - eventLoop->events = zmalloc(sizeof(aeFileEvent)*setsize); - eventLoop->fired = zmalloc(sizeof(aeFiredEvent)*setsize); + eventLoop->nevents = setsize < INITIAL_EVENT ? 
setsize : INITIAL_EVENT; + eventLoop->events = zmalloc(sizeof(aeFileEvent)*eventLoop->nevents); + eventLoop->fired = zmalloc(sizeof(aeFiredEvent)*eventLoop->nevents); if (eventLoop->events == NULL || eventLoop->fired == NULL) goto err; eventLoop->setsize = setsize; eventLoop->timeEventHead = NULL; @@ -61,10 +62,11 @@ aeEventLoop *aeCreateEventLoop(int setsize) { eventLoop->beforesleep = NULL; eventLoop->aftersleep = NULL; eventLoop->flags = 0; + memset(eventLoop->privdata, 0, sizeof(eventLoop->privdata)); if (aeApiCreate(eventLoop) == -1) goto err; /* Events with mask == AE_NONE are not set. So let's initialize the * vector with it. */ - for (i = 0; i < setsize; i++) + for (i = 0; i < eventLoop->nevents; i++) eventLoop->events[i].mask = AE_NONE; return eventLoop; @@ -102,20 +104,19 @@ void aeSetDontWait(aeEventLoop *eventLoop, int noWait) { * * Otherwise AE_OK is returned and the operation is successful. */ int aeResizeSetSize(aeEventLoop *eventLoop, int setsize) { - int i; - if (setsize == eventLoop->setsize) return AE_OK; if (eventLoop->maxfd >= setsize) return AE_ERR; if (aeApiResize(eventLoop,setsize) == -1) return AE_ERR; - eventLoop->events = zrealloc(eventLoop->events,sizeof(aeFileEvent)*setsize); - eventLoop->fired = zrealloc(eventLoop->fired,sizeof(aeFiredEvent)*setsize); eventLoop->setsize = setsize; - /* Make sure that if we created new slots, they are initialized with - * an AE_NONE mask. */ - for (i = eventLoop->maxfd+1; i < setsize; i++) - eventLoop->events[i].mask = AE_NONE; + /* If the current allocated space is larger than the requested size, + * we need to shrink it to the requested size. */ + if (setsize < eventLoop->nevents) { + eventLoop->events = zrealloc(eventLoop->events,sizeof(aeFileEvent)*setsize); + eventLoop->fired = zrealloc(eventLoop->fired,sizeof(aeFiredEvent)*setsize); + eventLoop->nevents = setsize; + } return AE_OK; } @@ -147,6 +148,22 @@ int aeCreateFileEvent(aeEventLoop *eventLoop, int fd, int mask, errno = ERANGE; return AE_ERR; } + + /* Resize the events and fired arrays if the file + * descriptor exceeds the current number of events. */ + if (unlikely(fd >= eventLoop->nevents)) { + int newnevents = eventLoop->nevents; + newnevents = (newnevents * 2 > fd + 1) ? newnevents * 2 : fd + 1; + newnevents = (newnevents > eventLoop->setsize) ? 
eventLoop->setsize : newnevents; + eventLoop->events = zrealloc(eventLoop->events, sizeof(aeFileEvent) * newnevents); + eventLoop->fired = zrealloc(eventLoop->fired, sizeof(aeFiredEvent) * newnevents); + + /* Initialize new slots with an AE_NONE mask */ + for (int i = eventLoop->nevents; i < newnevents; i++) + eventLoop->events[i].mask = AE_NONE; + eventLoop->nevents = newnevents; + } + aeFileEvent *fe = &eventLoop->events[fd]; if (aeApiAddEvent(eventLoop, fd, mask) == -1) diff --git a/src/ae.h b/src/ae.h index 5f1e17f7d..16c5fcc5c 100644 --- a/src/ae.h +++ b/src/ae.h @@ -79,6 +79,7 @@ typedef struct aeEventLoop { int maxfd; /* highest file descriptor currently registered */ int setsize; /* max number of file descriptors tracked */ long long timeEventNextId; + int nevents; /* Size of Registered events */ aeFileEvent *events; /* Registered events */ aeFiredEvent *fired; /* Fired events */ aeTimeEvent *timeEventHead; @@ -87,6 +88,7 @@ typedef struct aeEventLoop { aeBeforeSleepProc *beforesleep; aeBeforeSleepProc *aftersleep; int flags; + void *privdata[2]; } aeEventLoop; /* Prototypes */ diff --git a/src/cluster.c b/src/cluster.c index 876b1327f..6c0bf75cc 100644 --- a/src/cluster.c +++ b/src/cluster.c @@ -317,7 +317,7 @@ migrateCachedSocket* migrateGetSocket(client *c, robj *host, robj *port, long ti } /* Create the connection */ - conn = connCreate(connTypeOfCluster()); + conn = connCreate(server.el, connTypeOfCluster()); if (connBlockingConnect(conn, host->ptr, atoi(port->ptr), timeout) != C_OK) { addReplyError(c,"-IOERR error or timeout connecting to the client"); diff --git a/src/cluster_legacy.c b/src/cluster_legacy.c index ead19ac71..d707d863d 100644 --- a/src/cluster_legacy.c +++ b/src/cluster_legacy.c @@ -1262,7 +1262,7 @@ void clusterAcceptHandler(aeEventLoop *el, int fd, void *privdata, int mask) { return; } - connection *conn = connCreateAccepted(connTypeOfCluster(), cfd, &require_auth); + connection *conn = connCreateAccepted(server.el, connTypeOfCluster(), cfd, &require_auth); /* Make sure connection is not in an error state */ if (connGetState(conn) != CONN_STATE_ACCEPTING) { @@ -4583,7 +4583,7 @@ static int clusterNodeCronHandleReconnect(clusterNode *node, mstime_t handshake_ if (node->link == NULL) { clusterLink *link = createClusterLink(node); - link->conn = connCreate(connTypeOfCluster()); + link->conn = connCreate(server.el, connTypeOfCluster()); connSetPrivateData(link->conn, link); if (connConnect(link->conn, node->ip, node->cport, server.bind_source_addr, clusterLinkConnectHandler) == C_ERR) { diff --git a/src/commands.def b/src/commands.def index ef42fb8da..53be28942 100644 --- a/src/commands.def +++ b/src/commands.def @@ -1239,6 +1239,9 @@ commandHistory CLIENT_LIST_History[] = { {"6.2.0","Added `argv-mem`, `tot-mem`, `laddr` and `redir` fields and the optional `ID` filter."}, {"7.0.0","Added `resp`, `multi-mem`, `rbs` and `rbp` fields."}, {"7.0.3","Added `ssub` field."}, +{"7.2.0","Added `lib-name` and `lib-ver` fields."}, +{"7.4.0","Added `watch` field."}, +{"8.0.0","Added `io-thread` field."}, }; #endif @@ -1546,7 +1549,7 @@ struct COMMAND_STRUCT CLIENT_Subcommands[] = { {MAKE_CMD("id","Returns the unique client ID of the connection.","O(1)","5.0.0",CMD_DOC_NONE,NULL,NULL,"connection",COMMAND_GROUP_CONNECTION,CLIENT_ID_History,0,CLIENT_ID_Tips,0,clientCommand,2,CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_SENTINEL,ACL_CATEGORY_CONNECTION,CLIENT_ID_Keyspecs,0,NULL,0)}, {MAKE_CMD("info","Returns information about the 
connection.","O(1)","6.2.0",CMD_DOC_NONE,NULL,NULL,"connection",COMMAND_GROUP_CONNECTION,CLIENT_INFO_History,0,CLIENT_INFO_Tips,1,clientCommand,2,CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_SENTINEL,ACL_CATEGORY_CONNECTION,CLIENT_INFO_Keyspecs,0,NULL,0)}, {MAKE_CMD("kill","Terminates open connections.","O(N) where N is the number of client connections","2.4.0",CMD_DOC_NONE,NULL,NULL,"connection",COMMAND_GROUP_CONNECTION,CLIENT_KILL_History,6,CLIENT_KILL_Tips,0,clientCommand,-3,CMD_ADMIN|CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_SENTINEL,ACL_CATEGORY_CONNECTION,CLIENT_KILL_Keyspecs,0,NULL,1),.args=CLIENT_KILL_Args}, -{MAKE_CMD("list","Lists open connections.","O(N) where N is the number of client connections","2.4.0",CMD_DOC_NONE,NULL,NULL,"connection",COMMAND_GROUP_CONNECTION,CLIENT_LIST_History,6,CLIENT_LIST_Tips,1,clientCommand,-2,CMD_ADMIN|CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_SENTINEL,ACL_CATEGORY_CONNECTION,CLIENT_LIST_Keyspecs,0,NULL,2),.args=CLIENT_LIST_Args}, +{MAKE_CMD("list","Lists open connections.","O(N) where N is the number of client connections","2.4.0",CMD_DOC_NONE,NULL,NULL,"connection",COMMAND_GROUP_CONNECTION,CLIENT_LIST_History,9,CLIENT_LIST_Tips,1,clientCommand,-2,CMD_ADMIN|CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_SENTINEL,ACL_CATEGORY_CONNECTION,CLIENT_LIST_Keyspecs,0,NULL,2),.args=CLIENT_LIST_Args}, {MAKE_CMD("no-evict","Sets the client eviction mode of the connection.","O(1)","7.0.0",CMD_DOC_NONE,NULL,NULL,"connection",COMMAND_GROUP_CONNECTION,CLIENT_NO_EVICT_History,0,CLIENT_NO_EVICT_Tips,0,clientCommand,3,CMD_ADMIN|CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_SENTINEL,ACL_CATEGORY_CONNECTION,CLIENT_NO_EVICT_Keyspecs,0,NULL,1),.args=CLIENT_NO_EVICT_Args}, {MAKE_CMD("no-touch","Controls whether commands sent by the client affect the LRU/LFU of accessed keys.","O(1)","7.2.0",CMD_DOC_NONE,NULL,NULL,"connection",COMMAND_GROUP_CONNECTION,CLIENT_NO_TOUCH_History,0,CLIENT_NO_TOUCH_Tips,0,clientCommand,3,CMD_NOSCRIPT|CMD_LOADING|CMD_STALE,ACL_CATEGORY_CONNECTION,CLIENT_NO_TOUCH_Keyspecs,0,NULL,1),.args=CLIENT_NO_TOUCH_Args}, {MAKE_CMD("pause","Suspends commands processing.","O(1)","3.0.0",CMD_DOC_NONE,NULL,NULL,"connection",COMMAND_GROUP_CONNECTION,CLIENT_PAUSE_History,1,CLIENT_PAUSE_Tips,0,clientCommand,-3,CMD_ADMIN|CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_SENTINEL,ACL_CATEGORY_CONNECTION,CLIENT_PAUSE_Keyspecs,0,NULL,2),.args=CLIENT_PAUSE_Args}, diff --git a/src/commands/client-list.json b/src/commands/client-list.json index f72ffaf40..08305216c 100644 --- a/src/commands/client-list.json +++ b/src/commands/client-list.json @@ -31,6 +31,18 @@ [ "7.0.3", "Added `ssub` field." + ], + [ + "7.2.0", + "Added `lib-name` and `lib-ver` fields." + ], + [ + "7.4.0", + "Added `watch` field." + ], + [ + "8.0.0", + "Added `io-thread` field." 
] ], "command_flags": [ diff --git a/src/config.c b/src/config.c index d0d30966c..797284347 100644 --- a/src/config.c +++ b/src/config.c @@ -430,6 +430,7 @@ void loadServerConfigFromString(char *config) { {"list-max-ziplist-entries", 2, 2}, {"list-max-ziplist-value", 2, 2}, {"lua-replicate-commands", 2, 2}, + {"io-threads-do-reads", 2, 2}, {NULL, 0}, }; char buf[1024]; @@ -2550,11 +2551,10 @@ static int updateMaxclients(const char **err) { *err = msg; return 0; } - if ((unsigned int) aeGetSetSize(server.el) < - server.maxclients + CONFIG_FDSET_INCR) - { - if (aeResizeSetSize(server.el, - server.maxclients + CONFIG_FDSET_INCR) == AE_ERR) + size_t newsize = server.maxclients + CONFIG_FDSET_INCR; + if ((unsigned int) aeGetSetSize(server.el) < newsize) { + if (aeResizeSetSize(server.el, newsize) == AE_ERR || + resizeAllIOThreadsEventLoops(newsize) == AE_ERR) { *err = "The event loop API used by Redis is not able to handle the specified number of clients"; return 0; @@ -3035,6 +3035,7 @@ static int applyClientMaxMemoryUsage(const char **err) { if (server.maxmemory_clients != 0) initServerClientMemUsageBuckets(); + pauseAllIOThreads(); /* When client eviction is enabled update memory buckets for all clients. * When disabled, clear that data structure. */ listRewind(server.clients, &li); @@ -3048,6 +3049,7 @@ static int applyClientMaxMemoryUsage(const char **err) { updateClientMemUsageAndBucket(c); } } + resumeAllIOThreads(); if (server.maxmemory_clients == 0) freeServerClientMemUsageBuckets(); diff --git a/src/config.h b/src/config.h index e8f77a350..ec0fb1529 100644 --- a/src/config.h +++ b/src/config.h @@ -47,6 +47,7 @@ #define HAVE_PROC_SMAPS 1 #define HAVE_PROC_SOMAXCONN 1 #define HAVE_PROC_OOM_SCORE_ADJ 1 +#define HAVE_EVENT_FD 1 #endif /* Test for task_info() */ diff --git a/src/connection.c b/src/connection.c index fd9d5d17a..6ac1b99d9 100644 --- a/src/connection.c +++ b/src/connection.c @@ -156,14 +156,14 @@ void connTypeCleanupAll(void) { } /* walk all the connection types until has pending data */ -int connTypeHasPendingData(void) { +int connTypeHasPendingData(struct aeEventLoop *el) { ConnectionType *ct; int type; int ret = 0; for (type = 0; type < CONN_TYPE_MAX; type++) { ct = connTypes[type]; - if (ct && ct->has_pending_data && (ret = ct->has_pending_data())) { + if (ct && ct->has_pending_data && (ret = ct->has_pending_data(el))) { return ret; } } @@ -172,7 +172,7 @@ int connTypeHasPendingData(void) { } /* walk all the connection types and process pending data for each connection type */ -int connTypeProcessPendingData(void) { +int connTypeProcessPendingData(struct aeEventLoop *el) { ConnectionType *ct; int type; int ret = 0; @@ -180,7 +180,7 @@ int connTypeProcessPendingData(void) { for (type = 0; type < CONN_TYPE_MAX; type++) { ct = connTypes[type]; if (ct && ct->process_pending_data) { - ret += ct->process_pending_data(); + ret += ct->process_pending_data(el); } } diff --git a/src/connection.h b/src/connection.h index a8c296d15..0ebc84489 100644 --- a/src/connection.h +++ b/src/connection.h @@ -60,8 +60,8 @@ typedef struct ConnectionType { int (*listen)(connListener *listener); /* create/shutdown/close connection */ - connection* (*conn_create)(void); - connection* (*conn_create_accepted)(int fd, void *priv); + connection* (*conn_create)(struct aeEventLoop *el); + connection* (*conn_create_accepted)(struct aeEventLoop *el, int fd, void *priv); void (*shutdown)(struct connection *conn); void (*close)(struct connection *conn); @@ -81,9 +81,13 @@ typedef struct ConnectionType { 
 ssize_t (*sync_read)(struct connection *conn, char *ptr, ssize_t size, long long timeout);
 ssize_t (*sync_readline)(struct connection *conn, char *ptr, ssize_t size, long long timeout);
 
+    /* event loop */
+    void (*unbind_event_loop)(struct connection *conn);
+    int (*rebind_event_loop)(struct connection *conn, aeEventLoop *el);
+
     /* pending data */
-    int (*has_pending_data)(void);
-    int (*process_pending_data)(void);
+    int (*has_pending_data)(struct aeEventLoop *el);
+    int (*process_pending_data)(struct aeEventLoop *el);
 
     /* TLS specified methods */
     sds (*get_peer_cert)(struct connection *conn);
@@ -98,6 +102,7 @@ struct connection {
     short int refs;
     unsigned short int iovcnt;
     void *private_data;
+    struct aeEventLoop *el; /* Event loop the connection is currently bound to */
     ConnectionCallbackFunc conn_handler;
     ConnectionCallbackFunc write_handler;
     ConnectionCallbackFunc read_handler;
@@ -319,6 +324,28 @@ static inline int connHasReadHandler(connection *conn) {
     return conn->read_handler != NULL;
 }
 
+/* Returns true if the connection is bound to an event loop */
+static inline int connHasEventLoop(connection *conn) {
+    return conn->el != NULL;
+}
+
+/* Unbind the current event loop from the connection, so that it can be
+ * rebound to a different event loop in the future. */
+static inline void connUnbindEventLoop(connection *conn) {
+    if (conn->el == NULL) return;
+    connSetReadHandler(conn, NULL);
+    connSetWriteHandler(conn, NULL);
+    if (conn->type->unbind_event_loop)
+        conn->type->unbind_event_loop(conn);
+    conn->el = NULL;
+}
+
+/* Rebind the connection to another event loop; read/write handlers must not
+ * be installed in the current event loop. */
+static inline int connRebindEventLoop(connection *conn, aeEventLoop *el) {
+    return conn->type->rebind_event_loop(conn, el);
+}
+
 /* Associate a private data pointer with the connection */
 static inline void connSetPrivateData(connection *conn, void *data) {
     conn->private_data = data;
@@ -379,14 +406,14 @@ ConnectionType *connectionTypeUnix(void);
 int connectionIndexByType(const char *typename);
 
 /* Create a connection of specified type */
-static inline connection *connCreate(ConnectionType *ct) {
-    return ct->conn_create();
+static inline connection *connCreate(struct aeEventLoop *el, ConnectionType *ct) {
+    return ct->conn_create(el);
 }
 
 /* Create an accepted connection of specified type.
  * priv is connection type specified argument */
-static inline connection *connCreateAccepted(ConnectionType *ct, int fd, void *priv) {
-    return ct->conn_create_accepted(fd, priv);
+static inline connection *connCreateAccepted(struct aeEventLoop *el, ConnectionType *ct, int fd, void *priv) {
+    return ct->conn_create_accepted(el, fd, priv);
 }
 
 /* Configure a connection type. A typical case is to configure TLS.
@@ -400,10 +427,10 @@ static inline int connTypeConfigure(ConnectionType *ct, void *priv, int reconfig
 void connTypeCleanupAll(void);
 
 /* Test all the connection type has pending data or not. 
 */
-int connTypeHasPendingData(void);
+int connTypeHasPendingData(struct aeEventLoop *el);
 
 /* walk all the connection types and process pending data for each connection type */
-int connTypeProcessPendingData(void);
+int connTypeProcessPendingData(struct aeEventLoop *el);
 
 /* Listen on an initialized listener */
 static inline int connListen(connListener *listener) {
diff --git a/src/debug.c b/src/debug.c
index e40375fbe..c4d184b15 100644
--- a/src/debug.c
+++ b/src/debug.c
@@ -2451,6 +2451,8 @@ void removeSigSegvHandlers(void) {
 }
 
 void printCrashReport(void) {
+    server.crashing = 1;
+
     /* Log INFO and CLIENT LIST */
     logServerInfo();
 
diff --git a/src/eventnotifier.c b/src/eventnotifier.c
new file mode 100644
index 000000000..6dc3cf990
--- /dev/null
+++ b/src/eventnotifier.c
@@ -0,0 +1,97 @@
+/* eventnotifier.c -- An event notifier based on eventfd or pipe.
+ *
+ * Copyright (c) 2024-Present, Redis Ltd.
+ * All rights reserved.
+ *
+ * Licensed under your choice of the Redis Source Available License 2.0
+ * (RSALv2) or the Server Side Public License v1 (SSPLv1).
+ */
+
+#include "eventnotifier.h"
+
+#include <unistd.h>
+#include <stdint.h>
+#include <fcntl.h>
+#ifdef HAVE_EVENT_FD
+#include <sys/eventfd.h>
+#endif
+
+#include "anet.h"
+#include "zmalloc.h"
+
+eventNotifier* createEventNotifier(void) {
+    eventNotifier *en = zmalloc(sizeof(eventNotifier));
+    if (!en) return NULL;
+
+#ifdef HAVE_EVENT_FD
+    if ((en->efd = eventfd(0, EFD_NONBLOCK|EFD_CLOEXEC)) != -1) {
+        return en;
+    }
+#else
+    if (anetPipe(en->pipefd, O_CLOEXEC|O_NONBLOCK, O_CLOEXEC|O_NONBLOCK) != -1) {
+        return en;
+    }
+#endif
+
+    /* Clean up if error. */
+    zfree(en);
+    return NULL;
+}
+
+int getReadEventFd(struct eventNotifier *en) {
+#ifdef HAVE_EVENT_FD
+    return en->efd;
+#else
+    return en->pipefd[0];
+#endif
+}
+
+int getWriteEventFd(struct eventNotifier *en) {
+#ifdef HAVE_EVENT_FD
+    return en->efd;
+#else
+    return en->pipefd[1];
+#endif
+}
+
+int triggerEventNotifier(struct eventNotifier *en) {
+#ifdef HAVE_EVENT_FD
+    uint64_t u = 1;
+    if (write(en->efd, &u, sizeof(uint64_t)) == -1) {
+        return EN_ERR;
+    }
+#else
+    char buf[1] = {'R'};
+    if (write(en->pipefd[1], buf, 1) == -1) {
+        return EN_ERR;
+    }
+#endif
+    return EN_OK;
+}
+
+int handleEventNotifier(struct eventNotifier *en) {
+#ifdef HAVE_EVENT_FD
+    uint64_t u;
+    if (read(en->efd, &u, sizeof(uint64_t)) == -1) {
+        return EN_ERR;
+    }
+#else
+    char buf[1];
+    if (read(en->pipefd[0], buf, 1) == -1) {
+        return EN_ERR;
+    }
+#endif
+    return EN_OK;
+}
+
+void freeEventNotifier(struct eventNotifier *en) {
+#ifdef HAVE_EVENT_FD
+    close(en->efd);
+#else
+    close(en->pipefd[0]);
+    close(en->pipefd[1]);
+#endif
+
+    /* Free memory */
+    zfree(en);
+}
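The notifier API is small; a minimal cross-thread usage sketch follows (illustrative only: setupConsumer/notifyConsumer are hypothetical names, and error handling is omitted). It mirrors how initThreadedIO and IOThreadBeforeSleep below wire notifiers into ae event loops:

    #include "ae.h"
    #include "eventnotifier.h"

    static eventNotifier *en;

    /* Consumer side: drain the eventfd/pipe, then process whatever the
     * producer queued (typically items on a mutex-protected list). */
    static void onWakeup(aeEventLoop *el, int fd, void *ptr, int mask) {
        (void)el; (void)fd; (void)mask;
        handleEventNotifier((eventNotifier *)ptr);
        /* ... pop and handle queued items here ... */
    }

    /* Consumer thread: bind the read end into its event loop. */
    static void setupConsumer(aeEventLoop *el) {
        en = createEventNotifier();
        aeCreateFileEvent(el, getReadEventFd(en), AE_READABLE, onWakeup, en);
    }

    /* Producer thread: enqueue work under a lock, then wake the consumer. */
    static void notifyConsumer(void) {
        triggerEventNotifier(en);
    }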
diff --git a/src/eventnotifier.h b/src/eventnotifier.h
new file mode 100644
index 000000000..39e3b5113
--- /dev/null
+++ b/src/eventnotifier.h
@@ -0,0 +1,33 @@
+/* eventnotifier.h -- An event notifier based on eventfd or pipe.
+ *
+ * Copyright (c) 2024-Present, Redis Ltd.
+ * All rights reserved.
+ *
+ * Licensed under your choice of the Redis Source Available License 2.0
+ * (RSALv2) or the Server Side Public License v1 (SSPLv1).
+ */
+
+#ifndef EVENTNOTIFIER_H
+#define EVENTNOTIFIER_H
+
+#include "config.h"
+
+#define EN_OK 0
+#define EN_ERR -1
+
+typedef struct eventNotifier {
+#ifdef HAVE_EVENT_FD
+    int efd;
+#else
+    int pipefd[2];
+#endif
+} eventNotifier;
+
+eventNotifier* createEventNotifier(void);
+int getReadEventFd(struct eventNotifier *en);
+int getWriteEventFd(struct eventNotifier *en);
+int triggerEventNotifier(struct eventNotifier *en);
+int handleEventNotifier(struct eventNotifier *en);
+void freeEventNotifier(struct eventNotifier *en);
+
+#endif
diff --git a/src/iothread.c b/src/iothread.c
new file mode 100644
index 000000000..2e5c98a28
--- /dev/null
+++ b/src/iothread.c
@@ -0,0 +1,631 @@
+/* iothread.c -- The threaded io implementation.
+ *
+ * Copyright (c) 2024-Present, Redis Ltd.
+ * All rights reserved.
+ *
+ * Licensed under your choice of the Redis Source Available License 2.0
+ * (RSALv2) or the Server Side Public License v1 (SSPLv1).
+ */
+
+#include "server.h"
+
+/* IO threads. */
+static IOThread IOThreads[IO_THREADS_MAX_NUM];
+
+/* For main thread */
+static list *mainThreadPendingClientsToIOThreads[IO_THREADS_MAX_NUM]; /* Clients to IO threads */
+static list *mainThreadProcessingClients[IO_THREADS_MAX_NUM]; /* Clients in processing */
+static list *mainThreadPendingClients[IO_THREADS_MAX_NUM]; /* Pending clients from IO threads */
+static pthread_mutex_t mainThreadPendingClientsMutexes[IO_THREADS_MAX_NUM]; /* Mutex for pending clients */
+static eventNotifier* mainThreadPendingClientsNotifiers[IO_THREADS_MAX_NUM]; /* Notifier for pending clients */
+
+/* When an IO thread reads a complete query from a client, or wants to free a
+ * client, it should remove the client from its clients list and put it in the
+ * list for the main thread; we will send these clients to the main thread in
+ * IOThreadBeforeSleep. */
+void enqueuePendingClientsToMainThread(client *c, int unbind) {
+    /* If the IO thread may no longer manage it, such as when closing the
+     * client, we should unbind the client from the event loop here, so the
+     * main thread doesn't need to do this costly operation. */
+    if (unbind) connUnbindEventLoop(c->conn);
+    /* Just skip if it has already been transferred. */
+    if (c->io_thread_client_list_node) {
+        listDelNode(IOThreads[c->tid].clients, c->io_thread_client_list_node);
+        c->io_thread_client_list_node = NULL;
+        /* Disable read and write to avoid race when main thread processes. */
+        c->io_flags &= ~(CLIENT_IO_READ_ENABLED | CLIENT_IO_WRITE_ENABLED);
+        listAddNodeTail(IOThreads[c->tid].pending_clients_to_main_thread, c);
+    }
+}
+
+/* Unbind the connection of the client from the io thread event loop; write and
+ * read handlers are also removed, ensuring that we can operate on the client
+ * safely. */
+void unbindClientFromIOThreadEventLoop(client *c) {
+    serverAssert(c->tid != IOTHREAD_MAIN_THREAD_ID &&
+                 c->running_tid == IOTHREAD_MAIN_THREAD_ID);
+    if (!connHasEventLoop(c->conn)) return;
+    /* As we are calling from the main thread, we should pause the io thread
+     * to make this safe. */
+    pauseIOThread(c->tid);
+    connUnbindEventLoop(c->conn);
+    resumeIOThread(c->tid);
+}
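+
+/* Overall client flow with threaded I/O (a summary of the design described
+ * in the comments throughout this file):
+ *
+ * 1. The main thread accepts a client and assigns it to the least loaded
+ *    io thread (assignClientToIOThread).
+ * 2. The io thread reads and parses the query, then hands the client back
+ *    through its pending_clients_to_main_thread list and notifier.
+ * 3. The main thread executes the command and returns the client to its
+ *    io thread, which writes the reply and keeps polling it. */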
+
+/* When the main thread is processing a client from an IO thread and wants to
+ * keep it, we should unbind the connection of the client from the io thread
+ * event loop first, and then bind the client connection into the server's
+ * event loop. */
+void keepClientInMainThread(client *c) {
+    serverAssert(c->tid != IOTHREAD_MAIN_THREAD_ID &&
+                 c->running_tid == IOTHREAD_MAIN_THREAD_ID);
+    /* IO thread no longer manages it. */
+    server.io_threads_clients_num[c->tid]--;
+    /* Unbind connection of client from io thread event loop. */
+    unbindClientFromIOThreadEventLoop(c);
+    /* Let the main thread run it: rebind event loop and read handler */
+    connRebindEventLoop(c->conn, server.el);
+    connSetReadHandler(c->conn, readQueryFromClient);
+    c->io_flags |= CLIENT_IO_READ_ENABLED | CLIENT_IO_WRITE_ENABLED;
+    c->running_tid = IOTHREAD_MAIN_THREAD_ID;
+    c->tid = IOTHREAD_MAIN_THREAD_ID;
+    /* Main thread starts to manage it. */
+    server.io_threads_clients_num[c->tid]++;
+}
+
+/* If the client is managed by an IO thread, we should fetch it from the IO
+ * thread, and then the main thread can process it, just like an IO thread
+ * transferring a client to the main thread for processing. */
+void fetchClientFromIOThread(client *c) {
+    serverAssert(c->tid != IOTHREAD_MAIN_THREAD_ID &&
+                 c->running_tid != IOTHREAD_MAIN_THREAD_ID);
+    pauseIOThread(c->tid);
+    /* Remove the client from the clients list of the IO thread or the main thread. */
+    if (c->io_thread_client_list_node) {
+        listDelNode(IOThreads[c->tid].clients, c->io_thread_client_list_node);
+        c->io_thread_client_list_node = NULL;
+    } else {
+        list *clients[5] = {
+            IOThreads[c->tid].pending_clients,
+            IOThreads[c->tid].pending_clients_to_main_thread,
+            mainThreadPendingClients[c->tid],
+            mainThreadProcessingClients[c->tid],
+            mainThreadPendingClientsToIOThreads[c->tid]
+        };
+        for (int i = 0; i < 5; i++) {
+            listNode *ln = listSearchKey(clients[i], c);
+            if (ln) {
+                listDelNode(clients[i], ln);
+                /* A client can only be in one client list. */
+                break;
+            }
+        }
+    }
+    /* Unbind connection of client from io thread event loop. */
+    connUnbindEventLoop(c->conn);
+    /* Now the main thread can process it. */
+    c->running_tid = IOTHREAD_MAIN_THREAD_ID;
+    resumeIOThread(c->tid);
+}
+
+/* Some clients must be handled in the main thread, since processing them in
+ * IO threads would cause data races.
+ *
+ * - Close ASAP: we must free the client in the main thread.
+ * - Replica, pubsub, monitor, blocked, tracking clients: the main thread may
+ *   directly write them a reply when conditions are met.
+ * - Script commands with debug may operate on the connection directly. */
+int isClientMustHandledByMainThread(client *c) {
+    if (c->flags & (CLIENT_CLOSE_ASAP | CLIENT_MASTER | CLIENT_SLAVE |
+                    CLIENT_PUBSUB | CLIENT_MONITOR | CLIENT_BLOCKED |
+                    CLIENT_UNBLOCKED | CLIENT_TRACKING | CLIENT_LUA_DEBUG |
+                    CLIENT_LUA_DEBUG_SYNC))
+    {
+        return 1;
+    }
+    return 0;
+}
+
+/* When the main thread accepts a new client or transfers clients to IO threads,
+ * it assigns the client to the IO thread with the fewest clients. */
+void assignClientToIOThread(client *c) {
+    serverAssert(c->tid == IOTHREAD_MAIN_THREAD_ID);
+    /* Find the IO thread with the fewest clients. */
+    int min_id = 0;
+    int min = INT_MAX;
+    for (int i = 1; i < server.io_threads_num; i++) {
+        if (server.io_threads_clients_num[i] < min) {
+            min = server.io_threads_clients_num[i];
+            min_id = i;
+        }
+    }
+
+    /* Assign the client to the IO thread. */
+    server.io_threads_clients_num[c->tid]--;
+    c->tid = min_id;
+    c->running_tid = min_id;
+    server.io_threads_clients_num[min_id]++;
+
+    /* Unbind the connection of the client from the main thread event loop,
+     * disable read and write, and then put it in the list; the main thread
+     * will send these clients to the IO thread in beforeSleep.
+     */
+    connUnbindEventLoop(c->conn);
+    c->io_flags &= ~(CLIENT_IO_READ_ENABLED | CLIENT_IO_WRITE_ENABLED);
+    listAddNodeTail(mainThreadPendingClientsToIOThreads[c->tid], c);
+}
+
+/* When updating the maxclients config, we not only resize the event loop of
+ * the main thread but also the event loops of all io threads; if it fails for
+ * one thread, it fails as a whole, since a fd can be distributed into any IO
+ * thread. */
+int resizeAllIOThreadsEventLoops(size_t newsize) {
+    int result = AE_OK;
+    if (server.io_threads_num <= 1) return result;
+
+    /* To make context safe. */
+    pauseAllIOThreads();
+    for (int i = 1; i < server.io_threads_num; i++) {
+        IOThread *t = &IOThreads[i];
+        if (aeResizeSetSize(t->el, newsize) == AE_ERR)
+            result = AE_ERR;
+    }
+    resumeAllIOThreads();
+    return result;
+}
+
+/* In the main thread, we may want to operate on data of io threads, for
+ * example to uninstall an event handler, access a query/output buffer or
+ * resize the event loop; we need a clean and safe context to do that. We
+ * pause the io thread in IOThreadBeforeSleep, do some jobs and then resume
+ * it. To avoid the thread being suspended, we use busy waiting to confirm the
+ * target status. Besides, we use atomic variables to make sure of memory
+ * visibility and ordering.
+ *
+ * Make sure that only the main thread can call these functions:
+ * - pauseIOThread, resumeIOThread
+ * - pauseAllIOThreads, resumeAllIOThreads
+ * - pauseIOThreadsRange, resumeIOThreadsRange
+ *
+ * The main thread will pause the io thread, and then wait for the io thread to
+ * be paused. The io thread will check the paused status in IOThreadBeforeSleep,
+ * and then pause itself.
+ *
+ * The main thread will resume the io thread, and then wait for the io thread to
+ * be resumed. The io thread will check the paused status in IOThreadBeforeSleep,
+ * and then resume itself.
+ */
+
+/* The same io thread may be paused in a nested way, so we record the number of
+ * pauses; the io thread is actually paused only when the count goes from 0 to
+ * 1, and actually resumed only when the count drops from 1 to 0. */
+static int PausedIOThreads[IO_THREADS_MAX_NUM] = {0};
+
+/* Pause the specific range of io threads, and wait for them to be paused. */
+void pauseIOThreadsRange(int start, int end) {
+    if (server.io_threads_num <= 1) return;
+    serverAssert(start >= 1 && end < server.io_threads_num && start <= end);
+    serverAssert(pthread_equal(pthread_self(), server.main_thread_id));
+
+    /* Try to make all io threads paused in parallel */
+    for (int i = start; i <= end; i++) {
+        PausedIOThreads[i]++;
+        /* Skip if already paused */
+        if (PausedIOThreads[i] > 1) continue;
+
+        int paused;
+        atomicGetWithSync(IOThreads[i].paused, paused);
+        /* Reentrant calls are not supported */
+        serverAssert(paused == IO_THREAD_UNPAUSED);
+        atomicSetWithSync(IOThreads[i].paused, IO_THREAD_PAUSING);
+        /* Just notify the io thread; there is no actual job, since io threads
+         * check the paused status in IOThreadBeforeSleep, so we only need to
+         * wake it up if it is waiting in the poll. */
+        triggerEventNotifier(IOThreads[i].pending_clients_notifier);
+    }
+
+    /* Wait for all io threads paused */
+    for (int i = start; i <= end; i++) {
+        if (PausedIOThreads[i] > 1) continue;
+        int paused = IO_THREAD_PAUSING;
+        while (paused != IO_THREAD_PAUSED) {
+            atomicGetWithSync(IOThreads[i].paused, paused);
+        }
+    }
+}
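+
+/* For example (illustrative), pauses are counted per io thread:
+ *
+ *     pauseIOThread(1);      // thread 1 parks itself in IOThreadBeforeSleep
+ *     pauseIOThread(1);      // nested: only increments PausedIOThreads[1]
+ *     resumeIOThread(1);     // still paused
+ *     resumeIOThread(1);     // now thread 1 actually resumes
+ *
+ * so pause and resume calls must always come in pairs. */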
+
+/* Resume the specific range of io threads, and wait for them to be resumed. */
+void resumeIOThreadsRange(int start, int end) {
+    if (server.io_threads_num <= 1) return;
+    serverAssert(start >= 1 && end < server.io_threads_num && start <= end);
+    serverAssert(pthread_equal(pthread_self(), server.main_thread_id));
+
+    for (int i = start; i <= end; i++) {
+        serverAssert(PausedIOThreads[i] > 0);
+        PausedIOThreads[i]--;
+        if (PausedIOThreads[i] > 0) continue;
+
+        int paused;
+        /* Check if it is paused, since we must call 'pause' and
+         * 'resume' in pairs */
+        atomicGetWithSync(IOThreads[i].paused, paused);
+        serverAssert(paused == IO_THREAD_PAUSED);
+        /* Resume */
+        atomicSetWithSync(IOThreads[i].paused, IO_THREAD_RESUMING);
+        while (paused != IO_THREAD_UNPAUSED) {
+            atomicGetWithSync(IOThreads[i].paused, paused);
+        }
+    }
+}
+
+/* The IO thread checks whether it is being paused, and if so, it pauses itself
+ * and waits for resuming, corresponding to the pause/resumeIOThread* functions.
+ * Currently, this is only called in IOThreadBeforeSleep, as there are no pending
+ * I/O events at this point, so we have a clean context. */
+void handlePauseAndResume(IOThread *t) {
+    int paused;
+    /* Check if this thread is being paused. */
+    atomicGetWithSync(t->paused, paused);
+    if (paused == IO_THREAD_PAUSING) {
+        atomicSetWithSync(t->paused, IO_THREAD_PAUSED);
+        /* Wait for resuming */
+        while (paused != IO_THREAD_RESUMING) {
+            atomicGetWithSync(t->paused, paused);
+        }
+        atomicSetWithSync(t->paused, IO_THREAD_UNPAUSED);
+    }
+}
+
+/* Pause the specific io thread, and wait for it to be paused. */
+void pauseIOThread(int id) {
+    pauseIOThreadsRange(id, id);
+}
+
+/* Resume the specific io thread, and wait for it to be resumed. */
+void resumeIOThread(int id) {
+    resumeIOThreadsRange(id, id);
+}
+
+/* Pause all io threads, and wait for them to be paused. */
+void pauseAllIOThreads(void) {
+    pauseIOThreadsRange(1, server.io_threads_num-1);
+}
+
+/* Resume all io threads, and wait for them to be resumed. */
+void resumeAllIOThreads(void) {
+    resumeIOThreadsRange(1, server.io_threads_num-1);
+}
+
+/* Add the pending clients to the lists of the IO threads, and trigger an
+ * event to notify the io threads to handle them. */
+int sendPendingClientsToIOThreads(void) {
+    int processed = 0;
+    for (int i = 1; i < server.io_threads_num; i++) {
+        int len = listLength(mainThreadPendingClientsToIOThreads[i]);
+        if (len > 0) {
+            IOThread *t = &IOThreads[i];
+            pthread_mutex_lock(&t->pending_clients_mutex);
+            listJoin(t->pending_clients, mainThreadPendingClientsToIOThreads[i]);
+            pthread_mutex_unlock(&t->pending_clients_mutex);
+            /* Trigger an event; an error may be returned when the buffer is
+             * full if a pipe is used, but no worry, the io thread will handle
+             * all clients in the list when receiving a notification. */
+            triggerEventNotifier(t->pending_clients_notifier);
+        }
+        processed += len;
+    }
+    return processed;
+}
+
+extern int ProcessingEventsWhileBlocked;
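+
+/* A sketch of the expected wiring in the main thread's beforeSleep(), based
+ * on the comments in this file (the actual wiring lives in server.c):
+ *
+ *     processClientsOfAllIOThreads();    // fan-in: run parsed commands
+ *     ...
+ *     flushAppendOnlyFile(0);            // flush the AOF buffer first ...
+ *     sendPendingClientsToIOThreads();   // ... then fan the replies out
+ */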
+
+/* The main thread processes the clients from IO threads; these clients may
+ * have a complete command to execute or need to be freed. Note that IO
+ * threads never free a client, since this operation accesses a lot of server
+ * data.
+ *
+ * Please notice that this function may be called reentrantly, i.e., the same
+ * goes for handleClientsFromIOThread and processClientsOfAllIOThreads. For
+ * example, when processing a script command, it may call
+ * processEventsWhileBlocked to process new events; if the clients with fired
+ * events are from the same io thread, this function may be called reentrantly. */
+void processClientsFromIOThread(IOThread *t) {
+    listNode *node = NULL;
+
+    while (listLength(mainThreadProcessingClients[t->id])) {
+        /* Each time we pop only the first client to process, to guarantee
+         * reentrancy safety. */
+        if (node) zfree(node);
+        node = listFirst(mainThreadProcessingClients[t->id]);
+        listUnlinkNode(mainThreadProcessingClients[t->id], node);
+        client *c = listNodeValue(node);
+
+        /* Make sure the io thread can no longer read or write the client,
+         * to avoid data races. */
+        serverAssert(!(c->io_flags & (CLIENT_IO_READ_ENABLED | CLIENT_IO_WRITE_ENABLED)));
+        serverAssert(!(c->flags & CLIENT_CLOSE_ASAP));
+
+        /* Let the main thread run it; set the running thread id first. */
+        c->running_tid = IOTHREAD_MAIN_THREAD_ID;
+
+        /* If a read error occurs, handle it in the main thread first, since we
+         * want to print logs about client information before freeing. */
+        if (c->read_error) handleClientReadError(c);
+
+        /* The client was asked to close in the IO thread. */
+        if (c->io_flags & CLIENT_IO_CLOSE_ASAP) {
+            freeClient(c);
+            continue;
+        }
+
+        /* Update the client in the mem usage */
+        updateClientMemUsageAndBucket(c);
+
+        /* Process the pending command and input buffer. */
+        if (!c->read_error && c->io_flags & CLIENT_IO_PENDING_COMMAND) {
+            c->flags |= CLIENT_PENDING_COMMAND;
+            if (processPendingCommandAndInputBuffer(c) == C_ERR) {
+                /* If the client is no longer valid, it must be freed safely. */
+                continue;
+            }
+        }
+
+        /* We may have pending replies if the io thread did not finish writing
+         * the reply to the client, in which case the client was not put in
+         * the pending write queue. We should do that first, since we may keep
+         * the client in the main thread instead of returning it to the io
+         * threads. */
+        if (!(c->flags & CLIENT_PENDING_WRITE) && clientHasPendingReplies(c))
+            putClientInPendingWriteQueue(c);
+
+        /* The client can only be processed in the main thread, otherwise data
+         * races will happen, since we may touch the client's data in the main
+         * thread. */
+        if (isClientMustHandledByMainThread(c)) {
+            keepClientInMainThread(c);
+            continue;
+        }
+
+        /* Remove this client from the pending write clients queue of the main
+         * thread; some clients may not have a reply if CLIENT REPLY OFF/SKIP
+         * is used. */
+        if (c->flags & CLIENT_PENDING_WRITE) {
+            c->flags &= ~CLIENT_PENDING_WRITE;
+            listUnlinkNode(server.clients_pending_write, &c->clients_pending_write_node);
+        }
+        c->running_tid = c->tid;
+        listLinkNodeHead(mainThreadPendingClientsToIOThreads[c->tid], node);
+        node = NULL;
+    }
+    if (node) zfree(node);
+
+    /* Trigger the io thread to handle these clients ASAP to make them
+     * processed in parallel.
+     *
+     * If the AOF fsync policy is always, we should not let the io thread
+     * handle these clients now, since we haven't flushed the AOF buffer to
+     * the file and synced yet. So sending these clients to the io threads is
+     * delayed until beforeSleep, after flushAppendOnlyFile.
+     *
+     * If we are in processEventsWhileBlocked, we don't send clients to io
+     * threads now, as we want to update
+     * server.events_processed_while_blocked accurately.
+     */
+    if (listLength(mainThreadPendingClientsToIOThreads[t->id]) &&
+        server.aof_fsync != AOF_FSYNC_ALWAYS &&
+        !ProcessingEventsWhileBlocked)
+    {
+        pthread_mutex_lock(&(t->pending_clients_mutex));
+        listJoin(t->pending_clients, mainThreadPendingClientsToIOThreads[t->id]);
+        pthread_mutex_unlock(&(t->pending_clients_mutex));
+        triggerEventNotifier(t->pending_clients_notifier);
+    }
+}
+
+/* When the io thread finishes processing the client with the read event, it
+ * will notify the main thread through event triggering in IOThreadBeforeSleep.
+ * The main thread handles the event through this function. */
+void handleClientsFromIOThread(struct aeEventLoop *el, int fd, void *ptr, int mask) {
+    UNUSED(el);
+    UNUSED(mask);
+
+    IOThread *t = ptr;
+
+    /* Handle fd event first. */
+    serverAssert(fd == getReadEventFd(mainThreadPendingClientsNotifiers[t->id]));
+    handleEventNotifier(mainThreadPendingClientsNotifiers[t->id]);
+
+    /* Get the list of clients to process. */
+    pthread_mutex_lock(&mainThreadPendingClientsMutexes[t->id]);
+    listJoin(mainThreadProcessingClients[t->id], mainThreadPendingClients[t->id]);
+    pthread_mutex_unlock(&mainThreadPendingClientsMutexes[t->id]);
+    if (listLength(mainThreadProcessingClients[t->id]) == 0) return;
+
+    /* Process the clients from IO threads. */
+    processClientsFromIOThread(t);
+}
+
+/* In the new threaded io design, one thread may process multiple clients, so
+ * when an io thread notifies the main thread of an event, there may be
+ * multiple clients with commands that need to be processed. But the event
+ * handler function handleClientsFromIOThread may be blocked when processing a
+ * specific command; then the previous clients cannot get a reply and the
+ * subsequent clients cannot be processed, so we need to handle this scenario
+ * in beforeSleep. This function processes the commands of the subsequent
+ * clients from io threads, and another function, sendPendingClientsToIOThreads,
+ * makes sure that clients from io threads can get their replies. See also
+ * beforeSleep. */
+void processClientsOfAllIOThreads(void) {
+    for (int i = 1; i < server.io_threads_num; i++) {
+        processClientsFromIOThread(&IOThreads[i]);
+    }
+}
+
+/* After the main thread processes the clients, it sends them back to the io
+ * threads to handle and fires an event; the io thread handles the event with
+ * this function. If the client is not bound to the event loop, we should bind
+ * it first and install the read handler, and we don't uninstall the client
+ * read handler unless we are freeing the client. If the client has pending
+ * replies, we just reply to the client first, and then install the write
+ * handler if needed. */
+void handleClientsFromMainThread(struct aeEventLoop *ae, int fd, void *ptr, int mask) {
+    UNUSED(ae);
+    UNUSED(mask);
+
+    IOThread *t = ptr;
+
+    /* Handle fd event first. */
+    serverAssert(fd == getReadEventFd(t->pending_clients_notifier));
+    handleEventNotifier(t->pending_clients_notifier);
+
+    pthread_mutex_lock(&t->pending_clients_mutex);
+    listJoin(t->processing_clients, t->pending_clients);
+    pthread_mutex_unlock(&t->pending_clients_mutex);
+    if (listLength(t->processing_clients) == 0) return;
+
+    listIter li;
+    listNode *ln;
+    listRewind(t->processing_clients, &li);
+    while((ln = listNext(&li))) {
+        client *c = listNodeValue(ln);
+        serverAssert(!(c->io_flags & (CLIENT_IO_READ_ENABLED | CLIENT_IO_WRITE_ENABLED)));
+        /* The main thread must handle clients with the CLIENT_CLOSE_ASAP
+         * flag, since we only set io_flags when clients are freed ASAP in
+         * the io thread.
+         */
+        serverAssert(!(c->flags & CLIENT_CLOSE_ASAP));
+
+        /* Link client in IO thread clients list first. */
+        serverAssert(c->io_thread_client_list_node == NULL);
+        listAddNodeTail(t->clients, c);
+        c->io_thread_client_list_node = listLast(t->clients);
+
+        /* The client was asked to close; we just let the main thread free it. */
+        if (c->io_flags & CLIENT_IO_CLOSE_ASAP) {
+            enqueuePendingClientsToMainThread(c, 1);
+            continue;
+        }
+
+        /* Enable read and write and reset some flags. */
+        c->io_flags |= CLIENT_IO_READ_ENABLED | CLIENT_IO_WRITE_ENABLED;
+        c->io_flags &= ~CLIENT_IO_PENDING_COMMAND;
+
+        /* Only bind once; we never remove the read handler unless freeing the client. */
+        if (!connHasEventLoop(c->conn)) {
+            connRebindEventLoop(c->conn, t->el);
+            serverAssert(!connHasReadHandler(c->conn));
+            connSetReadHandler(c->conn, readQueryFromClient);
+        }
+
+        /* If the client has pending replies, write replies to client. */
+        if (clientHasPendingReplies(c)) {
+            writeToClient(c, 0);
+            if (!(c->io_flags & CLIENT_IO_CLOSE_ASAP) && clientHasPendingReplies(c)) {
+                connSetWriteHandler(c->conn, sendReplyToClient);
+            }
+        }
+    }
+    listEmpty(t->processing_clients);
+}
+
+void IOThreadBeforeSleep(struct aeEventLoop *el) {
+    IOThread *t = el->privdata[0];
+
+    /* Handle pending data (typically TLS). */
+    connTypeProcessPendingData(el);
+
+    /* If any connection type (typically TLS) still has pending unread data, don't sleep at all. */
+    aeSetDontWait(el, connTypeHasPendingData(el));
+
+    /* Check if this thread is being paused; if so, pause itself and wait to be resumed. */
+    handlePauseAndResume(t);
+
+    /* Check if there are clients to be processed in the main thread, and then
+     * join them to the list of the main thread. */
+    if (listLength(t->pending_clients_to_main_thread) > 0) {
+        pthread_mutex_lock(&mainThreadPendingClientsMutexes[t->id]);
+        listJoin(mainThreadPendingClients[t->id], t->pending_clients_to_main_thread);
+        pthread_mutex_unlock(&mainThreadPendingClientsMutexes[t->id]);
+        /* Trigger an event; an error may be returned when the buffer is full
+         * if a pipe is used, but no worry, the main thread will handle all
+         * clients in the list when receiving a notification. */
+        triggerEventNotifier(mainThreadPendingClientsNotifiers[t->id]);
+    }
+}
+
+/* The main function of the IO thread; it runs an event loop. The main thread
+ * and the IO thread communicate through the event notifier. */
+void *IOThreadMain(void *ptr) {
+    IOThread *t = ptr;
+    char thdname[16];
+    snprintf(thdname, sizeof(thdname), "io_thd_%d", t->id);
+    redis_set_thread_title(thdname);
+    redisSetCpuAffinity(server.server_cpulist);
+    makeThreadKillable();
+    aeSetBeforeSleepProc(t->el, IOThreadBeforeSleep);
+    aeMain(t->el);
+    return NULL;
+}
+
+/* Initialize the data structures needed for threaded I/O. */
+void initThreadedIO(void) {
+    if (server.io_threads_num <= 1) return;
+
+    server.io_threads_active = 1;
+
+    if (server.io_threads_num > IO_THREADS_MAX_NUM) {
+        serverLog(LL_WARNING,"Fatal: too many I/O threads configured. "
+                             "The maximum number is %d.", IO_THREADS_MAX_NUM);
+        exit(1);
+    }
+
+    /* Spawn and initialize the I/O threads.
+     */
+    for (int i = 1; i < server.io_threads_num; i++) {
+        IOThread *t = &IOThreads[i];
+        t->id = i;
+        t->el = aeCreateEventLoop(server.maxclients+CONFIG_FDSET_INCR);
+        t->el->privdata[0] = t;
+        t->pending_clients = listCreate();
+        t->processing_clients = listCreate();
+        t->pending_clients_to_main_thread = listCreate();
+        t->clients = listCreate();
+        atomicSetWithSync(t->paused, IO_THREAD_UNPAUSED);
+
+        pthread_mutexattr_t *attr = NULL;
+        #if defined(__linux__) && defined(__GLIBC__)
+        attr = zmalloc(sizeof(pthread_mutexattr_t));
+        pthread_mutexattr_init(attr);
+        pthread_mutexattr_settype(attr, PTHREAD_MUTEX_ADAPTIVE_NP);
+        #endif
+        pthread_mutex_init(&t->pending_clients_mutex, attr);
+
+        t->pending_clients_notifier = createEventNotifier();
+        if (aeCreateFileEvent(t->el, getReadEventFd(t->pending_clients_notifier),
+                              AE_READABLE, handleClientsFromMainThread, t) != AE_OK)
+        {
+            serverLog(LL_WARNING, "Fatal: Can't register file event for IO thread notifications.");
+            exit(1);
+        }
+
+        /* Create IO thread */
+        if (pthread_create(&t->tid, NULL, IOThreadMain, (void*)t) != 0) {
+            serverLog(LL_WARNING, "Fatal: Can't initialize IO thread.");
+            exit(1);
+        }
+
+        /* For main thread */
+        mainThreadPendingClientsToIOThreads[i] = listCreate();
+        mainThreadPendingClients[i] = listCreate();
+        mainThreadProcessingClients[i] = listCreate();
+        pthread_mutex_init(&mainThreadPendingClientsMutexes[i], attr);
+        mainThreadPendingClientsNotifiers[i] = createEventNotifier();
+        if (aeCreateFileEvent(server.el, getReadEventFd(mainThreadPendingClientsNotifiers[i]),
+                              AE_READABLE, handleClientsFromIOThread, t) != AE_OK)
+        {
+            serverLog(LL_WARNING, "Fatal: Can't register file event for main thread notifications.");
+            exit(1);
+        }
+        if (attr) zfree(attr);
+    }
+}
+
+/* Kill the IO threads. TODO: release the allocated resources. */
+void killIOThreads(void) {
+    if (server.io_threads_num <= 1) return;
+
+    int err, j;
+    for (j = 1; j < server.io_threads_num; j++) {
+        if (IOThreads[j].tid == pthread_self()) continue;
+        if (IOThreads[j].tid && pthread_cancel(IOThreads[j].tid) == 0) {
+            if ((err = pthread_join(IOThreads[j].tid,NULL)) != 0) {
+                serverLog(LL_WARNING,
+                    "IO thread(tid:%lu) can not be joined: %s",
+                    (unsigned long)IOThreads[j].tid, strerror(err));
+            } else {
+                serverLog(LL_WARNING,
+                    "IO thread(tid:%lu) terminated",(unsigned long)IOThreads[j].tid);
+            }
+        }
+    }
+}
diff --git a/src/multi.c b/src/multi.c
index 6d1ba5697..1956c3dd8 100644
--- a/src/multi.c
+++ b/src/multi.c
@@ -355,7 +355,12 @@ int isWatchedKeyExpired(client *c) {
 }
 
 /* "Touch" a key, so that if this key is being WATCHed by some client the
- * next EXEC will fail. */
+ * next EXEC will fail.
+ *
+ * Sanitizer suppression: IO threads also read c->flags, but they never modify
+ * it nor read the CLIENT_DIRTY_CAS bit, and only the main thread modifies
+ * this bit, so there is actually no real data race. */
+REDIS_NO_SANITIZE("thread")
 void touchWatchedKey(redisDb *db, robj *key) {
     list *clients;
     listIter li;
@@ -404,6 +409,7 @@ void touchWatchedKey(redisDb *db, robj *key) {
  * replaced_with: for SWAPDB, the WATCH should be invalidated if
  * the key exists in either of them, and skipped only if it
  * doesn't exist in both.
 */
+REDIS_NO_SANITIZE("thread")
 void touchAllWatchedKeysInDb(redisDb *emptied, redisDb *replaced_with) {
     listIter li;
     listNode *ln;
diff --git a/src/networking.c b/src/networking.c
index 9a9515f77..8fb37af08 100644
--- a/src/networking.c
+++ b/src/networking.c
@@ -24,7 +24,6 @@
 static void setProtocolError(const char *errstr, client *c);
 static void pauseClientsByClient(mstime_t end, int isPauseClientAll);
-int postponeClientRead(client *c);
 char *getClientSockname(client *c);
 static inline int clientTypeIsSlave(client *c);
 int ProcessingEventsWhileBlocked = 0; /* See processEventsWhileBlocked(). */
@@ -132,6 +131,9 @@ client *createClient(connection *conn) {
     uint64_t client_id;
     atomicGetIncr(server.next_client_id, client_id, 1);
     c->id = client_id;
+    c->tid = IOTHREAD_MAIN_THREAD_ID;
+    c->running_tid = IOTHREAD_MAIN_THREAD_ID;
+    if (conn) server.io_threads_clients_num[c->tid]++;
 #ifdef LOG_REQ_RES
     reqresReset(c, 0);
     c->resp = server.client_default_resp;
@@ -163,6 +165,8 @@ client *createClient(connection *conn) {
     c->bulklen = -1;
     c->sentlen = 0;
     c->flags = 0;
+    c->io_flags = CLIENT_IO_READ_ENABLED | CLIENT_IO_WRITE_ENABLED;
+    c->read_error = 0;
     c->slot = -1;
     c->ctime = c->lastinteraction = server.unixtime;
     c->duration = 0;
@@ -195,8 +199,8 @@ client *createClient(connection *conn) {
     c->peerid = NULL;
     c->sockname = NULL;
     c->client_list_node = NULL;
+    c->io_thread_client_list_node = NULL;
     c->postponed_list_node = NULL;
-    c->pending_read_list_node = NULL;
     c->client_tracking_redirection = 0;
     c->client_tracking_prefixes = NULL;
     c->last_memory_usage = 0;
@@ -300,13 +304,8 @@ int prepareClientToWrite(client *c) {
     if (!c->conn) return C_ERR; /* Fake client for AOF loading. */
 
     /* Schedule the client to write the output buffers to the socket, unless
-     * it should already be setup to do so (it has already pending data).
-     *
-     * If CLIENT_PENDING_READ is set, we're in an IO thread and should
-     * not put the client in pending write queue. Instead, it will be
-     * done by handleClientsWithPendingReadsUsingThreads() upon return.
-     */
-    if (!clientHasPendingReplies(c) && io_threads_op == IO_THREADS_OP_IDLE)
+     * it should already be set up to do so (it already has pending data). */
+    if (!clientHasPendingReplies(c) && likely(c->running_tid == IOTHREAD_MAIN_THREAD_ID))
         putClientInPendingWriteQueue(c);
 
     /* Authorize the caller to queue in the output buffer of this client. */
@@ -1359,6 +1358,9 @@ void clientAcceptHandler(connection *conn) {
     moduleFireServerEvent(REDISMODULE_EVENT_CLIENT_CHANGE,
                           REDISMODULE_SUBEVENT_CLIENT_CHANGE_CONNECTED,
                           c);
+
+    /* Assign the client to an IO thread */
+    if (server.io_threads_num > 1) assignClientToIOThread(c);
 }
 
 void acceptCommonHandler(connection *conn, int flags, char *ip) {
@@ -1547,14 +1549,6 @@ void unlinkClient(client *c) {
         c->flags &= ~CLIENT_PENDING_WRITE;
     }
 
-    /* Remove from the list of pending reads if needed. */
-    serverAssert(!c->conn || io_threads_op == IO_THREADS_OP_IDLE);
-    if (c->pending_read_list_node != NULL) {
-        listDelNode(server.clients_pending_read,c->pending_read_list_node);
-        c->pending_read_list_node = NULL;
-    }
-
-
     /* When client was just unblocked because of a blocking operation,
      * remove it from the list of unblocked clients. */
     if (c->flags & CLIENT_UNBLOCKED) {
@@ -1631,7 +1625,7 @@ void deauthenticateAndCloseClient(client *c) {
  * If any data remained in the buffer, the client will take ownership of the buffer
  * and a new empty buffer will be allocated for the reusable buffer.
 */
 static void resetReusableQueryBuf(client *c) {
-    serverAssert(c->flags & CLIENT_REUSABLE_QUERYBUFFER);
+    serverAssert(c->io_flags & CLIENT_IO_REUSABLE_QUERYBUFFER);
     if (c->querybuf != thread_reusable_qb || sdslen(c->querybuf) > c->qb_pos) {
         /* If querybuf has been reallocated or there is still data left,
          * let the client take ownership of the reusable buffer. */
@@ -1645,7 +1639,7 @@ static void resetReusableQueryBuf(client *c) {
 
     /* Mark that the client is no longer using the reusable query buffer
      * and indicate that it is no longer used by any client. */
-    c->flags &= ~CLIENT_REUSABLE_QUERYBUFFER;
+    c->io_flags &= ~CLIENT_IO_REUSABLE_QUERYBUFFER;
     thread_reusable_qb_used = 0;
 }
 
@@ -1659,6 +1653,19 @@ void freeClient(client *c) {
         return;
     }
 
+    /* If the client is running in an io thread, we can't free it directly. */
+    if (c->running_tid != IOTHREAD_MAIN_THREAD_ID) {
+        fetchClientFromIOThread(c);
+    }
+
+    /* We need to unbind the client's connection from the io thread event loop first. */
+    if (c->tid != IOTHREAD_MAIN_THREAD_ID) {
+        unbindClientFromIOThreadEventLoop(c);
+    }
+
+    /* Update the number of clients in the IO thread. */
+    if (c->conn) server.io_threads_clients_num[c->tid]--;
+
     /* For connected clients, call the disconnection event of modules hooks. */
     if (c->conn) {
         moduleFireServerEvent(REDISMODULE_EVENT_CLIENT_CHANGE,
@@ -1703,7 +1710,7 @@ void freeClient(client *c) {
     }
 
     /* Free the query buffer */
-    if (c->flags & CLIENT_REUSABLE_QUERYBUFFER)
+    if (c->io_flags & CLIENT_IO_REUSABLE_QUERYBUFFER)
         resetReusableQueryBuf(c);
     sdsfree(c->querybuf);
     c->querybuf = NULL;
@@ -1816,25 +1823,24 @@
  * a context where calling freeClient() is not possible, because the client
  * should be valid for the continuation of the flow of the program. */
 void freeClientAsync(client *c) {
-    /* We need to handle concurrent access to the server.clients_to_close list
-     * only in the freeClientAsync() function, since it's the only function that
-     * may access the list while Redis uses I/O threads. All the other accesses
-     * are in the context of the main thread while the other threads are
-     * idle. */
+    if (c->running_tid != IOTHREAD_MAIN_THREAD_ID) {
+        int main_thread = pthread_equal(pthread_self(), server.main_thread_id);
+        /* Make sure the main thread can access IO thread data safely. */
+        if (main_thread) pauseIOThread(c->tid);
+        if (!(c->io_flags & CLIENT_IO_CLOSE_ASAP)) {
+            c->io_flags |= CLIENT_IO_CLOSE_ASAP;
+            enqueuePendingClientsToMainThread(c, 1);
+        }
+        if (main_thread) resumeIOThread(c->tid);
+        return;
+    }
+
     if (c->flags & CLIENT_CLOSE_ASAP || c->flags & CLIENT_SCRIPT) return;
     c->flags |= CLIENT_CLOSE_ASAP;
     /* Replicas that was marked as CLIENT_CLOSE_ASAP should not keep the
      * replication backlog from been trimmed. */
     if (c->flags & CLIENT_SLAVE) freeReplicaReferencedReplBuffer(c);
-    if (server.io_threads_num == 1) {
-        /* no need to bother with locking if there's just one thread (the main thread) */
-        listAddNodeTail(server.clients_to_close,c);
-        return;
-    }
-    static pthread_mutex_t async_free_queue_mutex = PTHREAD_MUTEX_INITIALIZER;
-    pthread_mutex_lock(&async_free_queue_mutex);
     listAddNodeTail(server.clients_to_close,c);
-    pthread_mutex_unlock(&async_free_queue_mutex);
 }
 
 /* Log errors for invalid use and free the client in async way.
@@ -1867,7 +1873,7 @@ int beforeNextClient(client *c) {
     /* Skip the client processing if we're in an IO thread, in that case we'll perform
        this operation later (this function is called again) in the fan-in stage of the threading mechanism */
-    if (io_threads_op != IO_THREADS_OP_IDLE)
+    if (c && c->running_tid != IOTHREAD_MAIN_THREAD_ID)
         return C_OK;
     /* Handle async frees */
     /* Note: this doesn't make the server.clients_to_close list redundant because of
@@ -2052,8 +2058,12 @@ int _writeToClient(client *c, ssize_t *nwritten) {
  * set to 0. So when handler_installed is set to 0 the function must be
  * thread safe. */
 int writeToClient(client *c, int handler_installed) {
+    if (!(c->io_flags & CLIENT_IO_WRITE_ENABLED)) return C_OK;
     /* Update total number of writes on server */
     atomicIncr(server.stat_total_writes_processed, 1);
+    if (c->running_tid != IOTHREAD_MAIN_THREAD_ID) {
+        atomicIncr(server.stat_io_writes_processed, 1);
+    }

     ssize_t nwritten = 0, totwritten = 0;

@@ -2107,7 +2117,7 @@ int writeToClient(client *c, int handler_installed) {
          * is always called with handler_installed set to 0 from threads
          * so we are fine. */
         if (handler_installed) {
-            serverAssert(io_threads_op == IO_THREADS_OP_IDLE);
+            /* IO threads may also clear the write handler now. */
             connSetWriteHandler(c->conn, NULL);
         }

@@ -2118,10 +2128,10 @@
         }
     }
     /* Update client's memory usage after writing.
-     * Since this isn't thread safe we do this conditionally. In case of threaded writes this is done in
-     * handleClientsWithPendingWritesUsingThreads(). */
-    if (io_threads_op == IO_THREADS_OP_IDLE)
+     * Since this isn't thread safe we only do it in the main thread. */
+    if (c->running_tid == IOTHREAD_MAIN_THREAD_ID) {
         updateClientMemUsageAndBucket(c);
+    }
     return C_OK;
 }

@@ -2153,6 +2163,15 @@ int handleClientsWithPendingWrites(void) {
         /* Don't write to clients that are going to be closed anyway. */
         if (c->flags & CLIENT_CLOSE_ASAP) continue;

+        /* Let an IO thread handle the client if possible. */
+        if (server.io_threads_num > 1 &&
+            !(c->flags & CLIENT_CLOSE_AFTER_REPLY) &&
+            !isClientMustHandledByMainThread(c))
+        {
+            assignClientToIOThread(c);
+            continue;
+        }
+
         /* Try to write buffers to the client socket. */
         if (writeToClient(c,0) == C_ERR) continue;
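With the fan-out/fan-in machinery gone, handleClientsWithPendingWrites() doubles as the migration point: eligible clients are simply re-bound to an IO thread instead of being written to synchronously. The eligibility test shown above reduces to a small predicate; here is a paraphrased, stand-alone version (the flag value and the must_stay_on_main parameter are local stand-ins for CLIENT_CLOSE_AFTER_REPLY and isClientMustHandledByMainThread(), whose exact rules live in iothread.c and are not visible in this diff):

    #include <stdint.h>
    #include <stdio.h>

    #define DEMO_CLOSE_AFTER_REPLY (1u << 0)  /* stand-in for CLIENT_CLOSE_AFTER_REPLY */

    /* Returns 1 when a client may be migrated to an IO thread for its socket
     * I/O, mirroring the conditions in the hunk above. */
    static int demo_can_offload(uint32_t flags, int io_threads_num,
                                int must_stay_on_main) {
        if (io_threads_num <= 1) return 0;             /* no IO threads configured */
        if (flags & DEMO_CLOSE_AFTER_REPLY) return 0;  /* about to close: finish on main */
        if (must_stay_on_main) return 0;  /* e.g. replicas sharing the global repl buffer */
        return 1;
    }

    int main(void) {
        printf("%d\n", demo_can_offload(0, 4, 0));                       /* 1 */
        printf("%d\n", demo_can_offload(DEMO_CLOSE_AFTER_REPLY, 4, 0));  /* 0 */
        return 0;
    }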
@@ -2227,7 +2246,7 @@ void resetClient(client *c) {
  * path, it is not really released, but only marked for later release. */
 void protectClient(client *c) {
     c->flags |= CLIENT_PROTECTED;
-    if (c->conn) {
+    if (c->conn && c->tid == IOTHREAD_MAIN_THREAD_ID) {
         connSetReadHandler(c->conn,NULL);
         connSetWriteHandler(c->conn,NULL);
     }
@@ -2238,7 +2257,8 @@ void unprotectClient(client *c) {
     if (c->flags & CLIENT_PROTECTED) {
         c->flags &= ~CLIENT_PROTECTED;
         if (c->conn) {
-            connSetReadHandler(c->conn,readQueryFromClient);
+            if (c->tid == IOTHREAD_MAIN_THREAD_ID)
+                connSetReadHandler(c->conn,readQueryFromClient);
             if (clientHasPendingReplies(c)) putClientInPendingWriteQueue(c);
         }
     }
@@ -2263,8 +2283,7 @@ int processInlineBuffer(client *c) {
     /* Nothing to do without a \r\n */
     if (newline == NULL) {
         if (sdslen(c->querybuf)-c->qb_pos > PROTO_INLINE_MAX_SIZE) {
-            addReplyError(c,"Protocol error: too big inline request");
-            setProtocolError("too big inline request",c);
+            c->read_error = CLIENT_READ_TOO_BIG_INLINE_REQUEST;
         }
         return C_ERR;
     }
@@ -2279,8 +2298,7 @@ int processInlineBuffer(client *c) {
     argv = sdssplitargs(aux,&argc);
     sdsfree(aux);
     if (argv == NULL) {
-        addReplyError(c,"Protocol error: unbalanced quotes in request");
-        setProtocolError("unbalanced quotes in inline request",c);
+        c->read_error = CLIENT_READ_UNBALANCED_QUOTES;
         return C_ERR;
     }
@@ -2299,8 +2317,7 @@
      * to keep the connection active. */
     if (querylen != 0 && c->flags & CLIENT_MASTER) {
         sdsfreesplitres(argv,argc);
-        serverLog(LL_WARNING,"WARNING: Receiving inline protocol from master, master stream corruption? Closing the master connection and discarding the cached master.");
-        setProtocolError("Master using the inline protocol. Desync?",c);
+        c->read_error = CLIENT_READ_MASTER_USING_INLINE_PROTOCOL;
         return C_ERR;
     }
@@ -2385,8 +2402,7 @@ int processMultibulkBuffer(client *c) {
         newline = strchr(c->querybuf+c->qb_pos,'\r');
         if (newline == NULL) {
             if (sdslen(c->querybuf)-c->qb_pos > PROTO_INLINE_MAX_SIZE) {
-                addReplyError(c,"Protocol error: too big mbulk count string");
-                setProtocolError("too big mbulk count string",c);
+                c->read_error = CLIENT_READ_TOO_BIG_MBULK_COUNT_STRING;
             }
             return C_ERR;
         }
@@ -2400,12 +2416,10 @@
         serverAssertWithInfo(c,NULL,c->querybuf[c->qb_pos] == '*');
         ok = string2ll(c->querybuf+1+c->qb_pos,newline-(c->querybuf+1+c->qb_pos),&ll);
         if (!ok || ll > INT_MAX) {
-            addReplyError(c,"Protocol error: invalid multibulk length");
-            setProtocolError("invalid mbulk count",c);
+            c->read_error = CLIENT_READ_INVALID_MULTIBULK_LENGTH;
             return C_ERR;
         } else if (ll > 10 && authRequired(c)) {
-            addReplyError(c, "Protocol error: unauthenticated multibulk length");
-            setProtocolError("unauth mbulk count", c);
+            c->read_error = CLIENT_READ_UNAUTH_MBULK_COUNT;
             return C_ERR;
         }
@@ -2432,9 +2446,7 @@
             newline = strchr(c->querybuf+c->qb_pos,'\r');
             if (newline == NULL) {
                 if (sdslen(c->querybuf)-c->qb_pos > PROTO_INLINE_MAX_SIZE) {
-                    addReplyError(c,
-                        "Protocol error: too big bulk count string");
-                    setProtocolError("too big bulk count string",c);
+                    c->read_error = CLIENT_READ_TOO_BIG_BULK_COUNT_STRING;
                     return C_ERR;
                 }
                 break;
@@ -2445,22 +2457,17 @@
                 break;

             if (c->querybuf[c->qb_pos] != '$') {
-                addReplyErrorFormat(c,
-                    "Protocol error: expected '$', got '%c'",
-                    c->querybuf[c->qb_pos]);
-                setProtocolError("expected $ but got something else",c);
+                c->read_error = CLIENT_READ_EXPECTED_DOLLAR;
                 return C_ERR;
             }

             ok = string2ll(c->querybuf+c->qb_pos+1,newline-(c->querybuf+c->qb_pos+1),&ll);
             if (!ok || ll < 0 ||
                 (!(c->flags & CLIENT_MASTER) && ll > server.proto_max_bulk_len)) {
-                addReplyError(c,"Protocol error: invalid bulk length");
-                setProtocolError("invalid bulk length",c);
+                c->read_error = CLIENT_READ_INVALID_BULK_LENGTH;
                 return C_ERR;
             } else if (ll > 16384 && authRequired(c)) {
-                addReplyError(c, "Protocol error: unauthenticated bulk length");
-                setProtocolError("unauth bulk length", c);
+                c->read_error = CLIENT_READ_UNAUTH_BULK_LENGTH;
                 return C_ERR;
             }
@@ -2637,6 +2644,74 @@ int processPendingCommandAndInputBuffer(client *c) {
     return C_OK;
 }

+void handleClientReadError(client *c) {
+    switch (c->read_error) {
+    case CLIENT_READ_TOO_BIG_INLINE_REQUEST:
+        addReplyError(c,"Protocol error: too big inline request");
+        setProtocolError("too big inline request",c);
+        break;
+    case CLIENT_READ_UNBALANCED_QUOTES:
+        addReplyError(c,"Protocol error: unbalanced quotes in request");
+        setProtocolError("unbalanced quotes in request",c);
+        break;
+    case CLIENT_READ_MASTER_USING_INLINE_PROTOCOL:
+        serverLog(LL_WARNING,"WARNING: Receiving inline protocol from master, master stream corruption? Closing the master connection and discarding the cached master.");
+        setProtocolError("Master using the inline protocol. Desync?",c);
+        break;
+    case CLIENT_READ_TOO_BIG_MBULK_COUNT_STRING:
+        addReplyError(c,"Protocol error: too big mbulk count string");
+        setProtocolError("too big mbulk count string",c);
+        break;
+    case CLIENT_READ_TOO_BIG_BULK_COUNT_STRING:
+        addReplyError(c, "Protocol error: too big bulk count string");
+        setProtocolError("too big bulk count string",c);
+        break;
+    case CLIENT_READ_EXPECTED_DOLLAR:
+        addReplyErrorFormat(c,
+            "Protocol error: expected '$', got '%c'",
+            c->querybuf[c->qb_pos]);
+        setProtocolError("expected $ but got something else",c);
+        break;
+    case CLIENT_READ_INVALID_BULK_LENGTH:
+        addReplyError(c,"Protocol error: invalid bulk length");
+        setProtocolError("invalid bulk length",c);
+        break;
+    case CLIENT_READ_UNAUTH_BULK_LENGTH:
+        addReplyError(c, "Protocol error: unauthenticated bulk length");
+        setProtocolError("unauth bulk length", c);
+        break;
+    case CLIENT_READ_INVALID_MULTIBULK_LENGTH:
+        addReplyError(c,"Protocol error: invalid multibulk length");
+        setProtocolError("invalid mbulk count",c);
+        break;
+    case CLIENT_READ_UNAUTH_MBULK_COUNT:
+        addReplyError(c, "Protocol error: unauthenticated multibulk length");
+        setProtocolError("unauth mbulk count", c);
+        break;
+    case CLIENT_READ_CONN_DISCONNECTED:
+        serverLog(LL_VERBOSE, "Reading from client: %s",connGetLastError(c->conn));
+        break;
+    case CLIENT_READ_CONN_CLOSED:
+        if (server.verbosity <= LL_VERBOSE) {
+            sds info = catClientInfoString(sdsempty(), c);
+            serverLog(LL_VERBOSE, "Client closed connection %s", info);
+            sdsfree(info);
+        }
+        break;
+    case CLIENT_READ_REACHED_MAX_QUERYBUF: {
+        sds ci = catClientInfoString(sdsempty(),c), bytes = sdsempty();
+        bytes = sdscatrepr(bytes,c->querybuf,64);
+        serverLog(LL_WARNING,"Closing client that reached max query buffer length: %s (qbuf initial bytes: %s)", ci, bytes);
+        sdsfree(ci);
+        sdsfree(bytes);
+        break;
+    }
+    default:
+        serverPanic("Unknown client read error");
+        break;
+    }
+}
+
 /* This function is called every time, in the client structure 'c', there is
  * more query buffer to process, because we read more data from the socket
  * or because a client was blocked and later reactivated, so there could be
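Everything handleClientReadError() reports is produced on the main thread; an IO thread only ever records a numeric code and hands the client over. That division of labor, record in the worker, enqueue under a mutex, report in the owner, condenses to a short stand-alone sketch (the patch's real queue lives in the IOThread struct and is drained via processClientsOfAllIOThreads(); it also wakes the main thread with an event notifier, which this demo omits):

    #include <pthread.h>
    #include <stdio.h>

    enum { DEMO_READ_OK, DEMO_READ_UNBALANCED_QUOTES, DEMO_READ_INVALID_BULK_LENGTH };

    typedef struct demo_client {
        int id;
        int read_error;              /* plays the role of c->read_error */
        struct demo_client *next;
    } demo_client;

    static pthread_mutex_t q_mutex = PTHREAD_MUTEX_INITIALIZER;
    static demo_client *pending_for_main;

    /* "IO thread" side: parsing failed, record the code and hand over. */
    static void enqueue_to_main(demo_client *c, int err) {
        c->read_error = err;
        pthread_mutex_lock(&q_mutex);
        c->next = pending_for_main;
        pending_for_main = c;
        pthread_mutex_unlock(&q_mutex);
    }

    /* "Main thread" side: only here are replies and logs produced. */
    static void drain_and_report(void) {
        pthread_mutex_lock(&q_mutex);
        demo_client *c = pending_for_main;
        pending_for_main = NULL;
        pthread_mutex_unlock(&q_mutex);
        for (; c; c = c->next) {
            switch (c->read_error) {
            case DEMO_READ_UNBALANCED_QUOTES:
                printf("client %d: Protocol error: unbalanced quotes in request\n", c->id);
                break;
            case DEMO_READ_INVALID_BULK_LENGTH:
                printf("client %d: Protocol error: invalid bulk length\n", c->id);
                break;
            }
        }
    }

    int main(void) {
        demo_client c1 = { 1, DEMO_READ_OK, NULL };
        enqueue_to_main(&c1, DEMO_READ_INVALID_BULK_LENGTH);
        drain_and_report();
        return 0;
    }

The same enqueue path is reused for fully parsed commands (CLIENT_IO_PENDING_COMMAND, next hunk): the worker never executes commands, it only parses and forwards.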
@@ -2656,7 +2731,7 @@ int processInputBuffer(client *c) {
      * condition on the slave. We want just to accumulate the replication
      * stream (instead of replying -BUSY like we do with other clients) and
      * later resume the processing. */
-        if (isInsideYieldingLongCommand() && c->flags & CLIENT_MASTER) break;
+        if (c->flags & CLIENT_MASTER && isInsideYieldingLongCommand()) break;

         /* CLIENT_CLOSE_AFTER_REPLY closes the connection once the reply is
          * written to the client. Make sure to not let the reply grow after
@@ -2675,23 +2750,34 @@
         }

         if (c->reqtype == PROTO_REQ_INLINE) {
-            if (processInlineBuffer(c) != C_OK) break;
+            if (processInlineBuffer(c) != C_OK) {
+                if (c->running_tid != IOTHREAD_MAIN_THREAD_ID && c->read_error)
+                    enqueuePendingClientsToMainThread(c, 0);
+                break;
+            }
         } else if (c->reqtype == PROTO_REQ_MULTIBULK) {
-            if (processMultibulkBuffer(c) != C_OK) break;
+            if (processMultibulkBuffer(c) != C_OK) {
+                if (c->running_tid != IOTHREAD_MAIN_THREAD_ID && c->read_error)
+                    enqueuePendingClientsToMainThread(c, 0);
+                break;
+            }
         } else {
             serverPanic("Unknown request type");
         }

         /* Multibulk processing could see a <= 0 length. */
         if (c->argc == 0) {
-            resetClientInternal(c, 0);
+            freeClientArgvInternal(c, 0);
+            c->reqtype = 0;
+            c->multibulklen = 0;
+            c->bulklen = -1;
         } else {
             /* If we are in the context of an I/O thread, we can't really
              * execute the command here. All we can do is to flag the client
              * as one that needs to process the command. */
-            if (io_threads_op != IO_THREADS_OP_IDLE) {
-                serverAssert(io_threads_op == IO_THREADS_OP_READ);
-                c->flags |= CLIENT_PENDING_COMMAND;
+            if (c->running_tid != IOTHREAD_MAIN_THREAD_ID) {
+                c->io_flags |= CLIENT_IO_PENDING_COMMAND;
+                enqueuePendingClientsToMainThread(c, 0);
                 break;
             }

@@ -2732,7 +2818,7 @@
     /* Update client memory usage after processing the query buffer, this is
      * important in case the query buffer is big and wasn't drained during
      * the above loop (because of partially sent big commands). */
-    if (io_threads_op == IO_THREADS_OP_IDLE)
+    if (c->running_tid == IOTHREAD_MAIN_THREAD_ID)
         updateClientMemUsageAndBucket(c);

     return C_OK;
@@ -2742,13 +2828,14 @@ void readQueryFromClient(connection *conn) {
     client *c = connGetPrivateData(conn);
     int nread, big_arg = 0;
     size_t qblen, readlen;
-
-    /* Check if we want to read from the client later when exiting from
-     * the event loop. This is the case if threaded I/O is enabled. */
-    if (postponeClientRead(c)) return;
+    if (!(c->io_flags & CLIENT_IO_READ_ENABLED)) return;
+    c->read_error = 0;

     /* Update total number of reads on server */
     atomicIncr(server.stat_total_reads_processed, 1);
+    if (c->running_tid != IOTHREAD_MAIN_THREAD_ID) {
+        atomicIncr(server.stat_io_reads_processed, 1);
+    }

     readlen = PROTO_IOBUF_LEN;
     /* If this is a multi bulk request, and we are processing a bulk reply
@@ -2793,7 +2880,7 @@
         /* Assign the reusable query buffer to the client and mark it as in use.
*/ serverAssert(sdslen(thread_reusable_qb) == 0); c->querybuf = thread_reusable_qb; - c->flags |= CLIENT_REUSABLE_QUERYBUFFER; + c->io_flags |= CLIENT_IO_REUSABLE_QUERYBUFFER; thread_reusable_qb_used = 1; } } @@ -2821,16 +2908,12 @@ void readQueryFromClient(connection *conn) { if (connGetState(conn) == CONN_STATE_CONNECTED) { goto done; } else { - serverLog(LL_VERBOSE, "Reading from client: %s",connGetLastError(c->conn)); + c->read_error = CLIENT_READ_CONN_DISCONNECTED; freeClientAsync(c); goto done; } } else if (nread == 0) { - if (server.verbosity <= LL_VERBOSE) { - sds info = catClientInfoString(sdsempty(), c); - serverLog(LL_VERBOSE, "Client closed connection %s", info); - sdsfree(info); - } + c->read_error = CLIENT_READ_CONN_CLOSED; freeClientAsync(c); goto done; } @@ -2853,13 +2936,9 @@ void readQueryFromClient(connection *conn) { * * For unauthenticated clients, the query buffer cannot exceed 1MB at most. */ (c->mstate.argv_len_sums + sdslen(c->querybuf) > server.client_max_querybuf_len || - (c->mstate.argv_len_sums + sdslen(c->querybuf) > 1024*1024 && authRequired(c)))) { - sds ci = catClientInfoString(sdsempty(),c), bytes = sdsempty(); - - bytes = sdscatrepr(bytes,c->querybuf,64); - serverLog(LL_WARNING,"Closing client that reached max query buffer length: %s (qbuf initial bytes: %s)", ci, bytes); - sdsfree(ci); - sdsfree(bytes); + (c->mstate.argv_len_sums + sdslen(c->querybuf) > 1024*1024 && authRequired(c)))) + { + c->read_error = CLIENT_READ_REACHED_MAX_QUERYBUF; freeClientAsync(c); atomicIncr(server.stat_client_qbuf_limit_disconnections, 1); goto done; @@ -2871,7 +2950,13 @@ void readQueryFromClient(connection *conn) { c = NULL; done: - if (c && (c->flags & CLIENT_REUSABLE_QUERYBUFFER)) { + if (c && c->read_error) { + if (c->running_tid == IOTHREAD_MAIN_THREAD_ID) { + handleClientReadError(c); + } + } + + if (c && (c->io_flags & CLIENT_IO_REUSABLE_QUERYBUFFER)) { serverAssert(c->qb_pos == 0); /* Ensure the client's query buffer is trimmed in processInputBuffer */ resetReusableQueryBuf(c); } @@ -2933,6 +3018,16 @@ char *getClientSockname(client *c) { sds catClientInfoString(sds s, client *client) { char flags[17], events[3], conninfo[CONN_INFO_LEN], *p; + /* Pause IO thread to access data of the client safely. */ + int paused = 0; + if (client->running_tid != IOTHREAD_MAIN_THREAD_ID && + pthread_equal(server.main_thread_id, pthread_self()) && + !server.crashing) + { + paused = 1; + pauseIOThread(client->running_tid); + } + p = flags; if (client->flags & CLIENT_SLAVE) { if (client->flags & CLIENT_MONITOR) @@ -3006,7 +3101,10 @@ sds catClientInfoString(sds s, client *client) { " redir=%I", (client->flags & CLIENT_TRACKING) ? (long long) client->client_tracking_redirection : -1, " resp=%i", client->resp, " lib-name=%s", client->lib_name ? (char*)client->lib_name->ptr : "", - " lib-ver=%s", client->lib_ver ? (char*)client->lib_ver->ptr : "")); + " lib-ver=%s", client->lib_ver ? (char*)client->lib_ver->ptr : "", + " io-thread=%i", client->tid)); + + if (paused) resumeIOThread(client->running_tid); return ret; } @@ -3016,6 +3114,17 @@ sds getAllClientsInfoString(int type) { client *client; sds o = sdsnewlen(SDS_NOINIT,200*listLength(server.clients)); sdsclear(o); + + /* Pause all IO threads to access data of clients safely, and pausing the + * specific IO thread will not repeatedly execute in catClientInfoString. 
*/ + int allpaused = 0; + if (server.io_threads_num > 1 && !server.crashing && + pthread_equal(server.main_thread_id, pthread_self())) + { + allpaused = 1; + pauseAllIOThreads(); + } + listRewind(server.clients,&li); while ((ln = listNext(&li)) != NULL) { client = listNodeValue(ln); @@ -3023,6 +3132,8 @@ sds getAllClientsInfoString(int type) { o = catClientInfoString(o,client); o = sdscatlen(o,"\n",1); } + + if (allpaused) resumeAllIOThreads(); return o; } @@ -4331,388 +4442,6 @@ void processEventsWhileBlocked(void) { server.cmd_time_snapshot = prev_cmd_time_snapshot; } -/* ========================================================================== - * Threaded I/O - * ========================================================================== */ - -#define IO_THREADS_MAX_NUM 128 - -typedef struct __attribute__((aligned(CACHE_LINE_SIZE))) threads_pending { - redisAtomic unsigned long value; -} threads_pending; - -pthread_t io_threads[IO_THREADS_MAX_NUM]; -pthread_mutex_t io_threads_mutex[IO_THREADS_MAX_NUM]; -threads_pending io_threads_pending[IO_THREADS_MAX_NUM]; -int io_threads_op; /* IO_THREADS_OP_IDLE, IO_THREADS_OP_READ or IO_THREADS_OP_WRITE. */ // TODO: should access to this be atomic??! - -/* This is the list of clients each thread will serve when threaded I/O is - * used. We spawn io_threads_num-1 threads, since one is the main thread - * itself. */ -list *io_threads_list[IO_THREADS_MAX_NUM]; - -static inline unsigned long getIOPendingCount(int i) { - unsigned long count = 0; - atomicGetWithSync(io_threads_pending[i].value, count); - return count; -} - -static inline void setIOPendingCount(int i, unsigned long count) { - atomicSetWithSync(io_threads_pending[i].value, count); -} - -void *IOThreadMain(void *myid) { - /* The ID is the thread number (from 0 to server.io_threads_num-1), and is - * used by the thread to just manipulate a single sub-array of clients. */ - long id = (unsigned long)myid; - char thdname[16]; - - snprintf(thdname, sizeof(thdname), "io_thd_%ld", id); - redis_set_thread_title(thdname); - redisSetCpuAffinity(server.server_cpulist); - makeThreadKillable(); - - while(1) { - /* Wait for start */ - for (int j = 0; j < 1000000; j++) { - if (getIOPendingCount(id) != 0) break; - } - - /* Give the main thread a chance to stop this thread. */ - if (getIOPendingCount(id) == 0) { - pthread_mutex_lock(&io_threads_mutex[id]); - pthread_mutex_unlock(&io_threads_mutex[id]); - continue; - } - - serverAssert(getIOPendingCount(id) != 0); - - /* Process: note that the main thread will never touch our list - * before we drop the pending count to 0. */ - listIter li; - listNode *ln; - listRewind(io_threads_list[id],&li); - while((ln = listNext(&li))) { - client *c = listNodeValue(ln); - if (io_threads_op == IO_THREADS_OP_WRITE) { - writeToClient(c,0); - } else if (io_threads_op == IO_THREADS_OP_READ) { - readQueryFromClient(c->conn); - } else { - serverPanic("io_threads_op value is unknown"); - } - } - listEmpty(io_threads_list[id]); - setIOPendingCount(id, 0); - } -} - -/* Initialize the data structures needed for threaded I/O. */ -void initThreadedIO(void) { - server.io_threads_active = 0; /* We start with threads not active. */ - - /* Indicate that io-threads are currently idle */ - io_threads_op = IO_THREADS_OP_IDLE; - - /* Don't spawn any thread if the user selected a single thread: - * we'll handle I/O directly from the main thread. 
*/ - if (server.io_threads_num == 1) return; - - if (server.io_threads_num > IO_THREADS_MAX_NUM) { - serverLog(LL_WARNING,"Fatal: too many I/O threads configured. " - "The maximum number is %d.", IO_THREADS_MAX_NUM); - exit(1); - } - - /* Spawn and initialize the I/O threads. */ - for (int i = 0; i < server.io_threads_num; i++) { - /* Things we do for all the threads including the main thread. */ - io_threads_list[i] = listCreate(); - if (i == 0) continue; /* Thread 0 is the main thread. */ - - /* Things we do only for the additional threads. */ - pthread_t tid; - pthread_mutex_init(&io_threads_mutex[i],NULL); - setIOPendingCount(i, 0); - pthread_mutex_lock(&io_threads_mutex[i]); /* Thread will be stopped. */ - if (pthread_create(&tid,NULL,IOThreadMain,(void*)(long)i) != 0) { - serverLog(LL_WARNING,"Fatal: Can't initialize IO thread."); - exit(1); - } - io_threads[i] = tid; - } -} - -void killIOThreads(void) { - int err, j; - for (j = 0; j < server.io_threads_num; j++) { - if (io_threads[j] == pthread_self()) continue; - if (io_threads[j] && pthread_cancel(io_threads[j]) == 0) { - if ((err = pthread_join(io_threads[j],NULL)) != 0) { - serverLog(LL_WARNING, - "IO thread(tid:%lu) can not be joined: %s", - (unsigned long)io_threads[j], strerror(err)); - } else { - serverLog(LL_WARNING, - "IO thread(tid:%lu) terminated",(unsigned long)io_threads[j]); - } - } - } -} - -void startThreadedIO(void) { - serverAssert(server.io_threads_active == 0); - for (int j = 1; j < server.io_threads_num; j++) - pthread_mutex_unlock(&io_threads_mutex[j]); - server.io_threads_active = 1; -} - -void stopThreadedIO(void) { - /* We may have still clients with pending reads when this function - * is called: handle them before stopping the threads. */ - handleClientsWithPendingReadsUsingThreads(); - serverAssert(server.io_threads_active == 1); - for (int j = 1; j < server.io_threads_num; j++) - pthread_mutex_lock(&io_threads_mutex[j]); - server.io_threads_active = 0; -} - -/* This function checks if there are not enough pending clients to justify - * taking the I/O threads active: in that case I/O threads are stopped if - * currently active. We track the pending writes as a measure of clients - * we need to handle in parallel, however the I/O threading is disabled - * globally for reads as well if we have too little pending clients. - * - * The function returns 0 if the I/O threading should be used because there - * are enough active threads, otherwise 1 is returned and the I/O threads - * could be possibly stopped (if already active) as a side effect. */ -int stopThreadedIOIfNeeded(void) { - int pending = listLength(server.clients_pending_write); - - /* Return ASAP if IO threads are disabled (single threaded mode). */ - if (server.io_threads_num == 1) return 1; - - if (pending < (server.io_threads_num*2)) { - if (server.io_threads_active) stopThreadedIO(); - return 1; - } else { - return 0; - } -} - -/* This function achieves thread safety using a fan-out -> fan-in paradigm: - * Fan out: The main thread fans out work to the io-threads which block until - * setIOPendingCount() is called with a value larger than 0 by the main thread. - * Fan in: The main thread waits until getIOPendingCount() returns 0. Then - * it can safely perform post-processing and return to normal synchronous - * work. */ -int handleClientsWithPendingWritesUsingThreads(void) { - int processed = listLength(server.clients_pending_write); - if (processed == 0) return 0; /* Return ASAP if there are no clients. 
*/ - - /* If I/O threads are disabled or we have few clients to serve, don't - * use I/O threads, but the boring synchronous code. */ - if (server.io_threads_num == 1 || stopThreadedIOIfNeeded()) { - return handleClientsWithPendingWrites(); - } - - /* Start threads if needed. */ - if (!server.io_threads_active) startThreadedIO(); - - /* Distribute the clients across N different lists. */ - listIter li; - listNode *ln; - listRewind(server.clients_pending_write,&li); - int item_id = 0; - while((ln = listNext(&li))) { - client *c = listNodeValue(ln); - c->flags &= ~CLIENT_PENDING_WRITE; - - /* Remove clients from the list of pending writes since - * they are going to be closed ASAP. */ - if (c->flags & CLIENT_CLOSE_ASAP) { - listUnlinkNode(server.clients_pending_write, ln); - continue; - } - - /* Since all replicas and replication backlog use global replication - * buffer, to guarantee data accessing thread safe, we must put all - * replicas client into io_threads_list[0] i.e. main thread handles - * sending the output buffer of all replicas. */ - if (unlikely(clientTypeIsSlave(c))) { - listAddNodeTail(io_threads_list[0],c); - continue; - } - - int target_id = item_id % server.io_threads_num; - listAddNodeTail(io_threads_list[target_id],c); - item_id++; - } - - /* Give the start condition to the waiting threads, by setting the - * start condition atomic var. */ - io_threads_op = IO_THREADS_OP_WRITE; - for (int j = 1; j < server.io_threads_num; j++) { - int count = listLength(io_threads_list[j]); - setIOPendingCount(j, count); - } - - /* Also use the main thread to process a slice of clients. */ - listRewind(io_threads_list[0],&li); - while((ln = listNext(&li))) { - client *c = listNodeValue(ln); - writeToClient(c,0); - } - listEmpty(io_threads_list[0]); - - /* Wait for all the other threads to end their work. */ - while(1) { - unsigned long pending = 0; - for (int j = 1; j < server.io_threads_num; j++) - pending += getIOPendingCount(j); - if (pending == 0) break; - } - - io_threads_op = IO_THREADS_OP_IDLE; - - /* Run the list of clients again to install the write handler where - * needed. */ - listRewind(server.clients_pending_write,&li); - while((ln = listNext(&li))) { - client *c = listNodeValue(ln); - - /* Update the client in the mem usage after we're done processing it in the io-threads */ - updateClientMemUsageAndBucket(c); - - /* Install the write handler if there are pending writes in some - * of the clients. */ - if (clientHasPendingReplies(c)) { - installClientWriteHandler(c); - } - } - while(listLength(server.clients_pending_write) > 0) { - listUnlinkNode(server.clients_pending_write, server.clients_pending_write->head); - } - - /* Update processed count on server */ - server.stat_io_writes_processed += processed; - - return processed; -} - -/* Return 1 if we want to handle the client read later using threaded I/O. - * This is called by the readable handler of the event loop. - * As a side effect of calling this function the client is put in the - * pending read clients and flagged as such. 
*/ -int postponeClientRead(client *c) { - if (server.io_threads_active && - server.io_threads_do_reads && - !ProcessingEventsWhileBlocked && - !(c->flags & (CLIENT_MASTER|CLIENT_SLAVE|CLIENT_BLOCKED)) && - io_threads_op == IO_THREADS_OP_IDLE) - { - listAddNodeHead(server.clients_pending_read,c); - c->pending_read_list_node = listFirst(server.clients_pending_read); - return 1; - } else { - return 0; - } -} - -/* When threaded I/O is also enabled for the reading + parsing side, the - * readable handler will just put normal clients into a queue of clients to - * process (instead of serving them synchronously). This function runs - * the queue using the I/O threads, and process them in order to accumulate - * the reads in the buffers, and also parse the first command available - * rendering it in the client structures. - * This function achieves thread safety using a fan-out -> fan-in paradigm: - * Fan out: The main thread fans out work to the io-threads which block until - * setIOPendingCount() is called with a value larger than 0 by the main thread. - * Fan in: The main thread waits until getIOPendingCount() returns 0. Then - * it can safely perform post-processing and return to normal synchronous - * work. */ -int handleClientsWithPendingReadsUsingThreads(void) { - if (!server.io_threads_active || !server.io_threads_do_reads) return 0; - int processed = listLength(server.clients_pending_read); - if (processed == 0) return 0; - - /* Distribute the clients across N different lists. */ - listIter li; - listNode *ln; - listRewind(server.clients_pending_read,&li); - int item_id = 0; - while((ln = listNext(&li))) { - client *c = listNodeValue(ln); - int target_id = item_id % server.io_threads_num; - listAddNodeTail(io_threads_list[target_id],c); - item_id++; - } - - /* Give the start condition to the waiting threads, by setting the - * start condition atomic var. */ - io_threads_op = IO_THREADS_OP_READ; - for (int j = 1; j < server.io_threads_num; j++) { - int count = listLength(io_threads_list[j]); - setIOPendingCount(j, count); - } - - /* Also use the main thread to process a slice of clients. */ - listRewind(io_threads_list[0],&li); - while((ln = listNext(&li))) { - client *c = listNodeValue(ln); - readQueryFromClient(c->conn); - } - listEmpty(io_threads_list[0]); - - /* Wait for all the other threads to end their work. */ - while(1) { - unsigned long pending = 0; - for (int j = 1; j < server.io_threads_num; j++) - pending += getIOPendingCount(j); - if (pending == 0) break; - } - - io_threads_op = IO_THREADS_OP_IDLE; - - /* Run the list of clients again to process the new buffers. */ - while(listLength(server.clients_pending_read)) { - ln = listFirst(server.clients_pending_read); - client *c = listNodeValue(ln); - listDelNode(server.clients_pending_read,ln); - c->pending_read_list_node = NULL; - - serverAssert(!(c->flags & CLIENT_BLOCKED)); - - if (beforeNextClient(c) == C_ERR) { - /* If the client is no longer valid, we avoid - * processing the client later. So we just go - * to the next. */ - continue; - } - - /* Once io-threads are idle we can update the client in the mem usage */ - updateClientMemUsageAndBucket(c); - - if (processPendingCommandAndInputBuffer(c) == C_ERR) { - /* If the client is no longer valid, we avoid - * processing the client later. So we just go - * to the next. */ - continue; - } - - /* We may have pending replies if a thread readQueryFromClient() produced - * replies and did not put the client in pending write queue (it can't). 
- */
-        if (!(c->flags & CLIENT_PENDING_WRITE) && clientHasPendingReplies(c))
-            putClientInPendingWriteQueue(c);
-    }
-
-    /* Update processed count on server */
-    server.stat_io_reads_processed += processed;
-
-    return processed;
-}
-
 /* Returns the actual client eviction limit based on current configuration or
  * 0 if no limit. */
 size_t getClientEvictionLimit(void) {
@@ -4752,11 +4481,34 @@ void evictClients(void) {
         listNode *ln = listNext(&bucket_iter);
         if (ln) {
             client *c = ln->value;
-            sds ci = catClientInfoString(sdsempty(),c);
-            serverLog(LL_NOTICE, "Evicting client: %s", ci);
-            freeClient(c);
-            sdsfree(ci);
-            server.stat_evictedclients++;
+            size_t last_memory = c->last_memory_usage;
+            int tid = c->running_tid;
+            if (tid != IOTHREAD_MAIN_THREAD_ID) {
+                pauseIOThread(tid);
+                /* If the client is running in an IO thread, refresh its memory
+                 * usage and bucket before deciding. They are updated only in
+                 * the main thread (e.g. while processing commands and in
+                 * clientsCron), so they may be stale. To avoid incorrectly
+                 * evicting a client, update them again here and evict only if
+                 * its memory usage did not decrease or its bucket did not
+                 * change. */
+                updateClientMemUsageAndBucket(c);
+            }
+            if (c->last_memory_usage >= last_memory ||
+                c->mem_usage_bucket == &server.client_mem_usage_buckets[curr_bucket])
+            {
+                sds ci = catClientInfoString(sdsempty(),c);
+                serverLog(LL_NOTICE, "Evicting client: %s", ci);
+                freeClient(c);
+                sdsfree(ci);
+                server.stat_evictedclients++;
+            }
+            if (tid != IOTHREAD_MAIN_THREAD_ID) {
+                resumeIOThread(tid);
+                /* The 'next' of 'bucket_iter' may have changed after updating the
+                 * client's memory usage and freeing the client, so reset 'bucket_iter'. */
+                listRewind(server.client_mem_usage_buckets[curr_bucket].clients, &bucket_iter);
+            }
         } else {
             curr_bucket--;
             if (curr_bucket < 0) {
diff --git a/src/replication.c b/src/replication.c
index abf930e61..79a55d39b 100644
--- a/src/replication.c
+++ b/src/replication.c
@@ -2925,7 +2925,7 @@ write_error: /* Handle sendCommand() errors. */
 int connectWithMaster(void) {
-    server.repl_transfer_s = connCreate(connTypeOfReplication());
+    server.repl_transfer_s = connCreate(server.el, connTypeOfReplication());
     if (connConnect(server.repl_transfer_s, server.masterhost, server.masterport,
                 server.bind_source_addr, syncWithMaster) == C_ERR) {
         serverLog(LL_WARNING,"Unable to connect to MASTER: %s",
diff --git a/src/server.c b/src/server.c
index 4b729fede..0b4c95ce8 100644
--- a/src/server.c
+++ b/src/server.c
@@ -963,7 +963,7 @@ void removeClientFromMemUsageBucket(client *c, int allow_eviction) {
  * returns 1 if client eviction for this client is allowed, 0 otherwise. */
 int updateClientMemUsageAndBucket(client *c) {
-    serverAssert(io_threads_op == IO_THREADS_OP_IDLE && c->conn);
+    serverAssert(pthread_equal(pthread_self(), server.main_thread_id) && c->conn);
     int allow_eviction = clientEvictionAllowed(c);
     removeClientFromMemUsageBucket(c, allow_eviction);
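The eviction hunk above is a check-then-recheck: the accounting that nominated a candidate may be stale when the client lives on an IO thread, so the main thread pauses that thread, refreshes the numbers, and acts only if they still justify eviction. Reduced to its decision function (an illustrative sketch, not the patch's code):

    #include <stdio.h>
    #include <stddef.h>

    /* Mirrors the condition in evictClients(): proceed only if the refreshed
     * usage is no smaller than the stale value that nominated the client, or
     * the client still falls in the bucket currently being evicted from. */
    static int still_evictable(size_t stale_mem, size_t fresh_mem,
                               int nominated_bucket, int fresh_bucket) {
        return fresh_mem >= stale_mem || fresh_bucket == nominated_bucket;
    }

    int main(void) {
        /* Usage shrank and the bucket changed: the candidate is spared. */
        printf("%d\n", still_evictable(1 << 20, 1 << 18, 7, 5)); /* prints 0 */
        /* Usage did not shrink: still evictable. */
        printf("%d\n", still_evictable(1 << 20, 1 << 20, 7, 7)); /* prints 1 */
        return 0;
    }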
@@ -1015,6 +1015,7 @@ void getExpansiveClientsInfo(size_t *in_usage, size_t *out_usage) {
  * default server.hz value is 10, so sometimes here we need to process thousands
  * of clients per second, turning this function into a source of latency.
  */
+#define CLIENTS_CRON_PAUSE_IOTHREAD 8
 #define CLIENTS_CRON_MIN_ITERATIONS 5
 void clientsCron(void) {
     /* Try to process at least numclients/server.hz of clients
@@ -1049,6 +1050,15 @@
     ClientsPeakMemInput[zeroidx] = 0;
     ClientsPeakMemOutput[zeroidx] = 0;

+    /* Pause the IO threads that are processing clients, to let us access clients
+     * safely. To avoid burning CPU by pausing all threads at once when there
+     * are many IO threads, we pause them in batches. */
+    static int start = 1, end = 0;
+    if (server.io_threads_num > 1 && listLength(server.clients) > 0) {
+        end = start + CLIENTS_CRON_PAUSE_IOTHREAD - 1;
+        if (end >= server.io_threads_num) end = server.io_threads_num - 1;
+        pauseIOThreadsRange(start, end);
+    }

     while(listLength(server.clients) && iterations--) {
         client *c;
@@ -1059,6 +1069,15 @@
         head = listFirst(server.clients);
         c = listNodeValue(head);
         listRotateHeadToTail(server.clients);
+
+        if (c->running_tid != IOTHREAD_MAIN_THREAD_ID &&
+            !(c->running_tid >= start && c->running_tid <= end))
+        {
+            /* Skip clients that are being processed by the IO threads that
+             * are not paused. */
+            continue;
+        }
+
         /* The following functions do different service checks on the client.
          * The protocol is that they return non-zero if the client was
          * terminated. */
@@ -1080,6 +1099,14 @@
         if (closeClientOnOutputBufferLimitReached(c, 0)) continue;
     }
+
+    /* Resume the IO threads that were paused. */
+    if (end) {
+        resumeIOThreadsRange(start, end);
+        start = end + 1;
+        if (start >= server.io_threads_num) start = 1;
+        end = 0;
+    }
 }

 /* This function handles 'background' operations we are required to do
@@ -1528,9 +1555,6 @@ int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) {
         migrateCloseTimedoutSockets();
     }

-    /* Stop the I/O threads if we don't have enough pending work. */
-    stopThreadedIOIfNeeded();
-
     /* Resize tracking keys table if needed. This is also done at every
      * command execution, but we want to be sure that if the last command
      * executed changes the value via CONFIG SET, the server will perform
@@ -1682,24 +1706,28 @@ void beforeSleep(struct aeEventLoop *eventLoop) {
      * events to handle. */
     if (ProcessingEventsWhileBlocked) {
         uint64_t processed = 0;
-        processed += handleClientsWithPendingReadsUsingThreads();
-        processed += connTypeProcessPendingData();
+        processed += connTypeProcessPendingData(server.el);
         if (server.aof_state == AOF_ON || server.aof_state == AOF_WAIT_REWRITE)
             flushAppendOnlyFile(0);
         processed += handleClientsWithPendingWrites();
         processed += freeClientsInAsyncFreeQueue();
+
+        /* Process clients handed over by IO threads during the blocking call. */
+        processClientsOfAllIOThreads();
+        /* New connections may have been established while blocked, and clients
+         * from IO threads may have replies to write; make sure they are
+         * promptly sent back to the IO threads. */
+        processed += sendPendingClientsToIOThreads();
+
         server.events_processed_while_blocked += processed;
         return;
     }

-    /* We should handle pending reads clients ASAP after event loop. */
-    handleClientsWithPendingReadsUsingThreads();
-
     /* Handle pending data(typical TLS). (must be done before flushAppendOnlyFile) */
-    connTypeProcessPendingData();
+    connTypeProcessPendingData(server.el);

     /* If any connection type(typical TLS) still has pending unread data don't sleep at all. */
-    int dont_sleep = connTypeHasPendingData();
+    int dont_sleep = connTypeHasPendingData(server.el);

     /* Call the Redis Cluster before sleep function. Note that this function
      * may change the state of Redis Cluster (from ok to fail or vice versa),
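The batched pausing added to clientsCron() above walks a window of at most CLIENTS_CRON_PAUSE_IOTHREAD thread ids per cron run and wraps back to thread 1, since thread 0 is the main thread and never needs pausing. The window arithmetic can be checked in isolation with a stand-alone loop (io_threads_num below is a made-up configuration):

    #include <stdio.h>

    #define CLIENTS_CRON_PAUSE_IOTHREAD 8

    int main(void) {
        int io_threads_num = 20;   /* hypothetical configuration */
        int start = 1, end = 0;
        for (int cron = 0; cron < 6; cron++) {
            end = start + CLIENTS_CRON_PAUSE_IOTHREAD - 1;
            if (end >= io_threads_num) end = io_threads_num - 1;
            printf("cron #%d: pause io threads %d..%d\n", cron, start, end);
            /* ... inspect the clients owned by threads [start, end] ... */
            start = end + 1;                        /* next batch */
            if (start >= io_threads_num) start = 1; /* wrap, skipping thread 0 */
            end = 0;
        }
        return 0;
    }

With 19 worker threads this prints batches 1..8, 9..16, 17..19 and then starts over, so every IO thread is inspected once per three cron runs instead of all of them being stalled on every run.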
@@ -1765,8 +1793,8 @@ void beforeSleep(struct aeEventLoop *eventLoop) {
     long long prev_fsynced_reploff = server.fsynced_reploff;

     /* Write the AOF buffer on disk,
-     * must be done before handleClientsWithPendingWritesUsingThreads,
-     * in case of appendfsync=always. */
+     * must be done before handleClientsWithPendingWrites and
+     * sendPendingClientsToIOThreads, in case of appendfsync=always. */
     if (server.aof_state == AOF_ON || server.aof_state == AOF_WAIT_REWRITE)
         flushAppendOnlyFile(0);

@@ -1788,7 +1816,10 @@
     }

     /* Handle writes with pending output buffers. */
-    handleClientsWithPendingWritesUsingThreads();
+    handleClientsWithPendingWrites();
+
+    /* Let the IO threads handle their pending clients. */
+    sendPendingClientsToIOThreads();

     /* Record cron time in beforeSleep. This does not include the time consumed by AOF writing and IO writing above. */
     monotime cron_start_time_after_write = getMonotonicUs();
@@ -2117,6 +2148,7 @@ void initServerConfig(void) {
     memset(server.blocked_clients_by_type,0,
            sizeof(server.blocked_clients_by_type));
     server.shutdown_asap = 0;
+    server.crashing = 0;
     server.shutdown_flags = 0;
     server.shutdown_mstime = 0;
     server.cluster_module_flags = CLUSTER_MODULE_FLAG_NONE;
@@ -2583,9 +2615,9 @@ void resetServerStats(void) {
     server.stat_sync_full = 0;
     server.stat_sync_partial_ok = 0;
     server.stat_sync_partial_err = 0;
-    server.stat_io_reads_processed = 0;
+    atomicSet(server.stat_io_reads_processed, 0);
     atomicSet(server.stat_total_reads_processed, 0);
-    server.stat_io_writes_processed = 0;
+    atomicSet(server.stat_io_writes_processed, 0);
     atomicSet(server.stat_total_writes_processed, 0);
     atomicSet(server.stat_client_qbuf_limit_disconnections, 0);
     server.stat_client_outbuf_limit_disconnections = 0;
@@ -2778,6 +2810,7 @@ void initServer(void) {
     server.aof_last_write_errno = 0;
     server.repl_good_slaves_count = 0;
     server.last_sig_received = 0;
+    memset(server.io_threads_clients_num, 0, sizeof(server.io_threads_clients_num));

     /* Initiate acl info struct */
     server.acl_info.invalid_cmd_accesses = 0;
@@ -5535,7 +5568,7 @@ void releaseInfoSectionDict(dict *sec) {
  * The resulting dictionary should be released with releaseInfoSectionDict. */
 dict *genInfoSectionDict(robj **argv, int argc, char **defaults, int *out_all, int *out_everything) {
     char *default_sections[] = {
-        "server", "clients", "memory", "persistence", "stats", "replication",
+        "server", "clients", "memory", "persistence", "stats", "replication", "threads",
         "cpu", "module_list", "errorstats", "cluster", "keyspace", "keysizes", NULL};
     if (!defaults) defaults = default_sections;
@@ -5886,6 +5919,7 @@ sds genRedisInfoString(dict *section_dict, int all_sections, int everything) {
     long long current_active_defrag_time = server.stat_last_active_defrag_time ?
     (long long) elapsedUs(server.stat_last_active_defrag_time): 0;
     long long stat_client_qbuf_limit_disconnections;
+    long long stat_io_reads_processed, stat_io_writes_processed;
     atomicGet(server.stat_total_reads_processed, stat_total_reads_processed);
     atomicGet(server.stat_total_writes_processed, stat_total_writes_processed);
     atomicGet(server.stat_net_input_bytes, stat_net_input_bytes);
@@ -5893,6 +5927,8 @@
     atomicGet(server.stat_net_repl_input_bytes, stat_net_repl_input_bytes);
     atomicGet(server.stat_net_repl_output_bytes, stat_net_repl_output_bytes);
     atomicGet(server.stat_client_qbuf_limit_disconnections, stat_client_qbuf_limit_disconnections);
+    atomicGet(server.stat_io_reads_processed, stat_io_reads_processed);
+    atomicGet(server.stat_io_writes_processed, stat_io_writes_processed);

     if (sections++) info = sdscat(info,"\r\n");
     info = sdscatprintf(info, "# Stats\r\n" FMTARGS(
@@ -5944,8 +5980,8 @@
         "dump_payload_sanitizations:%lld\r\n", server.stat_dump_payload_sanitizations,
         "total_reads_processed:%lld\r\n", stat_total_reads_processed,
         "total_writes_processed:%lld\r\n", stat_total_writes_processed,
-        "io_threaded_reads_processed:%lld\r\n", server.stat_io_reads_processed,
-        "io_threaded_writes_processed:%lld\r\n", server.stat_io_writes_processed,
+        "io_threaded_reads_processed:%lld\r\n", stat_io_reads_processed,
+        "io_threaded_writes_processed:%lld\r\n", stat_io_writes_processed,
         "client_query_buffer_limit_disconnections:%lld\r\n", stat_client_qbuf_limit_disconnections,
         "client_output_buffer_limit_disconnections:%lld\r\n", server.stat_client_outbuf_limit_disconnections,
         "reply_buffer_shrinks:%lld\r\n", server.stat_reply_buffer_shrinks,
@@ -6094,6 +6130,15 @@
 #endif /* RUSAGE_THREAD */
     }

+    /* Threads */
+    if (all_sections || (dictFind(section_dict,"threads") != NULL)) {
+        if (sections++) info = sdscat(info,"\r\n");
+        info = sdscatprintf(info, "# Threads\r\n");
+        for (j = 0; j < server.io_threads_num; j++) {
+            info = sdscatprintf(info, "io_thread_%d:clients=%d\r\n", j, server.io_threads_clients_num[j]);
+        }
+    }
+
     /* Modules */
     if (all_sections || (dictFind(section_dict,"module_list") != NULL) || (dictFind(section_dict,"modules") != NULL)) {
         if (sections++) info = sdscat(info,"\r\n");
diff --git a/src/server.h b/src/server.h
index 205d73c68..bc965999e 100644
--- a/src/server.h
+++ b/src/server.h
@@ -61,6 +61,7 @@ typedef long long ustime_t; /* microsecond time type. */
                              N-elements flat arrays */
 #include "rax.h"     /* Radix tree */
 #include "connection.h" /* Connection abstraction */
+#include "eventnotifier.h" /* Event notification */

 #define REDISMODULE_CORE 1
 typedef struct redisObject robj;
@@ -184,6 +185,14 @@ extern int configOOMScoreAdjValuesDefaults[CONFIG_OOM_COUNT];
 /* Hash table parameters */
 #define HASHTABLE_MAX_LOAD_FACTOR 1.618 /* Maximum hash table load factor. */

+/* Max number of IO threads */
+#define IO_THREADS_MAX_NUM 128
+
+/* Main thread id for doing IO work. Whether or not IO threads are enabled,
+ * the main thread always does some IO work, so it can be considered as IO
+ * thread 0. */
+#define IOTHREAD_MAIN_THREAD_ID 0
+
 /* Command flags. Please check the definition of struct redisCommand in this file
  * for more information about the meaning of every flag. */
 #define CMD_WRITE (1ULL<<0)
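Given the format string in the new Threads section above, an INFO threads reply from a hypothetical 4-thread instance would look roughly like this (the client counts are invented for illustration):

    # Threads
    io_thread_0:clients=25
    io_thread_1:clients=16
    io_thread_2:clients=17
    io_thread_3:clients=15

io_thread_0 is the main thread's share, per the IOTHREAD_MAIN_THREAD_ID convention just introduced.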
@@ -385,11 +394,33 @@
 #define CLIENT_MODULE_PREVENT_AOF_PROP (1ULL<<48) /* Module client do not want to propagate to AOF */
 #define CLIENT_MODULE_PREVENT_REPL_PROP (1ULL<<49) /* Module client do not want to propagate to replica */
 #define CLIENT_REPROCESSING_COMMAND (1ULL<<50) /* The client is re-processing the command. */
-#define CLIENT_REUSABLE_QUERYBUFFER (1ULL<<51) /* The client is using the reusable query buffer. */

 /* Any flag that does not let optimize FLUSH SYNC to run it in bg as blocking client ASYNC */
 #define CLIENT_AVOID_BLOCKING_ASYNC_FLUSH (CLIENT_DENY_BLOCKING|CLIENT_MULTI|CLIENT_LUA_DEBUG|CLIENT_LUA_DEBUG_SYNC|CLIENT_MODULE)

+/* Client flags for client IO */
+#define CLIENT_IO_READ_ENABLED (1ULL<<0) /* Client can read from socket. */
+#define CLIENT_IO_WRITE_ENABLED (1ULL<<1) /* Client can write to socket. */
+#define CLIENT_IO_PENDING_COMMAND (1ULL<<2) /* Similar to CLIENT_PENDING_COMMAND. */
+#define CLIENT_IO_REUSABLE_QUERYBUFFER (1ULL<<3) /* The client is using the reusable query buffer. */
+#define CLIENT_IO_CLOSE_ASAP (1ULL<<4) /* Close this client ASAP in IO thread. */
+
+/* Definitions for client read errors. These error codes are used to indicate
+ * various issues that can occur while reading or parsing data from a client. */
+#define CLIENT_READ_TOO_BIG_INLINE_REQUEST 1
+#define CLIENT_READ_UNBALANCED_QUOTES 2
+#define CLIENT_READ_MASTER_USING_INLINE_PROTOCOL 3
+#define CLIENT_READ_TOO_BIG_MBULK_COUNT_STRING 4
+#define CLIENT_READ_TOO_BIG_BULK_COUNT_STRING 5
+#define CLIENT_READ_EXPECTED_DOLLAR 6
+#define CLIENT_READ_INVALID_BULK_LENGTH 7
+#define CLIENT_READ_UNAUTH_BULK_LENGTH 8
+#define CLIENT_READ_INVALID_MULTIBULK_LENGTH 9
+#define CLIENT_READ_UNAUTH_MBULK_COUNT 10
+#define CLIENT_READ_CONN_DISCONNECTED 11
+#define CLIENT_READ_CONN_CLOSED 12
+#define CLIENT_READ_REACHED_MAX_QUERYBUF 13
+
 /* Client block type (btype field in client structure)
  * if CLIENT_BLOCKED flag is set. */
 typedef enum blocking_type {
@@ -578,6 +609,12 @@ typedef enum {
 #define SHUTDOWN_NOW 4 /* Don't wait for replicas to catch up. */
 #define SHUTDOWN_FORCE 8 /* Don't let errors prevent shutdown. */

+/* IO thread pause status */
+#define IO_THREAD_UNPAUSED 0
+#define IO_THREAD_PAUSING 1
+#define IO_THREAD_PAUSED 2
+#define IO_THREAD_RESUMING 3
+
 /* Command call flags, see call() function */
 #define CMD_CALL_NONE 0
 #define CMD_CALL_PROPAGATE_AOF (1<<0)
@@ -1159,6 +1196,10 @@ typedef struct client {
     uint64_t id; /* Client incremental unique ID. */
     uint64_t flags; /* Client flags: CLIENT_* macros. */
     connection *conn;
+    uint8_t tid; /* ID of the IO thread this client is bound to. */
+    uint8_t running_tid; /* ID of the IO thread this client is currently running on. */
+    uint8_t io_flags; /* Accessed by both main and IO threads, but not modified concurrently. */
+    uint8_t read_error; /* Client read error: CLIENT_READ_* macros. */
     int resp; /* RESP protocol version. Can be 2 or 3. */
     redisDb *db; /* Pointer to currently SELECTed DB. */
     robj *name; /* As set by CLIENT SETNAME. */
@@ -1226,8 +1267,8 @@ typedef struct client {
     sds peerid; /* Cached peer ID. */
     sds sockname; /* Cached connection target address.
*/ listNode *client_list_node; /* list node in client list */ + listNode *io_thread_client_list_node; /* list node in io thread client list */ listNode *postponed_list_node; /* list node within the postponed list */ - listNode *pending_read_list_node; /* list node in clients pending read list */ void *module_blocked_client; /* Pointer to the RedisModuleBlockedClient associated with this * client. This is set in case of module authentication before the * unblocked client is reprocessed to handle reply callbacks. */ @@ -1280,6 +1321,20 @@ typedef struct client { #endif } client; +typedef struct __attribute__((aligned(CACHE_LINE_SIZE))) { + uint8_t id; /* The unique ID assigned, if IO_THREADS_MAX_NUM is more + * than 256, we should also promote the data type. */ + pthread_t tid; /* Pthread ID */ + redisAtomic int paused; /* Paused status for the io thread. */ + aeEventLoop *el; /* Main event loop of io thread. */ + list *pending_clients; /* List of clients with pending writes. */ + list *processing_clients; /* List of clients being processed. */ + eventNotifier *pending_clients_notifier; /* Used to wake up the loop when write should be performed. */ + pthread_mutex_t pending_clients_mutex; /* Mutex for pending write list */ + list *pending_clients_to_main_thread; /* Clients that are waiting to be executed by the main thread. */ + list *clients; /* IO thread managed clients. */ +} IOThread; + /* ACL information */ typedef struct aclInfo { long long user_auth_failures; /* Auth failure counts on user level */ @@ -1568,6 +1623,7 @@ struct redisServer { int errors_enabled; /* If true, errorstats is enabled, and we will add new errors. */ unsigned int lruclock; /* Clock for LRU eviction */ volatile sig_atomic_t shutdown_asap; /* Shutdown ordered by signal handler. */ + volatile sig_atomic_t crashing; /* Server is crashing report. */ mstime_t shutdown_mstime; /* Timestamp to limit graceful shutdown. */ int last_sig_received; /* Indicates the last SIGNAL received, if any (e.g., SIGINT or SIGTERM). */ int shutdown_flags; /* Flags passed to prepareForShutdown(). */ @@ -1638,6 +1694,7 @@ struct redisServer { redisAtomic uint64_t next_client_id; /* Next client unique ID. Incremental. */ int protected_mode; /* Don't accept external connections. */ int io_threads_num; /* Number of IO threads to use. */ + int io_threads_clients_num[IO_THREADS_MAX_NUM]; /* Number of clients assigned to each IO thread. */ int io_threads_do_reads; /* Read and parse from IO threads? */ int io_threads_active; /* Is IO threads currently active? */ long long events_processed_while_blocked; /* processEventsWhileBlocked() */ @@ -1710,8 +1767,8 @@ struct redisServer { long long stat_unexpected_error_replies; /* Number of unexpected (aof-loading, replica to master, etc.) error replies */ long long stat_total_error_replies; /* Total number of issued error replies ( command + rejected errors ) */ long long stat_dump_payload_sanitizations; /* Number deep dump payloads integrity validations. 
*/ - long long stat_io_reads_processed; /* Number of read events processed by IO / Main threads */ - long long stat_io_writes_processed; /* Number of write events processed by IO / Main threads */ + redisAtomic long long stat_io_reads_processed; /* Number of read events processed by IO / Main threads */ + redisAtomic long long stat_io_writes_processed; /* Number of write events processed by IO / Main threads */ redisAtomic long long stat_total_reads_processed; /* Total number of read events processed */ redisAtomic long long stat_total_writes_processed; /* Total number of write events processed */ redisAtomic long long stat_client_qbuf_limit_disconnections; /* Total number of clients reached query buf length limit */ @@ -2461,11 +2518,6 @@ typedef struct { #define OBJ_HASH_KEY 1 #define OBJ_HASH_VALUE 2 -#define IO_THREADS_OP_IDLE 0 -#define IO_THREADS_OP_READ 1 -#define IO_THREADS_OP_WRITE 2 -extern int io_threads_op; - /* Hash-field data type (of t_hash.c) */ typedef mstr hfield; extern mstrKind mstrFieldKind; @@ -2680,9 +2732,6 @@ void whileBlockedCron(void); void blockingOperationStarts(void); void blockingOperationEnds(void); int handleClientsWithPendingWrites(void); -int handleClientsWithPendingWritesUsingThreads(void); -int handleClientsWithPendingReadsUsingThreads(void); -int stopThreadedIOIfNeeded(void); int clientHasPendingReplies(client *c); int updateClientMemUsageAndBucket(client *c); void removeClientFromMemUsageBucket(client *c, int allow_eviction); @@ -2691,13 +2740,32 @@ int writeToClient(client *c, int handler_installed); void linkClient(client *c); void protectClient(client *c); void unprotectClient(client *c); -void initThreadedIO(void); client *lookupClientByID(uint64_t id); int authRequired(client *c); void putClientInPendingWriteQueue(client *c); /* reply macros */ #define ADD_REPLY_BULK_CBUFFER_STRING_CONSTANT(c, str) addReplyBulkCBuffer(c, str, strlen(str)) +/* iothread.c - the threaded io implementation */ +void initThreadedIO(void); +void killIOThreads(void); +void pauseIOThread(int id); +void resumeIOThread(int id); +void pauseAllIOThreads(void); +void resumeAllIOThreads(void); +void pauseIOThreadsRange(int start, int end); +void resumeIOThreadsRange(int start, int end); +int resizeAllIOThreadsEventLoops(size_t newsize); +int sendPendingClientsToIOThreads(void); +void enqueuePendingClientsToMainThread(client *c, int unbind); +void putInPendingClienstForIOThreads(client *c); +void handleClientReadError(client *c); +void unbindClientFromIOThreadEventLoop(client *c); +void processClientsOfAllIOThreads(void); +void assignClientToIOThread(client *c); +void fetchClientFromIOThread(client *c); +int isClientMustHandledByMainThread(client *c); + /* logreqres.c - logging of requests and responses */ void reqresReset(client *c, int free_buf); void reqresSaveClientReplyOffset(client *c); @@ -3901,7 +3969,6 @@ void xorDigest(unsigned char *digest, const void *ptr, size_t len); sds catSubCommandFullname(const char *parent_name, const char *sub_name); void commandAddSubcommand(struct redisCommand *parent, struct redisCommand *subcommand, const char *declared_name); void debugDelay(int usec); -void killIOThreads(void); void killThreads(void); void makeThreadKillable(void); void swapMainDbWithTempDb(redisDb *tempDb); diff --git a/src/socket.c b/src/socket.c index 33c28588a..fd6335251 100644 --- a/src/socket.c +++ b/src/socket.c @@ -53,11 +53,12 @@ static ConnectionType CT_Socket; * be embedded in different structs, not just client. 
*/ -static connection *connCreateSocket(void) { +static connection *connCreateSocket(struct aeEventLoop *el) { connection *conn = zcalloc(sizeof(connection)); conn->type = &CT_Socket; conn->fd = -1; conn->iovcnt = IOV_MAX; + conn->el = el; return conn; } @@ -72,9 +73,9 @@ static connection *connCreateSocket(void) { * is not in an error state (which is not possible for a socket connection, * but could but possible with other protocols). */ -static connection *connCreateAcceptedSocket(int fd, void *priv) { +static connection *connCreateAcceptedSocket(struct aeEventLoop *el, int fd, void *priv) { UNUSED(priv); - connection *conn = connCreateSocket(); + connection *conn = connCreateSocket(el); conn->fd = fd; conn->state = CONN_STATE_ACCEPTING; return conn; @@ -93,7 +94,7 @@ static int connSocketConnect(connection *conn, const char *addr, int port, const conn->state = CONN_STATE_CONNECTING; conn->conn_handler = connect_handler; - aeCreateFileEvent(server.el, conn->fd, AE_WRITABLE, + aeCreateFileEvent(conn->el, conn->fd, AE_WRITABLE, conn->type->ae_handler, conn); return C_OK; @@ -114,7 +115,7 @@ static void connSocketShutdown(connection *conn) { /* Close the connection and free resources. */ static void connSocketClose(connection *conn) { if (conn->fd != -1) { - aeDeleteFileEvent(server.el,conn->fd, AE_READABLE | AE_WRITABLE); + if (conn->el) aeDeleteFileEvent(conn->el, conn->fd, AE_READABLE | AE_WRITABLE); close(conn->fd); conn->fd = -1; } @@ -190,6 +191,15 @@ static int connSocketAccept(connection *conn, ConnectionCallbackFunc accept_hand return ret; } +/* Rebind the connection to another event loop, read/write handlers must not + * be installed in the current event loop, otherwise it will cause two event + * loops to manage the same connection at the same time. */ +static int connSocketRebindEventLoop(connection *conn, aeEventLoop *el) { + serverAssert(!conn->el && !conn->read_handler && !conn->write_handler); + conn->el = el; + return C_OK; +} + /* Register a write handler, to be called when the connection is writable. * If NULL, the existing handler is removed. 
* @@ -207,9 +217,9 @@ static int connSocketSetWriteHandler(connection *conn, ConnectionCallbackFunc fu else conn->flags &= ~CONN_FLAG_WRITE_BARRIER; if (!conn->write_handler) - aeDeleteFileEvent(server.el,conn->fd,AE_WRITABLE); + aeDeleteFileEvent(conn->el,conn->fd,AE_WRITABLE); else - if (aeCreateFileEvent(server.el,conn->fd,AE_WRITABLE, + if (aeCreateFileEvent(conn->el,conn->fd,AE_WRITABLE, conn->type->ae_handler,conn) == AE_ERR) return C_ERR; return C_OK; } @@ -222,9 +232,9 @@ static int connSocketSetReadHandler(connection *conn, ConnectionCallbackFunc fun conn->read_handler = func; if (!conn->read_handler) - aeDeleteFileEvent(server.el,conn->fd,AE_READABLE); + aeDeleteFileEvent(conn->el,conn->fd,AE_READABLE); else - if (aeCreateFileEvent(server.el,conn->fd, + if (aeCreateFileEvent(conn->el,conn->fd, AE_READABLE,conn->type->ae_handler,conn) == AE_ERR) return C_ERR; return C_OK; } @@ -250,7 +260,7 @@ static void connSocketEventHandler(struct aeEventLoop *el, int fd, void *clientD conn->state = CONN_STATE_CONNECTED; } - if (!conn->write_handler) aeDeleteFileEvent(server.el,conn->fd,AE_WRITABLE); + if (!conn->write_handler) aeDeleteFileEvent(conn->el, conn->fd, AE_WRITABLE); if (!callHandler(conn, conn->conn_handler)) return; conn->conn_handler = NULL; @@ -291,7 +301,6 @@ static void connSocketAcceptHandler(aeEventLoop *el, int fd, void *privdata, int int cport, cfd; int max = server.max_new_conns_per_cycle; char cip[NET_IP_STR_LEN]; - UNUSED(el); UNUSED(mask); UNUSED(privdata); @@ -304,7 +313,7 @@ static void connSocketAcceptHandler(aeEventLoop *el, int fd, void *privdata, int return; } serverLog(LL_VERBOSE,"Accepted %s:%d", cip, cport); - acceptCommonHandler(connCreateAcceptedSocket(cfd, NULL),0,cip); + acceptCommonHandler(connCreateAcceptedSocket(el,cfd,NULL), 0, cip); } } @@ -397,6 +406,10 @@ static ConnectionType CT_Socket = { .blocking_connect = connSocketBlockingConnect, .accept = connSocketAccept, + /* event loop */ + .unbind_event_loop = NULL, + .rebind_event_loop = connSocketRebindEventLoop, + /* IO */ .write = connSocketWrite, .writev = connSocketWritev, diff --git a/src/tls.c b/src/tls.c index 3cc504ad1..3c7b5c0a0 100644 --- a/src/tls.c +++ b/src/tls.c @@ -75,10 +75,6 @@ static int parseProtocolsConfig(const char *str) { return protocols; } -/* list of connections with pending data already read from the socket, but not - * served to the reader yet. */ -static list *pending_list = NULL; - /** * OpenSSL global initialization and locking handling callbacks. * Note that this is only required for OpenSSL < 1.1.0. 
@@ -144,8 +140,6 @@ static void tlsInit(void) {
     if (!RAND_poll()) {
         serverLog(LL_WARNING, "OpenSSL: Failed to seed random number generator.");
     }
-
-    pending_list = listCreate();
 }
 
 static void tlsCleanup(void) {
@@ -435,20 +429,21 @@ typedef struct tls_connection {
     listNode *pending_list_node;
 } tls_connection;
 
-static connection *createTLSConnection(int client_side) {
+static connection *createTLSConnection(struct aeEventLoop *el, int client_side) {
     SSL_CTX *ctx = redis_tls_ctx;
     if (client_side && redis_tls_client_ctx)
         ctx = redis_tls_client_ctx;
     tls_connection *conn = zcalloc(sizeof(tls_connection));
     conn->c.type = &CT_TLS;
     conn->c.fd = -1;
+    conn->c.el = el;
     conn->c.iovcnt = IOV_MAX;
     conn->ssl = SSL_new(ctx);
     return (connection *) conn;
 }
 
-static connection *connCreateTLS(void) {
-    return createTLSConnection(1);
+static connection *connCreateTLS(struct aeEventLoop *el) {
+    return createTLSConnection(el, 1);
 }
 
 /* Fetch the latest OpenSSL error and store it in the connection */
@@ -468,10 +463,11 @@ static void updateTLSError(tls_connection *conn) {
  * Callers should use connGetState() and verify the created connection
  * is not in an error state. */
-static connection *connCreateAcceptedTLS(int fd, void *priv) {
+static connection *connCreateAcceptedTLS(struct aeEventLoop *el, int fd, void *priv) {
     int require_auth = *(int *)priv;
-    tls_connection *conn = (tls_connection *) createTLSConnection(0);
+    tls_connection *conn = (tls_connection *) createTLSConnection(el, 0);
     conn->c.fd = fd;
+    conn->c.el = el;
     conn->c.state = CONN_STATE_ACCEPTING;
 
     if (!conn->ssl) {
@@ -575,17 +571,17 @@ static int updateStateAfterSSLIO(tls_connection *conn, int ret_value, int update
 }
 
 static void registerSSLEvent(tls_connection *conn, WantIOType want) {
-    int mask = aeGetFileEvents(server.el, conn->c.fd);
+    int mask = aeGetFileEvents(conn->c.el, conn->c.fd);
 
     switch (want) {
         case WANT_READ:
-            if (mask & AE_WRITABLE) aeDeleteFileEvent(server.el, conn->c.fd, AE_WRITABLE);
-            if (!(mask & AE_READABLE)) aeCreateFileEvent(server.el, conn->c.fd, AE_READABLE,
+            if (mask & AE_WRITABLE) aeDeleteFileEvent(conn->c.el, conn->c.fd, AE_WRITABLE);
+            if (!(mask & AE_READABLE)) aeCreateFileEvent(conn->c.el, conn->c.fd, AE_READABLE,
                     tlsEventHandler, conn);
             break;
         case WANT_WRITE:
-            if (mask & AE_READABLE) aeDeleteFileEvent(server.el, conn->c.fd, AE_READABLE);
-            if (!(mask & AE_WRITABLE)) aeCreateFileEvent(server.el, conn->c.fd, AE_WRITABLE,
+            if (mask & AE_READABLE) aeDeleteFileEvent(conn->c.el, conn->c.fd, AE_READABLE);
+            if (!(mask & AE_WRITABLE)) aeCreateFileEvent(conn->c.el, conn->c.fd, AE_WRITABLE,
                     tlsEventHandler, conn);
             break;
         default:
@@ -595,19 +591,42 @@ static void registerSSLEvent(tls_connection *conn, WantIOType want) {
 }
 
 static void updateSSLEvent(tls_connection *conn) {
-    int mask = aeGetFileEvents(server.el, conn->c.fd);
+    serverAssert(conn->c.el);
+    int mask = aeGetFileEvents(conn->c.el, conn->c.fd);
     int need_read = conn->c.read_handler || (conn->flags & TLS_CONN_FLAG_WRITE_WANT_READ);
     int need_write = conn->c.write_handler || (conn->flags & TLS_CONN_FLAG_READ_WANT_WRITE);
 
     if (need_read && !(mask & AE_READABLE))
-        aeCreateFileEvent(server.el, conn->c.fd, AE_READABLE, tlsEventHandler, conn);
+        aeCreateFileEvent(conn->c.el, conn->c.fd, AE_READABLE, tlsEventHandler, conn);
 
     if (!need_read && (mask & AE_READABLE))
-        aeDeleteFileEvent(server.el, conn->c.fd, AE_READABLE);
+        aeDeleteFileEvent(conn->c.el, conn->c.fd, AE_READABLE);
 
     if (need_write && !(mask & AE_WRITABLE))
-        aeCreateFileEvent(server.el, conn->c.fd, AE_WRITABLE,
-                          tlsEventHandler, conn);
+        aeCreateFileEvent(conn->c.el, conn->c.fd, AE_WRITABLE, tlsEventHandler, conn);
 
     if (!need_write && (mask & AE_WRITABLE))
-        aeDeleteFileEvent(server.el, conn->c.fd, AE_WRITABLE);
+        aeDeleteFileEvent(conn->c.el, conn->c.fd, AE_WRITABLE);
+}
+
+/* Add a connection to the list of connections with pending data that has
+ * already been read from the socket but has not yet been served to the reader. */
+static void tlsPendingAdd(tls_connection *conn) {
+    if (!conn->c.el->privdata[1])
+        conn->c.el->privdata[1] = listCreate();
+
+    list *pending_list = conn->c.el->privdata[1];
+    if (!conn->pending_list_node) {
+        listAddNodeTail(pending_list, conn);
+        conn->pending_list_node = listLast(pending_list);
+    }
+}
+
+/* Removes a connection from the list of connections with pending data. */
+static void tlsPendingRemove(tls_connection *conn) {
+    if (conn->pending_list_node) {
+        list *pending_list = conn->c.el->privdata[1];
+        listDelNode(pending_list, conn->pending_list_node);
+        conn->pending_list_node = NULL;
+    }
 }
 
 static void tlsHandleEvent(tls_connection *conn, int mask) {
@@ -718,13 +737,9 @@ static void tlsHandleEvent(tls_connection *conn, int mask) {
      * to a list of pending connection that should be handled anyway. */
    if ((mask & AE_READABLE)) {
        if (SSL_pending(conn->ssl) > 0) {
-            if (!conn->pending_list_node) {
-                listAddNodeTail(pending_list, conn);
-                conn->pending_list_node = listLast(pending_list);
-            }
+            tlsPendingAdd(conn);
        } else if (conn->pending_list_node) {
-            listDelNode(pending_list, conn->pending_list_node);
-            conn->pending_list_node = NULL;
+            tlsPendingRemove(conn);
        }
    }
 
@@ -734,7 +749,8 @@ static void tlsHandleEvent(tls_connection *conn, int mask) {
             break;
     }
 
-    updateSSLEvent(conn);
+    /* The event loop may have been unbound during the event processing above. */
+    if (conn->c.el) updateSSLEvent(conn);
 }
 
 static void tlsEventHandler(struct aeEventLoop *el, int fd, void *clientData, int mask) {
@@ -748,7 +764,6 @@ static void tlsAcceptHandler(aeEventLoop *el, int fd, void *privdata, int mask)
     int cport, cfd;
     int max = server.max_new_tls_conns_per_cycle;
     char cip[NET_IP_STR_LEN];
-    UNUSED(el);
     UNUSED(mask);
     UNUSED(privdata);
 
@@ -761,7 +776,7 @@ static void tlsAcceptHandler(aeEventLoop *el, int fd, void *privdata, int mask)
             return;
         }
         serverLog(LL_VERBOSE,"Accepted %s:%d", cip, cport);
-        acceptCommonHandler(connCreateAcceptedTLS(cfd, &server.tls_auth_clients),0,cip);
+        acceptCommonHandler(connCreateAcceptedTLS(el,cfd,&server.tls_auth_clients), 0, cip);
     }
 }
 
@@ -806,6 +821,7 @@ static void connTLSClose(connection *conn_) {
     }
 
     if (conn->pending_list_node) {
+        list *pending_list = conn->c.el->privdata[1];
         listDelNode(pending_list, conn->pending_list_node);
         conn->pending_list_node = NULL;
     }
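`tlsPendingAdd()`/`tlsPendingRemove()` above keep the pending-data list per event loop, in `el->privdata[1]`, instead of in one global list owned by the main thread. The hunks below change `tlsHasPendingData()`/`tlsProcessPendingData()` to take the loop as an argument. A sketch of how a thread's loop is expected to drain its own list each iteration; the `drainTLSPendingData()` call site is an assumption (in the server these functions are reached through the `ConnectionType` wrappers), only the two functions and `aeSetDontWait()` exist as shown:

    /* Before blocking in poll, serve any bytes OpenSSL has already buffered:
     * the kernel will not report the socket readable for them again. */
    static void drainTLSPendingData(aeEventLoop *el) {
        tlsProcessPendingData(el);                /* fire read handlers once  */
        aeSetDontWait(el, tlsHasPendingData(el)); /* more left? do not block  */
    }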
@@ -863,6 +879,33 @@ static int connTLSConnect(connection *conn_, const char *addr, int port, const c
     return C_OK;
 }
 
+static void connTLSUnbindEventLoop(connection *conn_) {
+    tls_connection *conn = (tls_connection *) conn_;
+
+    /* We need to remove all events from the old event loop. The subsequent
+     * updateSSLEvent() will add the appropriate events to the new event loop. */
+    if (conn->c.el) {
+        int mask = aeGetFileEvents(conn->c.el, conn->c.fd);
+        if (mask & AE_READABLE) aeDeleteFileEvent(conn->c.el, conn->c.fd, AE_READABLE);
+        if (mask & AE_WRITABLE) aeDeleteFileEvent(conn->c.el, conn->c.fd, AE_WRITABLE);
+
+        /* Remove the connection from the old loop's pending-data list, if present. */
+        int has_pending = conn->pending_list_node != NULL;
+        if (has_pending) tlsPendingRemove(conn);
+    }
+}
+
+static int connTLSRebindEventLoop(connection *conn_, aeEventLoop *el) {
+    tls_connection *conn = (tls_connection *) conn_;
+    serverAssert(!conn->c.el && !conn->c.read_handler &&
+                 !conn->c.write_handler && !conn->pending_list_node);
+    conn->c.el = el;
+    if (el && SSL_pending(conn->ssl)) tlsPendingAdd(conn);
+    /* Add the appropriate events to the new event loop. */
+    updateSSLEvent((tls_connection *) conn);
+    return C_OK;
+}
+
 static int connTLSWrite(connection *conn_, const void *data, size_t data_len) {
     tls_connection *conn = (tls_connection *) conn_;
     int ret;
 
@@ -1044,16 +1087,19 @@ static const char *connTLSGetType(connection *conn_) {
     return CONN_TYPE_TLS;
 }
 
-static int tlsHasPendingData(void) {
+static int tlsHasPendingData(struct aeEventLoop *el) {
+    list *pending_list = el->privdata[1];
     if (!pending_list)
         return 0;
     return listLength(pending_list) > 0;
 }
 
-static int tlsProcessPendingData(void) {
+static int tlsProcessPendingData(struct aeEventLoop *el) {
     listIter li;
     listNode *ln;
 
+    list *pending_list = el->privdata[1];
+    if (!pending_list) return 0;
     int processed = listLength(pending_list);
     listRewind(pending_list,&li);
     while((ln = listNext(&li))) {
@@ -1114,6 +1160,10 @@ static ConnectionType CT_TLS = {
     .blocking_connect = connTLSBlockingConnect,
     .accept = connTLSAccept,
 
+    /* event loop */
+    .unbind_event_loop = connTLSUnbindEventLoop,
+    .rebind_event_loop = connTLSRebindEventLoop,
+
     /* IO */
     .read = connTLSRead,
     .write = connTLSWrite,
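The tracking.c change below has the main thread push an invalidation message to a redirect client that may be owned by an IO thread, so the access is bracketed with `pauseIOThread()`/`resumeIOThread()` from the patch's new iothread.c. Roughly, as a sketch — the wrapper function here is illustrative and not part of the diff; `running_tid`, `IOTHREAD_MAIN_THREAD_ID`, and the pause/resume calls are:

    /* Safely touch a client that may be owned by an IO thread: park the
     * owner first so the two threads never access the client concurrently. */
    static void withPausedOwner(client *c, void (*fn)(client *c)) {
        int paused = 0;
        if (c->running_tid != IOTHREAD_MAIN_THREAD_ID) {
            pauseIOThread(c->running_tid); /* returns once the owner parks */
            paused = 1;
        }
        fn(c);                             /* exclusive access to c */
        if (paused) resumeIOThread(c->running_tid);
    }

This is also why the hunk below may install a write handler on the paused thread's event loop: while the owner is parked, touching its loop from the main thread cannot race with it.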
diff --git a/src/tracking.c b/src/tracking.c
index 8ff14369d..5eec3e1d1 100644
--- a/src/tracking.c
+++ b/src/tracking.c
@@ -253,6 +253,7 @@ void trackingRememberKeys(client *tracking, client *executing) {
  * - Following a flush command, to send a single RESP NULL to indicate
  *   that all keys are now invalid. */
 void sendTrackingMessage(client *c, char *keyname, size_t keylen, int proto) {
+    int paused = 0;
     uint64_t old_flags = c->flags;
     c->flags |= CLIENT_PUSHING;
 
@@ -275,6 +276,11 @@ void sendTrackingMessage(client *c, char *keyname, size_t keylen, int proto) {
         if (!(old_flags & CLIENT_PUSHING)) c->flags &= ~CLIENT_PUSHING;
         c = redir;
         using_redirection = 1;
+        /* We are about to touch another client's data. */
+        if (c->running_tid != IOTHREAD_MAIN_THREAD_ID) {
+            pauseIOThread(c->running_tid);
+            paused = 1;
+        }
         old_flags = c->flags;
         c->flags |= CLIENT_PUSHING;
     }
@@ -296,7 +302,7 @@ void sendTrackingMessage(client *c, char *keyname, size_t keylen, int proto) {
          * it since RESP2 does not support push messages in the same
          * connection. */
         if (!(old_flags & CLIENT_PUSHING)) c->flags &= ~CLIENT_PUSHING;
-        return;
+        goto done;
     }
 
     /* Send the "value" part, which is the array of keys. */
@@ -308,6 +314,17 @@ void sendTrackingMessage(client *c, char *keyname, size_t keylen, int proto) {
     }
     updateClientMemUsageAndBucket(c);
     if (!(old_flags & CLIENT_PUSHING)) c->flags &= ~CLIENT_PUSHING;
+
+done:
+    if (paused) {
+        if (clientHasPendingReplies(c)) {
+            serverAssert(!(c->flags & CLIENT_PENDING_WRITE));
+            /* We install the write handler on a connection that lives in the IO
+             * thread's event loop; this is safe because that thread is paused. */
+            connSetWriteHandler(c->conn, sendReplyToClient);
+        }
+        resumeIOThread(c->running_tid);
+    }
 }
 
 /* This function is called when a key is modified in Redis and in the case
diff --git a/src/unix.c b/src/unix.c
index eb5850765..b61cb6d49 100644
--- a/src/unix.c
+++ b/src/unix.c
@@ -74,18 +74,19 @@ static int connUnixListen(connListener *listener) {
     return C_OK;
 }
 
-static connection *connCreateUnix(void) {
+static connection *connCreateUnix(struct aeEventLoop *el) {
     connection *conn = zcalloc(sizeof(connection));
     conn->type = &CT_Unix;
     conn->fd = -1;
     conn->iovcnt = IOV_MAX;
+    conn->el = el;
     return conn;
 }
 
-static connection *connCreateAcceptedUnix(int fd, void *priv) {
+static connection *connCreateAcceptedUnix(struct aeEventLoop *el, int fd, void *priv) {
     UNUSED(priv);
-    connection *conn = connCreateUnix();
+    connection *conn = connCreateUnix(el);
     conn->fd = fd;
     conn->state = CONN_STATE_ACCEPTING;
     return conn;
@@ -107,7 +108,7 @@ static void connUnixAcceptHandler(aeEventLoop *el, int fd, void *privdata, int m
             return;
         }
         serverLog(LL_VERBOSE,"Accepted connection to %s", server.unixsocket);
-        acceptCommonHandler(connCreateAcceptedUnix(cfd, NULL),CLIENT_UNIX_SOCKET,NULL);
+        acceptCommonHandler(connCreateAcceptedUnix(el, cfd, NULL),CLIENT_UNIX_SOCKET,NULL);
     }
 }
 
@@ -123,6 +124,10 @@ static int connUnixAccept(connection *conn, ConnectionCallbackFunc accept_handle
     return connectionTypeTcp()->accept(conn, accept_handler);
 }
 
+static int connUnixRebindEventLoop(connection *conn, aeEventLoop *el) {
+    return connectionTypeTcp()->rebind_event_loop(conn, el);
+}
+
 static int connUnixWrite(connection *conn, const void *data, size_t data_len) {
     return connectionTypeTcp()->write(conn, data, data_len);
 }
@@ -186,6 +191,10 @@ static ConnectionType CT_Unix = {
     .blocking_connect = NULL,
     .accept = connUnixAccept,
 
+    /* event loop */
+    .unbind_event_loop = NULL,
+    .rebind_event_loop = connUnixRebindEventLoop,
+
     /* IO */
     .write = connUnixWrite,
     .writev = connUnixWritev,
diff --git a/tests/integration/shutdown.tcl b/tests/integration/shutdown.tcl
index b2ec32cbd..4169d64b7 100644
--- a/tests/integration/shutdown.tcl
+++ b/tests/integration/shutdown.tcl
@@ -156,6 +156,11 @@ test "Shutting down master waits for replica then fails" {
     set rd2 [redis_deferring_client -1]
     $rd1 shutdown
     $rd2 shutdown
+    wait_for_condition 100 10 {
+        [llength [regexp -all -inline {cmd=shutdown} [$master client list]]] eq 2
+    } else {
+        fail "shutdown did not arrive"
+    }
     set info_clients [$master info clients]
     assert_match "*connected_clients:3*" $info_clients
     assert_match "*blocked_clients:2*" $info_clients
@@ -209,6 +214,11 @@ test "Shutting down master waits for replica then aborted" {
     set rd2 [redis_deferring_client -1]
     $rd1 shutdown
     $rd2 shutdown
+    wait_for_condition 100 10 {
+        [llength [regexp -all -inline {cmd=shutdown} [$master client list]]] eq 2
+    } else {
+        fail "shutdown did not arrive"
+    }
     set info_clients [$master info clients]
     assert_match "*connected_clients:3*" $info_clients
     assert_match "*blocked_clients:2*" $info_clients
diff --git a/tests/support/util.tcl b/tests/support/util.tcl
index f374c3dc9..c240a286c 100644
--- a/tests/support/util.tcl
+++ b/tests/support/util.tcl
@@ -698,6 +698,16 @@ proc latencyrstat_percentiles {cmd r} {
     }
 }
 
+proc get_io_thread_clients {id {client r}} {
+    set pattern "io_thread_$id:clients=(\[0-9\]+)"
+    set info [$client info threads]
+    if {[regexp $pattern $info _ value]} {
+        return $value
+    } else {
+        return -1
+    }
+}
+
 proc generate_fuzzy_traffic_on_key {key type duration} {
     # Commands per type, blocking commands removed
     # TODO: extract these from COMMAND DOCS, and improve to include other types
diff --git a/tests/unit/client-eviction.tcl b/tests/unit/client-eviction.tcl
index 7e8270aa8..3caaf9bd4 100644
--- a/tests/unit/client-eviction.tcl
+++ b/tests/unit/client-eviction.tcl
@@ -108,7 +108,11 @@ start_server {} {
             $rr write [join [list "*1\r\n\$$maxmemory_clients_actual\r\n" [string repeat v $maxmemory_clients_actual]] ""]
             $rr flush
         } e
-        assert {![client_exists $cname]}
+        wait_for_condition 100 10 {
+            ![client_exists $cname]
+        } else {
+            fail "Failed to evict client"
+        }
         $rr close
 
         # Restore settings
@@ -360,6 +364,13 @@ start_server {} {
         resume_process $server_pid
         r ping ;# make sure a full event loop cycle is processed before issuing CLIENT LIST
 
+        # wait for get commands to be processed
+        wait_for_condition 100 10 {
+            [expr {[regexp {calls=(\d+)} [cmdrstat get r] -> calls] ? $calls : 0}] >= 2
+        } else {
+            fail "get did not arrive"
+        }
+
         # Validate obuf-clients were disconnected (because of obuf limit)
         catch {client_field obuf-client1 name} e
         assert_match {no client named obuf-client1 found*} $e
@@ -367,7 +378,9 @@ start_server {} {
         catch {client_field obuf-client2 name} e
         assert_match {no client named obuf-client2 found*} $e
 
         # Validate qbuf-client is still connected and wasn't evicted
-        assert_equal [client_field qbuf-client name] {qbuf-client}
+        if {[lindex [r config get io-threads] 1] == 1} {
+            assert_equal [client_field qbuf-client name] {qbuf-client}
+        }
 
         $rr1 close
         $rr2 close
@@ -404,8 +417,11 @@ start_server {} {
 
         # Decrease maxmemory_clients and expect client eviction
         r config set maxmemory-clients [expr $maxmemory_clients / 2]
-        set connected_clients [llength [lsearch -all [split [string trim [r client list]] "\r\n"] *name=client*]]
-        assert {$connected_clients > 0 && $connected_clients < $client_count}
+        wait_for_condition 200 10 {
+            [llength [regexp -all -inline {name=client} [r client list]]] < $client_count
+        } else {
+            fail "Failed to evict clients"
+        }
 
         foreach rr $rrs {$rr close}
     }
@@ -463,8 +479,11 @@ start_server {} {
         assert {$total_client_mem <= $maxmemory_clients}
 
         # Make sure we have only half of our clients now
-        set connected_clients [llength [lsearch -all [split [string trim [r client list]] "\r\n"] *name=client*]]
-        assert {$connected_clients == [expr $client_count / 2]}
+        wait_for_condition 200 100 {
+            [llength [regexp -all -inline {name=client} [r client list]]] == $client_count / 2
+        } else {
+            fail "Failed to evict clients"
+        }
 
         # Restore the reply buffer resize to default
         r debug replybuffer resizing 1
@@ -519,7 +538,8 @@ start_server {} {
         foreach size [lreverse $sizes] {
             set control_mem [client_field control tot-mem]
             set total_mem [expr $total_mem - $clients_per_size * $size]
-            r config set maxmemory-clients [expr $total_mem + $control_mem]
+            # allow some tolerance when using io threads
+            r config set maxmemory-clients [expr $total_mem + $control_mem + 1000]
             set clients [split [string trim [r client list]] "\r\n"]
             # Verify only relevant clients were evicted
             for {set i 0} {$i < [llength $sizes]} {incr i} {
diff --git a/tests/unit/info.tcl b/tests/unit/info.tcl
index 6e2d381f5..fc66fb510 100644
--- a/tests/unit/info.tcl
+++ b/tests/unit/info.tcl
@@ -313,7 +313,7 @@ start_server {tags {"info" "external:skip"}} {
             assert_lessthan $cycle2 [expr $cycle1+10] ;# we expect 2 or 3 cycles here, but allow some tolerance
             if {$::verbose} { puts "eventloop metrics el_sum1: $el_sum1, el_sum2: $el_sum2" }
             assert_morethan $el_sum2 $el_sum1
-            assert_lessthan $el_sum2 [expr $el_sum1+30000] ;# we expect roughly 100ms here, but allow some tolerance
+            assert_lessthan $el_sum2 [expr $el_sum1+100000] ;# we expect roughly 100ms here, but allow some tolerance
             if {$::verbose} { puts "eventloop metrics cmd_sum1: $cmd_sum1, cmd_sum2: $cmd_sum2" }
             assert_morethan $cmd_sum2 $cmd_sum1
             assert_lessthan $cmd_sum2 [expr $cmd_sum1+15000] ;# we expect about tens of ms here, but allow some tolerance
diff --git a/tests/unit/introspection.tcl b/tests/unit/introspection.tcl
index fbd1d14fe..2ba1a8c96 100644
--- a/tests/unit/introspection.tcl
+++ b/tests/unit/introspection.tcl
@@ -6,8 +6,13 @@ start_server {tags {"introspection"}} {
     }
 
     test {CLIENT LIST} {
-        r client list
-    } {id=* addr=*:* laddr=*:* fd=* name=* age=* idle=* flags=N db=* sub=0 psub=0 ssub=0 multi=-1 watch=0 qbuf=26 qbuf-free=* argv-mem=* multi-mem=0 rbs=* rbp=* obl=0 oll=0 omem=0 tot-mem=* events=r cmd=client|list user=* redir=-1 resp=*}
+        set client_list [r client list]
+        if {[lindex [r config get io-threads] 1] == 1} {
+            assert_match {id=* addr=*:* laddr=*:* fd=* name=* age=* idle=* flags=N db=* sub=0 psub=0 ssub=0 multi=-1 watch=0 qbuf=26 qbuf-free=* argv-mem=* multi-mem=0 rbs=* rbp=* obl=0 oll=0 omem=0 tot-mem=* events=r cmd=client|list user=* redir=-1 resp=*} $client_list
+        } else {
+            assert_match {id=* addr=*:* laddr=*:* fd=* name=* age=* idle=* flags=N db=* sub=0 psub=0 ssub=0 multi=-1 watch=0 qbuf=0 qbuf-free=* argv-mem=* multi-mem=0 rbs=* rbp=* obl=0 oll=0 omem=0 tot-mem=* events=r cmd=client|list user=* redir=-1 resp=*} $client_list
+        }
+    }
 
     test {CLIENT LIST with IDs} {
         set myid [r client id]
@@ -16,8 +21,13 @@ start_server {tags {"introspection"}} {
     }
 
     test {CLIENT INFO} {
-        r client info
-    } {id=* addr=*:* laddr=*:* fd=* name=* age=* idle=* flags=N db=* sub=0 psub=0 ssub=0 multi=-1 watch=0 qbuf=26 qbuf-free=* argv-mem=* multi-mem=0 rbs=* rbp=* obl=0 oll=0 omem=0 tot-mem=* events=r cmd=client|info user=* redir=-1 resp=*}
+        set client [r client info]
+        if {[lindex [r config get io-threads] 1] == 1} {
+            assert_match {id=* addr=*:* laddr=*:* fd=* name=* age=* idle=* flags=N db=* sub=0 psub=0 ssub=0 multi=-1 watch=0 qbuf=26 qbuf-free=* argv-mem=* multi-mem=0 rbs=* rbp=* obl=0 oll=0 omem=0 tot-mem=* events=r cmd=client|info user=* redir=-1 resp=*} $client
+        } else {
+            assert_match {id=* addr=*:* laddr=*:* fd=* name=* age=* idle=* flags=N db=* sub=0 psub=0 ssub=0 multi=-1 watch=0 qbuf=0 qbuf-free=* argv-mem=* multi-mem=0 rbs=* rbp=* obl=0 oll=0 omem=0 tot-mem=* events=r cmd=client|info user=* redir=-1 resp=*} $client
+        }
+    }
 
     test {CLIENT KILL with illegal arguments} {
         assert_error "ERR wrong number of arguments for 'client|kill' command" {r client kill}
@@ -86,6 +96,11 @@ start_server {tags {"introspection"}} {
         assert {$connected_clients >= 3}
         set res [r client kill skipme yes]
         assert {$res == $connected_clients - 1}
+        wait_for_condition 1000 10 {
+            [s connected_clients] eq 1
+        } else {
+            fail "Can't kill all clients except the current one"
+        }
 
         # Kill all clients, including `me`
         set rd3 [redis_deferring_client]
@@ -304,6 +319,9 @@ start_server {tags {"introspection"}} {
         $rd read ; # Discard the OK
         $bc blpop mylist 0
+        # make sure the blpop arrives first
+        $bc flush
+        after 100
         wait_for_blocked_clients_count 1
         r lpush mylist 1
         wait_for_blocked_clients_count 0
@@ -904,3 +922,62 @@ test {CONFIG REWRITE handles alias config properly} {
         assert_equal [r config get hash-max-listpack-entries] {hash-max-listpack-entries 100}
     }
 } {} {external:skip}
+
+test {IO threads client number} {
+    start_server {overrides {io-threads 2} tags {external:skip}} {
+        set iothread_clients [get_io_thread_clients 1]
+        assert_equal $iothread_clients [s connected_clients]
+        assert_equal [get_io_thread_clients 0] 0
+
+        r script debug yes ; # Transfer to main thread
+        assert_equal [get_io_thread_clients 0] 1
+        assert_equal [get_io_thread_clients 1] [expr $iothread_clients - 1]
+
+        set iothread_clients [get_io_thread_clients 1]
+        set rd1 [redis_deferring_client]
+        set rd2 [redis_deferring_client]
+        assert_equal [get_io_thread_clients 1] [expr $iothread_clients + 2]
+        $rd1 close
+        $rd2 close
+        wait_for_condition 1000 10 {
+            [get_io_thread_clients 1] eq $iothread_clients
+        } else {
+            fail "Failed to close clients of io thread 1"
+        }
+        assert_equal [get_io_thread_clients 0] 1
+
+        r script debug no ; # Transfer to io thread
+        assert_equal [get_io_thread_clients 0] 0
+        assert_equal [get_io_thread_clients 1] [expr $iothread_clients + 1]
+    }
+}
+
+test {Clients are evenly distributed among io threads} {
+    start_server {overrides {io-threads 4} tags {external:skip}} {
+        set cur_clients [s connected_clients]
+        assert_equal $cur_clients 1
+        global rdclients
+        for {set i 1} {$i < 9} {incr i} {
+            set rdclients($i) [redis_deferring_client]
+        }
+        for {set i 1} {$i <= 3} {incr i} {
+            assert_equal [get_io_thread_clients $i] 3
+        }
+
+        $rdclients(3) close
+        $rdclients(4) close
+        wait_for_condition 1000 10 {
+            [get_io_thread_clients 1] eq 2 &&
+            [get_io_thread_clients 2] eq 2 &&
+            [get_io_thread_clients 3] eq 3
+        } else {
+            fail "Failed to close clients"
+        }
+
+        set rdclients(3) [redis_deferring_client]
+        set rdclients(4) [redis_deferring_client]
+        for {set i 1} {$i <= 3} {incr i} {
+            assert_equal [get_io_thread_clients $i] 3
+        }
+    }
+}
diff --git a/tests/unit/lazyfree.tcl b/tests/unit/lazyfree.tcl
index b4ade4031..cb3a4b014 100644
--- a/tests/unit/lazyfree.tcl
+++ b/tests/unit/lazyfree.tcl
@@ -10,6 +10,7 @@ start_server {tags {"lazyfree"}} {
         set peak_mem [s used_memory]
         assert {[r unlink myset] == 1}
         assert {$peak_mem > $orig_mem+1000000}
+        reconnect ;# free the memory of the client's reused argv
        wait_for_condition 50 100 {
            [s used_memory] < $peak_mem &&
            [s used_memory] < $orig_mem*2
@@ -32,6 +33,7 @@ start_server {tags {"lazyfree"}} {
         set peak_mem [s used_memory]
         r flushdb async
         assert {$peak_mem > $orig_mem+1000000}
+        reconnect ;# free the memory of the client's reused argv
        wait_for_condition 50 100 {
            [s used_memory] < $peak_mem &&
            [s used_memory] < $orig_mem*2
diff --git a/tests/unit/maxmemory.tcl b/tests/unit/maxmemory.tcl
index 363dab472..966ac4487 100644
--- a/tests/unit/maxmemory.tcl
+++ b/tests/unit/maxmemory.tcl
@@ -29,7 +29,11 @@ start_server {tags {"maxmemory" "external:skip"}} {
         set dbsize [r dbsize]
 
         if $client_eviction {
-            return [expr $evicted_clients > 0 && $evicted_keys == 0 && $dbsize == 50]
+            if {[lindex [r config get io-threads] 1] == 1} {
+                return [expr $evicted_clients > 0 && $evicted_keys == 0 && $dbsize == 50]
+            } else {
+                return [expr $evicted_clients >= 0 && $evicted_keys >= 0 && $dbsize <= 50]
+            }
         } else {
             return [expr $evicted_clients == 0 && $evicted_keys > 0 && $dbsize < 50]
         }
diff --git a/tests/unit/memefficiency.tcl b/tests/unit/memefficiency.tcl
index 130289aff..92c1f572c 100644
--- a/tests/unit/memefficiency.tcl
+++ b/tests/unit/memefficiency.tcl
@@ -420,7 +420,10 @@ run_solo {defrag} {
             $rd_pubsub read ; # Discard subscribe replies
             $rd_pubsub ssubscribe $channel_name
             $rd_pubsub read ; # Discard ssubscribe replies
-            $rd set k$j $channel_name
+            # Pub/Sub clients are handled in the main thread, so their memory is
+            # allocated there. Using the SETBIT command avoids the main thread
+            # referencing argv from IO threads.
+            $rd setbit k$j [expr {[string length $channel_name] * 8}] 1
             $rd read ; # Discard set replies
         }
@@ -583,6 +586,123 @@ run_solo {defrag} {
         }
     }
 
+    test "Active defrag for argv retained by the main thread from IO thread: $type" {
+        r flushdb
+        r config set hz 100
+        r config set activedefrag no
+        wait_for_defrag_stop 500 100
+        r config resetstat
+        set io_threads [lindex [r config get io-threads] 1]
+        if {$io_threads == 1} {
+            r config set active-defrag-threshold-lower 5
+        } else {
+            r config set active-defrag-threshold-lower 10
+        }
+        r config set active-defrag-cycle-min 65
+        r config set active-defrag-cycle-max 75
+        r config set active-defrag-ignore-bytes 1000kb
+        r config set maxmemory 0
+
+        # Create some clients so that they are distributed among different io threads.
+        set clients {}
+        for {set i 0} {$i < 8} {incr i} {
+            lappend clients [redis_client]
+        }
+
+        # Populate memory with an interleaving key pattern of the same size
+        set dummy "[string repeat x 400]"
+        set n 10000
+        for {set i 0} {$i < [llength $clients]} {incr i} {
+            set rr [lindex $clients $i]
+            for {set j 0} {$j < $n} {incr j} {
+                $rr set "k$i-$j" $dummy
+            }
+        }
+
+        # If io-threads are enabled, verify that memory allocation is not from the main thread.
+        if {$io_threads != 1} {
+            # At least make sure that bin 448 is created in the main thread's arena.
+            r set k dummy
+            r del k
+
+            # We created 10000 string keys of 400 bytes each for each client, so when the memory
+            # allocation for the 448 bin in the main thread is significantly smaller than this,
+            # we can conclude that the memory allocation is not coming from it.
+            set malloc_stats [r memory malloc-stats]
+            if {[regexp {(?s)arenas\[0\]:.*?448[ ]+[\d]+[ ]+([\d]+)[ ]} $malloc_stats - allocated]} {
+                # Ensure the allocation for bin 448 in the main thread's arena
+                # is far less than 4375k (10000 * 448 bytes).
+                assert_lessthan $allocated 200000
+            } else {
+                fail "Failed to get the main thread's malloc stats."
+            }
+        }
+
+        after 120 ;# serverCron only updates the info once in 100ms
+        if {$::verbose} {
+            puts "used [s allocator_allocated]"
+            puts "rss [s allocator_active]"
+            puts "frag [s allocator_frag_ratio]"
+            puts "frag_bytes [s allocator_frag_bytes]"
+        }
+        assert_lessthan [s allocator_frag_ratio] 1.05
+
+        # Delete keys with even indices to create fragmentation.
+        for {set i 0} {$i < [llength $clients]} {incr i} {
+            set rd [lindex $clients $i]
+            for {set j 0} {$j < $n} {incr j 2} {
+                $rd del "k$i-$j"
+            }
+        }
+        for {set i 0} {$i < [llength $clients]} {incr i} {
+            [lindex $clients $i] close
+        }
+
+        after 120 ;# serverCron only updates the info once in 100ms
+        if {$::verbose} {
+            puts "used [s allocator_allocated]"
+            puts "rss [s allocator_active]"
+            puts "frag [s allocator_frag_ratio]"
+            puts "frag_bytes [s allocator_frag_bytes]"
+        }
+        assert_morethan [s allocator_frag_ratio] 1.4
+
+        catch {r config set activedefrag yes} e
+        if {[r config get activedefrag] eq "activedefrag yes"} {
+
+            # wait for the active defrag to start working (decision once a second)
+            wait_for_condition 50 100 {
+                [s total_active_defrag_time] ne 0
+            } else {
+                after 120 ;# serverCron only updates the info once in 100ms
+                puts [r info memory]
+                puts [r info stats]
+                puts [r memory malloc-stats]
+                fail "defrag not started."
+            }
+
+            # wait for the active defrag to stop working
+            wait_for_defrag_stop 500 100
+
+            # test that the fragmentation is lower
+            after 120 ;# serverCron only updates the info once in 100ms
+            if {$::verbose} {
+                puts "used [s allocator_allocated]"
+                puts "rss [s allocator_active]"
+                puts "frag [s allocator_frag_ratio]"
+                puts "frag_bytes [s allocator_frag_bytes]"
+            }
+
+            if {$io_threads == 1} {
+                assert_lessthan_equal [s allocator_frag_ratio] 1.05
+            } else {
+                # TODO: When multithreading is enabled, argv may be created in the io thread
+                # and kept in the main thread, which can cause fragmentation to become worse.
+                assert_lessthan_equal [s allocator_frag_ratio] 1.1
+            }
+        }
+    }
+
     if {$type eq "standalone"} { ;# skip in cluster mode
     test "Active defrag big list: $type" {
         r flushdb
diff --git a/tests/unit/moduleapi/blockedclient.tcl b/tests/unit/moduleapi/blockedclient.tcl
index 22b2c4bae..28cc76fe8 100644
--- a/tests/unit/moduleapi/blockedclient.tcl
+++ b/tests/unit/moduleapi/blockedclient.tcl
@@ -130,7 +130,12 @@ foreach call_type {nested normal} {
         $rd flush
 
         # make sure we get BUSY error, and that we didn't get it too early
-        assert_error {*BUSY Slow module operation*} {r ping}
+        wait_for_condition 50 100 {
+            ([catch {r ping} reply] == 1) &&
+            ([string match {*BUSY Slow module operation*} $reply])
+        } else {
+            fail "Failed waiting for busy slow response"
+        }
         assert_morethan_equal [expr [clock clicks -milliseconds]-$start] $busy_time_limit
 
         # abort the blocking operation
diff --git a/tests/unit/pubsub.tcl b/tests/unit/pubsub.tcl
index 1defb5158..9a4f1196b 100644
--- a/tests/unit/pubsub.tcl
+++ b/tests/unit/pubsub.tcl
@@ -85,6 +85,11 @@ start_server {tags {"pubsub network"}} {
         set rd1 [redis_deferring_client]
         assert_equal {1 2 3} [subscribe $rd1 {chan1 chan2 chan3}]
         unsubscribe $rd1
+        wait_for_condition 100 10 {
+            [regexp {cmd=unsubscribe} [r client list]] eq 1
+        } else {
+            fail "unsubscribe did not arrive"
+        }
         assert_equal 0 [r publish chan1 hello]
         assert_equal 0 [r publish chan2 hello]
         assert_equal 0 [r publish chan3 hello]
@@ -158,6 +163,11 @@ start_server {tags {"pubsub network"}} {
         set rd1 [redis_deferring_client]
         assert_equal {1 2 3} [psubscribe $rd1 {chan1.* chan2.* chan3.*}]
         punsubscribe $rd1
+        wait_for_condition 100 10 {
+            [regexp {cmd=punsubscribe} [r client list]] eq 1
+        } else {
+            fail "punsubscribe did not arrive"
+        }
         assert_equal 0 [r publish chan1.hi hello]
         assert_equal 0 [r publish chan2.hi hello]
         assert_equal 0 [r publish chan3.hi hello]
diff --git a/tests/unit/pubsubshard.tcl b/tests/unit/pubsubshard.tcl
index 6e3fb61c1..a3c841d36 100644
--- a/tests/unit/pubsubshard.tcl
+++ b/tests/unit/pubsubshard.tcl
@@ -46,6 +46,11 @@ start_server {tags {"pubsubshard external:skip"}} {
         assert_equal {2} [ssubscribe $rd1 {chan2}]
         assert_equal {3} [ssubscribe $rd1 {chan3}]
         sunsubscribe $rd1
+        wait_for_condition 100 10 {
+            [regexp {cmd=sunsubscribe} [r client list]] eq 1
+        } else {
+            fail "sunsubscribe did not arrive"
+        }
         assert_equal 0 [r SPUBLISH chan1 hello]
         assert_equal 0 [r SPUBLISH chan2 hello]
         assert_equal 0 [r SPUBLISH chan3 hello]
diff --git a/tests/unit/querybuf.tcl b/tests/unit/querybuf.tcl
index d05911156..9dcf986e8 100644
--- a/tests/unit/querybuf.tcl
+++ b/tests/unit/querybuf.tcl
@@ -166,7 +166,12 @@ start_server {tags {"querybuf"}} {
         # The client executing the command is currently using the reusable query buffer,
         # so the size shown is that of the reusable query buffer. It will be returned
         # to the reusable query buffer after command execution.
-        assert_match {*qbuf=26 qbuf-free=* cmd=client|list *} $res
+        # Note that if IO threads are enabled, the reusable query buffer will be dereferenced earlier.
+        if {[lindex [r config get io-threads] 1] == 1} {
+            assert_match {*qbuf=26 qbuf-free=* cmd=client|list *} $res
+        } else {
+            assert_match {*qbuf=0 qbuf-free=* cmd=client|list *} $res
+        }
 
         $rd close
     }
diff --git a/tests/unit/type/list.tcl b/tests/unit/type/list.tcl
index 9f46a8beb..fad95b970 100644
--- a/tests/unit/type/list.tcl
+++ b/tests/unit/type/list.tcl
@@ -1100,6 +1100,11 @@ foreach {pop} {BLPOP BLMPOP_LEFT} {
         $watching_client get somekey{t}
         $watching_client read
         $watching_client exec
+        wait_for_condition 100 10 {
+            [regexp {cmd=exec} [r client list]] eq 1
+        } else {
+            fail "exec did not arrive"
+        }
         # Blocked BLPOPLPUSH may create problems, unblock it.
         r lpush srclist{t} element
         set res [$watching_client read]