diff --git a/src/sentinel.c b/src/sentinel.c index dbc3e9876..f65e29876 100644 --- a/src/sentinel.c +++ b/src/sentinel.c @@ -552,10 +552,10 @@ void sentinelIsRunning(void) { /* Create a sentinelAddr object and return it on success. * On error NULL is returned and errno is set to: - * ENOENT: Can't resolve the hostname. + * ENOENT: Can't resolve the hostname, unless accept_unresolved is non-zero. * EINVAL: Invalid port number. */ -sentinelAddr *createSentinelAddr(char *hostname, int port) { +sentinelAddr *createSentinelAddr(char *hostname, int port, int is_accept_unresolved) { char ip[NET_IP_STR_LEN]; sentinelAddr *sa; @@ -565,8 +565,14 @@ sentinelAddr *createSentinelAddr(char *hostname, int port) { } if (anetResolve(NULL,hostname,ip,sizeof(ip), sentinel.resolve_hostnames ? ANET_NONE : ANET_IP_ONLY) == ANET_ERR) { - errno = ENOENT; - return NULL; + serverLog(LL_WARNING, "Failed to resolve hostname '%s'", hostname); + if (sentinel.resolve_hostnames && is_accept_unresolved) { + ip[0] = '\0'; + } + else { + errno = ENOENT; + return NULL; + } } sa = zmalloc(sizeof(*sa)); sa->hostname = sdsnew(hostname); @@ -598,6 +604,15 @@ int sentinelAddrIsEqual(sentinelAddr *a, sentinelAddr *b) { return a->port == b->port && !strcasecmp(a->ip,b->ip); } +/* Return non-zero if the two addresses are equal, either by address + * or by hostname if they could not have been resolved. + */ +int sentinelAddrOrHostnameEqual(sentinelAddr *a, sentinelAddr *b) { + return a->port == b->port && + (!strcmp(a->ip, b->ip) || + !strcasecmp(a->hostname, b->hostname)); +} + /* Return non-zero if a hostname matches an address. */ int sentinelAddrEqualsHostname(sentinelAddr *a, char *hostname) { char ip[NET_IP_STR_LEN]; @@ -1259,7 +1274,7 @@ sentinelRedisInstance *createSentinelRedisInstance(char *name, int flags, char * serverAssert((flags & SRI_MASTER) || master != NULL); /* Check address validity. */ - addr = createSentinelAddr(hostname,port); + addr = createSentinelAddr(hostname,port,1); if (addr == NULL) return NULL; /* For slaves use ip/host:port as name. */ @@ -1387,7 +1402,7 @@ sentinelRedisInstance *sentinelRedisInstanceLookupSlave( * If that is the case, depending on configuration we either resolve * it and use the IP address or fail. */ - addr = createSentinelAddr(slave_addr, port); + addr = createSentinelAddr(slave_addr, port, 0); if (!addr) return NULL; key = announceSentinelAddrAndPort(addr); releaseSentinelAddr(addr); @@ -1450,8 +1465,10 @@ sentinelRedisInstance *getSentinelRedisInstanceByAddrAndRunID(dict *instances, c serverAssert(addr || runid); /* User must pass at least one search param. */ if (addr != NULL) { - /* Resolve addr, we use the IP as a key even if a hostname is used */ - ri_addr = createSentinelAddr(addr, port); + /* Try to resolve addr. If hostnames are used, we're accepting an ri_addr + * that contains an hostname only and can still be matched based on that. + */ + ri_addr = createSentinelAddr(addr,port,1); if (!ri_addr) return NULL; } di = dictGetIterator(instances); @@ -1460,8 +1477,7 @@ sentinelRedisInstance *getSentinelRedisInstanceByAddrAndRunID(dict *instances, c if (runid && !ri->runid) continue; if ((runid == NULL || strcmp(ri->runid, runid) == 0) && - (addr == NULL || (strcmp(ri->addr->ip, ri_addr->ip) == 0 && - ri->addr->port == port))) + (addr == NULL || sentinelAddrOrHostnameEqual(ri->addr, ri_addr))) { instance = ri; break; @@ -1565,7 +1581,7 @@ int sentinelResetMasterAndChangeAddress(sentinelRedisInstance *master, char *hos dictIterator *di; dictEntry *de; - newaddr = createSentinelAddr(hostname,port); + newaddr = createSentinelAddr(hostname,port,0); if (newaddr == NULL) return C_ERR; /* There can be only 0 or 1 slave that has the newaddr. @@ -2355,6 +2371,7 @@ static int instanceLinkNegotiateTLS(redisAsyncContext *context) { * is disconnected. Note that link->disconnected is true even if just * one of the two links (commands and pub/sub) is missing. */ void sentinelReconnectInstance(sentinelRedisInstance *ri) { + if (ri->link->disconnected == 0) return; if (ri->addr->port == 0) return; /* port == 0 means invalid address. */ instanceLink *link = ri->link; @@ -2365,7 +2382,21 @@ void sentinelReconnectInstance(sentinelRedisInstance *ri) { /* Commands connection. */ if (link->cc == NULL) { + + /* It might be that the instance is disconnected because it wasn't available earlier when the instance + * allocated, say during failover, and therefore we failed to resolve its ip. + * Another scenario is that the instance restarted with new ip, and we should resolve its new ip based on + * its hostname */ + if (sentinel.resolve_hostnames) { + sentinelAddr *tryResolveAddr = createSentinelAddr(ri->addr->hostname, ri->addr->port, 0); + if (tryResolveAddr != NULL) { + releaseSentinelAddr(ri->addr); + ri->addr = tryResolveAddr; + } + } + link->cc = redisAsyncConnectBind(ri->addr->ip,ri->addr->port,server.bind_source_addr); + if (link->cc && !link->cc->err) anetCloexec(link->cc->c.fd); if (!link->cc) { sentinelEvent(LL_DEBUG,"-cmd-link-reconnection",ri,"%@ #Failed to establish connection"); @@ -4892,6 +4923,7 @@ sentinelRedisInstance *sentinelSelectSlave(sentinelRedisInstance *master) { max_master_down_time += master->down_after_period * 10; di = dictGetIterator(master->slaves); + while((de = dictNext(di)) != NULL) { sentinelRedisInstance *slave = dictGetVal(de); mstime_t info_validity_time;