Skip to content

Commit 2b5d7a1

Browse files
authored
Merge pull request #64 from frimpler/persistent_timeout_fix_pr
PR for Issue 1188 (persistent timeout in call home scenarios)
2 parents 1fc4b3b + c7a2acb commit 2b5d7a1

5 files changed

Lines changed: 147 additions & 46 deletions

File tree

src/session_client.c

Lines changed: 115 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -898,65 +898,139 @@ nc_connect_inout(int fdin, int fdout, struct ly_ctx *ctx)
898898
return NULL;
899899
}
900900

901-
int
902-
nc_sock_connect(const char* host, uint16_t port)
901+
/*
902+
Helper for a non-blocking connect (which is required because of the locking
903+
concept for e.g. call home settings). For more details see nc_sock_connect().
904+
*/
905+
static int
906+
_non_blocking_connect(int timeout, int* sock_pending, struct addrinfo *res)
903907
{
904-
int i, sock = -1, flags;
905-
struct addrinfo hints, *res_list, *res;
906-
char port_s[6]; /* length of string representation of short int */
907-
908-
snprintf(port_s, 6, "%u", port);
909-
910-
/* Connect to a server */
911-
memset(&hints, 0, sizeof hints);
912-
hints.ai_family = AF_UNSPEC;
913-
hints.ai_socktype = SOCK_STREAM;
914-
hints.ai_protocol = IPPROTO_TCP;
915-
i = getaddrinfo(host, port_s, &hints, &res_list);
916-
if (i != 0) {
917-
ERR("Unable to translate the host address (%s).", gai_strerror(i));
918-
return -1;
919-
}
920-
921-
for (res = res_list; res != NULL; res = res->ai_next) {
908+
int flags, ret=0;
909+
int sock = -1;
910+
fd_set wset;
911+
struct timeval ts;
912+
int error = 0;
913+
socklen_t len = sizeof(int);
914+
915+
if (sock_pending && *sock_pending != -1) {
916+
VRB("Trying to connect the pending socket=%d.", *sock_pending );
917+
sock = *sock_pending;
918+
} else {
919+
assert(res);
920+
VRB("Trying to connect via %s.", (res->ai_family == AF_INET6) ? "IPv6" : "IPv4");
921+
/* Connect to a server */
922922
sock = socket(res->ai_family, res->ai_socktype, res->ai_protocol);
923923
if (sock == -1) {
924-
/* socket was not created, try another resource */
925-
continue;
924+
ERR("socket couldn't be created.", strerror(errno));
925+
return -1;
926926
}
927-
928-
if (connect(sock, res->ai_addr, res->ai_addrlen) == -1) {
929-
/* network connection failed, try another resource */
930-
close(sock);
931-
sock = -1;
932-
continue;
933-
}
934-
935927
/* make the socket non-blocking */
936928
if (((flags = fcntl(sock, F_GETFL)) == -1) || (fcntl(sock, F_SETFL, flags | O_NONBLOCK) == -1)) {
937929
ERR("Fcntl failed (%s).", strerror(errno));
938-
goto error;
930+
goto cleanup;
931+
}
932+
/* non-blocking connect! */
933+
if (connect(sock, res->ai_addr, res->ai_addrlen) < 0) {
934+
if (errno != EINPROGRESS) {
935+
/* network connection failed, try another resource */
936+
ERR("connect failed: (%s).", strerror(errno));
937+
goto cleanup;
938+
}
939+
}
940+
/* check the usability of the socket */
941+
if (getsockopt(sock, SOL_SOCKET, SO_ERROR, &error, &len) < 0) {
942+
ERR("getsockopt failed: (%s).", strerror(errno));
943+
goto cleanup;
944+
}
945+
if (error == ECONNREFUSED) {
946+
/* network connection failed, try another resource */
947+
VRB("getsockopt error: (%s).", strerror(error));
948+
goto cleanup;
939949
}
940-
941-
/* we're done, network connection established */
942-
break;
943950
}
951+
ts.tv_sec = timeout;
952+
ts.tv_usec = 0;
944953

945-
if (sock != -1) {
946-
VRB("Successfully connected to %s:%s over %s.", host, port_s, (res->ai_family == AF_INET6) ? "IPv6" : "IPv4");
954+
FD_ZERO(&wset);
955+
FD_SET(sock, &wset);
956+
957+
if ((ret = select(sock + 1, NULL, &wset, NULL, (timeout != -1) ? &ts : NULL)) < 0) {
958+
ERR("select failed: (%s).", strerror(errno));
959+
goto cleanup;
947960
}
948-
freeaddrinfo(res_list);
949961

962+
if (ret == 0) { //we had a timeout
963+
VRB("timed out after %ds (%s).", timeout, strerror(errno));
964+
if (sock_pending) {
965+
/* no sock-close, we'll try it again */
966+
*sock_pending = sock;
967+
} else {
968+
close(sock);
969+
}
970+
return -1;
971+
}
950972
return sock;
951973

952-
error:
953-
if (sock > -1) {
954-
close(sock);
974+
cleanup:
975+
if (sock_pending) {
976+
*sock_pending = -1;
955977
}
956-
freeaddrinfo(res_list);
978+
close(sock);
957979
return -1;
958980
}
959981

982+
/* A given timeout value limits the time how long the function blocks. If it has to block
983+
only for some seconds, a socket connection might not yet have been fully established.
984+
Therefore the active (pending) socket will be stored in *sock_pending, but the return
985+
value will be -1. In such a case a subsequent invokation is required, by providing the
986+
stored sock_pending, again.
987+
In general, if this function returns -1, when a timeout has been given, this function
988+
has to be invoked, until it returns a valid socket.
989+
*/
990+
int
991+
nc_sock_connect(const char* host, uint16_t port, int timeout, int* sock_pending)
992+
{
993+
int i;
994+
int sock = sock_pending?*sock_pending:-1;
995+
struct addrinfo hints, *res_list, *res;
996+
char port_s[6]; /* length of string representation of short int */
997+
998+
VRB("nc_sock_connect(%s, %u, %d, %d)", host, port, timeout, sock);
999+
1000+
/* no pending socket */
1001+
if (sock == -1) {
1002+
/* Connect to a server */
1003+
snprintf(port_s, 6, "%u", port);
1004+
memset(&hints, 0, sizeof hints);
1005+
hints.ai_family = AF_UNSPEC;
1006+
hints.ai_socktype = SOCK_STREAM;
1007+
hints.ai_protocol = IPPROTO_TCP;
1008+
i = getaddrinfo(host, port_s, &hints, &res_list);
1009+
if (i != 0) {
1010+
ERR("Unable to translate the host address (%s).", gai_strerror(i));
1011+
return -1;
1012+
}
1013+
1014+
for (res = res_list; res != NULL; res = res->ai_next) {
1015+
sock = _non_blocking_connect(timeout, sock_pending, res);
1016+
if (sock == -1 && (!sock_pending || *sock_pending == -1)) {
1017+
/* try the next resource */
1018+
continue;
1019+
}
1020+
VRB("Successfully connected to %s:%s over %s.", host, port_s, (res->ai_family == AF_INET6) ? "IPv6" : "IPv4");
1021+
break;
1022+
}
1023+
freeaddrinfo(res_list);
1024+
1025+
} else {
1026+
/* try to get a connection with the pending socket */
1027+
assert(sock_pending);
1028+
sock = _non_blocking_connect(timeout, sock_pending, NULL);
1029+
}
1030+
1031+
return sock;
1032+
}
1033+
9601034
static NC_MSG_TYPE
9611035
get_msg(struct nc_session *session, int timeout, uint64_t msgid, struct lyxml_elem **msg)
9621036
{

src/session_client_ssh.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1528,7 +1528,7 @@ _nc_connect_libssh(ssh_session ssh_session, struct ly_ctx *ctx, struct nc_client
15281528
ssh_options_set(session->ti.libssh.session, SSH_OPTIONS_HOST, host);
15291529

15301530
/* create and connect socket */
1531-
sock = nc_sock_connect(host, port);
1531+
sock = nc_sock_connect(host, port, -1, NULL);
15321532
if (sock == -1) {
15331533
ERR("Unable to connect to %s:%u (%s).", host, port, strerror(errno));
15341534
goto fail;
@@ -1677,7 +1677,7 @@ nc_connect_ssh(const char *host, uint16_t port, struct ly_ctx *ctx)
16771677
}
16781678

16791679
/* create and assign communication socket */
1680-
sock = nc_sock_connect(host, port);
1680+
sock = nc_sock_connect(host, port, -1, NULL);
16811681
if (sock == -1) {
16821682
ERR("Unable to connect to %s:%u (%s).", host, port, strerror(errno));
16831683
goto fail;

src/session_client_tls.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -617,7 +617,7 @@ nc_connect_tls(const char *host, unsigned short port, struct ly_ctx *ctx)
617617
}
618618

619619
/* create and assign socket */
620-
sock = nc_sock_connect(host, port);
620+
sock = nc_sock_connect(host, port, -1, NULL);
621621
if (sock == -1) {
622622
ERR("Unable to connect to %s:%u (%s).", host, port, strerror(errno));
623623
goto fail;

src/session_p.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -238,6 +238,7 @@ struct nc_server_opts {
238238
const char *name;
239239
const char *address;
240240
uint16_t port;
241+
int sock_pending;
241242
} *ch_endpts;
242243
uint16_t ch_endpt_count;
243244
union {
@@ -524,9 +525,11 @@ NC_MSG_TYPE nc_handshake_io(struct nc_session *session);
524525
*
525526
* @param[in] host Hostname to connect to.
526527
* @param[in] port Port to connect on.
528+
* @param[in] timeout for blocking the connect+select call (-1 for infinite).
529+
* @param[in] sock_pending for exchanging the pending socket, if the blocking timeout was != -1
527530
* @return Connected socket or -1 on error.
528531
*/
529-
int nc_sock_connect(const char *host, uint16_t port);
532+
int nc_sock_connect(const char *host, uint16_t port, int timeout, int* sock_pending);
530533

531534
/**
532535
* @brief Accept a new socket connection.

src/session_server.c

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -491,6 +491,7 @@ API int
491491
nc_server_init(struct ly_ctx *ctx)
492492
{
493493
const struct lys_node *rpc;
494+
pthread_rwlockattr_t attr;
494495

495496
if (!ctx) {
496497
ERRARG("ctx");
@@ -516,6 +517,23 @@ nc_server_init(struct ly_ctx *ctx)
516517
server_opts.new_session_id = 1;
517518
pthread_spin_init(&server_opts.sid_lock, PTHREAD_PROCESS_PRIVATE);
518519

520+
errno=0;
521+
522+
if (pthread_rwlockattr_init(&attr) == 0) {
523+
if (pthread_rwlockattr_setkind_np(&attr, PTHREAD_RWLOCK_PREFER_WRITER_NONRECURSIVE_NP) == 0) {
524+
if (pthread_rwlock_init(&server_opts.endpt_lock, &attr) != 0) {
525+
ERR("%s: failed to init rwlock(%s).", __FUNCTION__, strerror(errno));
526+
}
527+
if (pthread_rwlock_init(&server_opts.ch_client_lock, &attr) != 0) {
528+
ERR("%s: failed to init rwlock(%s).", __FUNCTION__, strerror(errno));
529+
}
530+
} else {
531+
ERR("%s: failed set attribute (%s).", __FUNCTION__, strerror(errno));
532+
}
533+
pthread_rwlockattr_destroy(&attr);
534+
} else {
535+
ERR("%s: failed init attribute (%s).", __FUNCTION__, strerror(errno));
536+
}
519537
return 0;
520538
}
521539

@@ -2263,6 +2281,7 @@ nc_server_ch_client_add_endpt(const char *client_name, const char *endpt_name)
22632281
client->ch_endpts[client->ch_endpt_count - 1].name = lydict_insert(server_opts.ctx, endpt_name, 0);
22642282
client->ch_endpts[client->ch_endpt_count - 1].address = NULL;
22652283
client->ch_endpts[client->ch_endpt_count - 1].port = 0;
2284+
client->ch_endpts[client->ch_endpt_count - 1].sock_pending = -1;
22662285

22672286
/* UNLOCK */
22682287
nc_server_ch_client_unlock(client);
@@ -2293,6 +2312,9 @@ nc_server_ch_client_del_endpt(const char *client_name, const char *endpt_name)
22932312
for (i = 0; i < client->ch_endpt_count; ++i) {
22942313
lydict_remove(server_opts.ctx, client->ch_endpts[i].name);
22952314
lydict_remove(server_opts.ctx, client->ch_endpts[i].address);
2315+
if (client->ch_endpts[i].sock_pending != -1) {
2316+
close(client->ch_endpts[i].sock_pending);
2317+
}
22962318
}
22972319
free(client->ch_endpts);
22982320
client->ch_endpts = NULL;
@@ -2678,10 +2700,12 @@ nc_connect_ch_client_endpt(struct nc_ch_client *client, struct nc_ch_endpt *endp
26782700
int sock, ret;
26792701
struct timespec ts_cur;
26802702

2681-
sock = nc_sock_connect(endpt->address, endpt->port);
2703+
sock = nc_sock_connect(endpt->address, endpt->port, 5, &endpt->sock_pending);
26822704
if (sock < 0) {
26832705
return NC_MSG_ERROR;
26842706
}
2707+
/* no need to store the socket as pending any longer */
2708+
endpt->sock_pending = -1;
26852709

26862710
*session = nc_new_session(NC_SERVER, 0);
26872711
if (!(*session)) {

0 commit comments

Comments
 (0)