[pgpool-general: 9289] Why so many TCP descriptors in CLOSE_WAIT state?
Ron Johnson
ronljohnsonjr at gmail.com
Thu Dec 12 07:29:11 JST 2024
pgpool processes occasionally run out of file descriptors (currently set to
1024). In researching that, I noticed some processes have hundreds of
descriptors in CLOSE_WAIT state, and am wondering if that could be the
cause.
PgPool 4.5.4
Postgresql 14.15
PgPool on nodes FISPCDSPGS202a, FISPCDSPGS202b, FISPCDSPGS202c
VIP on node FISPCDSPGS202c
Postgresql on nodes FISPCDSPGS202a, FISPCDSPGS202b.
These commands were run on FISPCDSPGS202c:
$ sudo lsof -n -iTCP -sTCP:CLOSE_WAIT | grep 1353478 | wc
796 7960 94752
$ sudo ps -fp1353478
UID PID PPID C STIME TTY TIME CMD
postgres 1353478 1353268 0 02:41 ? 00:00:03 pgpool: fis_cds cdsbmop
10.55.96.146(59522) idle
$ sudo lsof -n -iTCP -sTCP:CLOSE_WAIT | grep 1353478 | wc
796 7960 94752
$ sudo lsof -n -iTCP -sTCP:CLOSE_WAIT | grep 1353478 | wc
796 7960 94752
$ sudo lsof -n -iTCP -sTCP:CLOSE_WAIT | grep 1353478 | head
pgpool 1353478 postgres 12u IPv4 4241106787 0t0 TCP
10.109.165.13:distinct->10.55.96.156:40032 (CLOSE_WAIT)
pgpool 1353478 postgres 13u IPv4 4241106788 0t0 TCP
10.109.165.13:distinct->10.55.96.156:40074 (CLOSE_WAIT)
pgpool 1353478 postgres 14u IPv4 4241106789 0t0 TCP
10.109.165.13:distinct->10.55.96.156:40094 (CLOSE_WAIT)
pgpool 1353478 postgres 17u IPv4 4241106793 0t0 TCP
10.109.165.13:distinct->10.55.96.156:40180 (CLOSE_WAIT)
pgpool 1353478 postgres 18u IPv4 4241106794 0t0 TCP
10.109.165.13:distinct->10.55.96.156:40204 (CLOSE_WAIT)
pgpool 1353478 postgres 20u IPv4 4241106796 0t0 TCP
10.109.165.13:distinct->10.55.96.156:40266 (CLOSE_WAIT)
pgpool 1353478 postgres 21u IPv4 4241106797 0t0 TCP
10.109.165.13:distinct->10.55.96.156:40294 (CLOSE_WAIT)
pgpool 1353478 postgres 22u IPv4 4241106798 0t0 TCP
10.109.165.13:distinct->10.55.96.156:40310 (CLOSE_WAIT)
pgpool 1353478 postgres 23u IPv4 4241106799 0t0 TCP
10.109.165.13:distinct->10.55.96.156:42570 (CLOSE_WAIT)
pgpool 1353478 postgres 25u IPv4 4241106801 0t0 TCP
10.109.165.13:distinct->10.55.96.156:42624 (CLOSE_WAIT)
Here is the pgpool.conf file. (It mostly contains non-default values.)
$ cat /etc/pgpool-II/pgpool.conf
listen_addresses = '*'
port = 9999
unix_socket_directories = '/var/run/postgresql'
backend_clustering_mode = 'streaming_replication'
pcp_listen_addresses = '*'
pcp_port = 9898
pcp_socket_dir = '/var/run/postgresql'
listen_backlog_multiplier = 2
serialize_accept = on
num_init_children = 200
max_pool = 2
reserved_connections = 3
client_idle_limit = 0
child_life_time = 0
load_balance_mode = off
backend_hostname0 = 'FISPCDSPGS202a'
backend_port0 = 5432
backend_weight0 = 2
backend_data_directory0 = '/Database/14/data'
backend_flag0 = 'ALLOW_TO_FAILOVER'
backend_application_name0 = 'server0'
backend_hostname1 = 'FISPCDSPGS202b'
backend_port1 = 5432
backend_weight1 = 1
backend_data_directory1 = '/Database/14/data'
backend_flag1 = 'ALLOW_TO_FAILOVER'
backend_application_name1 = 'server1'
enable_pool_hba = on
pool_passwd = '/etc/pgpool-II/pool_passwd'
log_destination = 'stderr'
log_line_prefix = '%m: %a pid %p: '
log_connections = off
log_hostname = on
log_statement = off
log_per_node_statement = off
log_client_messages = on
log_min_messages = info
logging_collector = on
log_directory = '/var/log/pgpool2'
log_filename = 'pgpool-%F_%H.log'
log_truncate_on_rotation = on
log_rotation_age = 1h
log_rotation_size = 0
sr_check_user = 'pool_health_check'
sr_check_database = 'pool_health_check'
health_check_period = 5
health_check_user = 'pool_health_check'
health_check_database = 'pool_health_check'
health_check_max_retries = 10
health_check_retry_delay = 10
use_watchdog = on
wd_priority = 1
delegate_ip = '10.109.165.13'
hostname0 = 'FISPCDSPGS202a'
wd_port0 = 9000
pgpool_port0 = 9999
hostname1 = 'FISPCDSPGS202b'
wd_port1 = 9000
pgpool_port1 = 9999
hostname2 = 'FISPCDSPGS202c'
wd_port2 = 9000
pgpool_port2 = 9999
wd_ipc_socket_dir = '/var/run/postgresql'
if_up_cmd = '/usr/bin/sudo /sbin/ip addr add $_IP_$/26 dev ens192 label
ens192:0'
if_down_cmd = '/usr/bin/sudo /sbin/ip addr del $_IP_$/26 dev ens192'
arping_cmd = '/usr/bin/sudo /usr/sbin/arping -U $_IP_$ -w 1 -I ens192'
wd_escalation_command = '/etc/pgpool-II/escalation.sh'
wd_lifecheck_method = 'heartbeat'
heartbeat_hostname0 = 'FISPCDSPGS202a'
heartbeat_port0 = 9694
heartbeat_device0 = 'ens192'
heartbeat_hostname1 = 'FISPCDSPGS202b'
heartbeat_port1 = 9694
heartbeat_device1 = 'ens192'
heartbeat_hostname2 = 'FISPCDSPGS202c'
heartbeat_port2 = 9694
heartbeat_device2 = 'ens192'
wd_heartbeat_keepalive = 2
wd_heartbeat_deadtime = 30
memory_cache_enabled = off
#memory_cache_enabled = on
memqcache_oiddir = '/var/log/pgpool2/oiddir'
memqcache_total_size = 1024MB
memqcache_max_num_cache = 1048576
--
Death to <Redacted>, and butter sauce.
Don't boil me, I'm still alive.
<Redacted> lobster!
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://www.pgpool.net/pipermail/pgpool-general/attachments/20241211/1274090c/attachment.htm>
More information about the pgpool-general
mailing list