[pgpool-general: 9290] Re: Why so many TCP descriptors in CLOSE_WAIT state?

Thu Dec 12 12:21:04 JST 2024

> pgpool processes occasionally run out of file descriptors (currently set to
> 1024).  In researching that, I noticed some processes have hundreds of
> descriptors in CLOSE_WAIT state, and am wondering if that could be the
> cause.

In my understanding, the CLOSE_WAIT status of a pgpool process means that
pgpool is waiting for the client process connected to the socket to
close or shut down the connection. So it looks like a client-side problem,
not a pgpool-side problem.

In the meantime, you can tweak the kernel TCP parameters
(tcp_keepalive_time etc.) on the server on which pgpool is running so
that CLOSE_WAIT disappears earlier.

Best regards,
--
Tatsuo Ishii
SRA OSS K.K.
English: http://www.sraoss.co.jp/index_en/
Japanese:http://www.sraoss.co.jp

> PgPool 4.5.4
> Postgresql 14.15
> 
> PgPool on nodes FISPCDSPGS202a, FISPCDSPGS202b, FISPCDSPGS202c
> VIP on node FISPCDSPGS202c
> Postgresql on nodes FISPCDSPGS202a, FISPCDSPGS202b.
> 
> These commands were run on FISPCDSPGS202c:
> 
> $ sudo lsof -n -iTCP -sTCP:CLOSE_WAIT | grep 1353478 | wc
>     796    7960   94752
> 
> $ sudo ps -fp1353478
> UID          PID    PPID  C STIME TTY          TIME CMD
> postgres 1353478 1353268  0 02:41 ?        00:00:03 pgpool: fis_cds cdsbmop
> 10.55.96.146(59522) idle
> 
> $ sudo lsof -n -iTCP -sTCP:CLOSE_WAIT | grep 1353478 | wc
>     796    7960   94752
> 
> $ sudo lsof -n -iTCP -sTCP:CLOSE_WAIT | grep 1353478 | wc
>     796    7960   94752
> 
> $ sudo lsof -n -iTCP -sTCP:CLOSE_WAIT | grep 1353478 | head
> pgpool   1353478 postgres   12u  IPv4 4241106787      0t0  TCP
> 10.109.165.13:distinct->10.55.96.156:40032 (CLOSE_WAIT)
> pgpool   1353478 postgres   13u  IPv4 4241106788      0t0  TCP
> 10.109.165.13:distinct->10.55.96.156:40074 (CLOSE_WAIT)
> pgpool   1353478 postgres   14u  IPv4 4241106789      0t0  TCP
> 10.109.165.13:distinct->10.55.96.156:40094 (CLOSE_WAIT)
> pgpool   1353478 postgres   17u  IPv4 4241106793      0t0  TCP
> 10.109.165.13:distinct->10.55.96.156:40180 (CLOSE_WAIT)
> pgpool   1353478 postgres   18u  IPv4 4241106794      0t0  TCP
> 10.109.165.13:distinct->10.55.96.156:40204 (CLOSE_WAIT)
> pgpool   1353478 postgres   20u  IPv4 4241106796      0t0  TCP
> 10.109.165.13:distinct->10.55.96.156:40266 (CLOSE_WAIT)
> pgpool   1353478 postgres   21u  IPv4 4241106797      0t0  TCP
> 10.109.165.13:distinct->10.55.96.156:40294 (CLOSE_WAIT)
> pgpool   1353478 postgres   22u  IPv4 4241106798      0t0  TCP
> 10.109.165.13:distinct->10.55.96.156:40310 (CLOSE_WAIT)
> pgpool   1353478 postgres   23u  IPv4 4241106799      0t0  TCP
> 10.109.165.13:distinct->10.55.96.156:42570 (CLOSE_WAIT)
> pgpool   1353478 postgres   25u  IPv4 4241106801      0t0  TCP
> 10.109.165.13:distinct->10.55.96.156:42624 (CLOSE_WAIT)
> 
> Here is the pgpool.conf file.  (It mostly contains non-default values.)
> 
> $ cat /etc/pgpool-II/pgpool.conf
> listen_addresses = '*'
> port = 9999
> unix_socket_directories = '/var/run/postgresql'
> backend_clustering_mode = 'streaming_replication'
> 
> pcp_listen_addresses = '*'
> pcp_port = 9898
> pcp_socket_dir = '/var/run/postgresql'
> 
> listen_backlog_multiplier = 2
> serialize_accept = on
> num_init_children = 200
> max_pool = 2
> reserved_connections = 3
> client_idle_limit = 0
> child_life_time = 0
> 
> load_balance_mode = off
> 
> backend_hostname0 = 'FISPCDSPGS202a'
> backend_port0 = 5432
> backend_weight0 = 2
> backend_data_directory0 = '/Database/14/data'
> backend_flag0 = 'ALLOW_TO_FAILOVER'
> backend_application_name0 = 'server0'
> 
> backend_hostname1 = 'FISPCDSPGS202b'
> backend_port1 = 5432
> backend_weight1 = 1
> backend_data_directory1 = '/Database/14/data'
> backend_flag1 = 'ALLOW_TO_FAILOVER'
> backend_application_name1 = 'server1'
> 
> enable_pool_hba = on
> pool_passwd = '/etc/pgpool-II/pool_passwd'
> 
> log_destination = 'stderr'
> log_line_prefix = '%m: %a pid %p: '
> log_connections = off
> log_hostname = on
> log_statement = off
> log_per_node_statement = off
> log_client_messages = on
> log_min_messages = info
> logging_collector = on
> log_directory = '/var/log/pgpool2'
> log_filename = 'pgpool-%F_%H.log'
> log_truncate_on_rotation = on
> log_rotation_age = 1h
> log_rotation_size = 0
> 
> sr_check_user = 'pool_health_check'
> sr_check_database = 'pool_health_check'
> 
> health_check_period = 5
> health_check_user = 'pool_health_check'
> health_check_database = 'pool_health_check'
> health_check_max_retries = 10
> health_check_retry_delay = 10
> 
> use_watchdog = on
> wd_priority = 1
> delegate_ip = '10.109.165.13'
> 
> hostname0 = 'FISPCDSPGS202a'
> wd_port0 = 9000
> pgpool_port0 = 9999
> 
> hostname1 = 'FISPCDSPGS202b'
> wd_port1 = 9000
> pgpool_port1 = 9999
> 
> hostname2 = 'FISPCDSPGS202c'
> wd_port2 = 9000
> pgpool_port2 = 9999
> 
> wd_ipc_socket_dir = '/var/run/postgresql'
> if_up_cmd = '/usr/bin/sudo /sbin/ip addr add $_IP_$/26 dev ens192 label
> ens192:0'
> if_down_cmd = '/usr/bin/sudo /sbin/ip addr del $_IP_$/26 dev ens192'
> arping_cmd = '/usr/bin/sudo /usr/sbin/arping -U $_IP_$ -w 1 -I ens192'
> 
> wd_escalation_command = '/etc/pgpool-II/escalation.sh'
> wd_lifecheck_method = 'heartbeat'
> 
> heartbeat_hostname0 = 'FISPCDSPGS202a'
> heartbeat_port0 = 9694
> heartbeat_device0 = 'ens192'
> 
> heartbeat_hostname1 = 'FISPCDSPGS202b'
> heartbeat_port1 = 9694
> heartbeat_device1 = 'ens192'
> 
> heartbeat_hostname2 = 'FISPCDSPGS202c'
> heartbeat_port2 = 9694
> heartbeat_device2 = 'ens192'
> 
> wd_heartbeat_keepalive = 2
> wd_heartbeat_deadtime = 30
> 
> memory_cache_enabled = off
> #memory_cache_enabled = on
> memqcache_oiddir = '/var/log/pgpool2/oiddir'
> memqcache_total_size = 1024MB
> memqcache_max_num_cache = 1048576
> 
> -- 
> Death to <Redacted>, and butter sauce.
> Don't boil me, I'm still alive.
> <Redacted> lobster!