-
Bug
-
Resolution: Done-Errata
-
Normal
-
rhel-8.10
-
None
-
pcp-6.3.2-1.el9
-
No
-
Moderate
-
2
-
rhel-pt-pcp
-
ssg_platform_tools
-
21
-
2
-
QE ack, Dev ack
-
False
-
False
-
-
No
-
PCP Sprint 12, PCP Sprint 13
-
Unspecified Release Note Type - Unknown
-
-
x86_64
-
None
What were you trying to do that didn't work?
While sending approximately 2k metrics from Satellite to PCP via statsd, pmdastatsd segfaults at random:
Sep 5 21:50:53 pmoravec-sat615 kernel: pmdastatsd[921663]: segfault at 7fafc11a5018 ip 00007fafc90b7516 sp 00007ffda859a440 error 4 in libpcp_pmda.so.3[7fafc90ae000+18000]
Please provide the package NVR for which bug is seen:
pcp-pmda-statsd-5.3.7-20.el8_10.x86_64
How reproducible:
100% within an hour
Steps to reproduce
1. Install Satellite 6.15
2. Follow https://github.com/pmoravec/sat-perf-correlation
3. Enable all Satellite metrics to be sent via statsd (in the Ansible playbook in sat-perf-correlation, modify "Add needed allowed_labels" to use '.*' everywhere)
4. Generate some load to Satellite (see reproducer below)
5. Wait an hour.
Expected results
No segfault
Actual results
Segfault with backtrace:
(gdb) thread apply all bt full
Thread 4 (Thread 0x7f2d3980f700 (LWP 1039630)):
#0 futex_wait_cancelable (private=0, expected=0, futex_word=0x55997bf59960) at ../sysdeps/unix/sysv/linux/futex-internal.h:88
__ret = -512
oldtype = 0
err = <optimized out>
oldtype = <optimized out>
err = <optimized out>
__ret = <optimized out>
resultvar = <optimized out>
__arg4 = <optimized out>
__arg3 = <optimized out>
__arg2 = <optimized out>
__arg1 = <optimized out>
_a4 = <optimized out>
_a3 = <optimized out>
_a2 = <optimized out>
_a1 = <optimized out>
#1 __pthread_cond_wait_common (abstime=0x0, mutex=0x55997bf59910, cond=0x55997bf59938) at pthread_cond_wait.c:502
spin = 0
buffer = {__routine = 0x7f2d40d56200 <__condvar_cleanup_waiting>, __arg = 0x7f2d3980eb70, __canceltype = 738206992, __prev = 0x0}
cbuffer = {wseq = 1331888, cond = 0x55997bf59938, mutex = 0x55997bf59910, private = 0}
rt = <optimized out>
err = <optimized out>
g = 0
flags = <optimized out>
g1_start = <optimized out>
signals = <optimized out>
result = 0
wseq = 1331888
seq = 665944
private = 0
maxspin = <optimized out>
err = <optimized out>
result = <optimized out>
wseq = <optimized out>
g = <optimized out>
seq = <optimized out>
flags = <optimized out>
private = <optimized out>
signals = <optimized out>
g1_start = <optimized out>
spin = <optimized out>
buffer = <optimized out>
cbuffer = <optimized out>
rt = <optimized out>
s = <optimized out>
--Type <RET> for more, q to quit, c to continue without paging--
#2 __pthread_cond_wait (cond=cond@entry=0x55997bf59938, mutex=mutex@entry=0x55997bf59910) at pthread_cond_wait.c:655
No locals.
#3 0x00007f2d40f6978e in buffered_chan_recv (data=0x7f2d3980ec40, chan=0x55997bf598b0) at src/chan.c:280
msg = <optimized out>
msg = <optimized out>
#4 chan_recv (chan=chan@entry=0x55997bf598b0, data=data@entry=0x7f2d3980ec40) at src/chan.c:239
No locals.
#5 0x000055997a753424 in aggregator_exec (args=<optimized out>) at aggregators.c:61
success_recv = <optimized out>
config = 0x55997a961b80 <config>
metrics_container = 0x55997bf559c0
stats_container = 0x55997bf55a30
parser_to_aggregator = 0x55997bf598b0
message = 0x7f2d34019810
t0 = {tv_sec = 97881, tv_nsec = 670904310}
t1 = {tv_sec = 97881, tv_nsec = 670904821}
time_spent_aggregating = 511
should_exit = 0
#6 0x00007f2d40d501ca in start_thread (arg=<optimized out>) at pthread_create.c:479
ret = <optimized out>
pd = <optimized out>
unwind_buf = {cancel_jmp_buf = {{jmp_buf = {139832215009024, -7359956588598194139, 140731863178062, 140731863178063, 0, 139832215006592, 7459752619671490597, 7459843141556572197},
mask_was_saved = 0}}, priv = {pad = {0x0, 0x0, 0x0, 0x0}, data = {prev = 0x0, cleanup = 0x0, canceltype = 0}}}
not_first_call = <optimized out>
#7 0x00007f2d3f11b8d3 in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:95
No locals.
Thread 3 (Thread 0x7f2d3a010700 (LWP 1039629)):
#0 futex_wait_cancelable (private=0, expected=0, futex_word=0x55997bf57834) at ../sysdeps/unix/sysv/linux/futex-internal.h:88
__ret = 0
oldtype = 0
err = <optimized out>
oldtype = <optimized out>
err = <optimized out>
__ret = <optimized out>
resultvar = <optimized out>
__arg4 = <optimized out>
__arg3 = <optimized out>
__arg2 = <optimized out>
__arg1 = <optimized out>
_a4 = <optimized out>
_a3 = <optimized out>
_a2 = <optimized out>
_a1 = <optimized out>
#1 __pthread_cond_wait_common (abstime=0x0, mutex=0x55997bf577e0, cond=0x55997bf57808) at pthread_cond_wait.c:502
spin = 0
buffer = {__routine = 0x7f2d40d56200 <__condvar_cleanup_waiting>, __arg = 0x7f2d3a00fb70, __canceltype = 872512320, __prev = 0x0}
--Type <RET> for more, q to quit, c to continue without paging--
cbuffer = {wseq = 1395111, cond = 0x55997bf57808, mutex = 0x55997bf577e0, private = 0}
rt = <optimized out>
err = <optimized out>
g = 1
flags = <optimized out>
g1_start = <optimized out>
signals = <optimized out>
result = 0
wseq = 1395111
seq = 697555
private = 0
maxspin = <optimized out>
err = <optimized out>
result = <optimized out>
wseq = <optimized out>
g = <optimized out>
seq = <optimized out>
flags = <optimized out>
private = <optimized out>
signals = <optimized out>
g1_start = <optimized out>
spin = <optimized out>
buffer = <optimized out>
cbuffer = <optimized out>
rt = <optimized out>
s = <optimized out>
#2 __pthread_cond_wait (cond=cond@entry=0x55997bf57808, mutex=mutex@entry=0x55997bf577e0) at pthread_cond_wait.c:655
No locals.
#3 0x00007f2d40f6978e in buffered_chan_recv (data=0x7f2d3a00fc40, chan=0x55997bf57780) at src/chan.c:280
msg = <optimized out>
msg = <optimized out>
#4 chan_recv (chan=chan@entry=0x55997bf57780, data=data@entry=0x7f2d3a00fc40) at src/chan.c:239
No locals.
#5 0x000055997a7574f0 in parser_exec (args=<optimized out>) at parsers.c:55
success_recv = <optimized out>
parsed = 0x7f2d2c005110
tok = <optimized out>
network_end_message = 0x55997a75d08f "PMDASTATSD_EXIT"
config = <optimized out>
network_listener_to_parser = <optimized out>
parser_to_aggregator = <optimized out>
parse_datagram = 0x55997a757d00 <basic_parser_parse>
datagram = 0x7f2d3401a630
delim = "\n"
t0 = {tv_sec = 97881, tv_nsec = 670901044}
t1 = {tv_sec = 97881, tv_nsec = 670901275}
time_spent_parsing = <optimized out>
--Type <RET> for more, q to quit, c to continue without paging--
should_exit = 0
message = <optimized out>
#6 0x00007f2d40d501ca in start_thread (arg=<optimized out>) at pthread_create.c:479
ret = <optimized out>
pd = <optimized out>
unwind_buf = {cancel_jmp_buf = {{jmp_buf = {139832223401728, -7359956588598194139, 140731863178062, 140731863178063, 0, 139832223399296, 7459749320599736357, 7459843141556572197},
mask_was_saved = 0}}, priv = {pad = {0x0, 0x0, 0x0, 0x0}, data = {prev = 0x0, cleanup = 0x0, canceltype = 0}}}
not_first_call = <optimized out>
#7 0x00007f2d3f11b8d3 in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:95
No locals.
Thread 2 (Thread 0x7f2d3a811700 (LWP 1039628)):
#0 0x00007f2d3f2171ff in __GI___select (nfds=nfds@entry=4, readfds=readfds@entry=0x7f2d3a810b70, writefds=writefds@entry=0x0, exceptfds=exceptfds@entry=0x0,
timeout=timeout@entry=0x7f2d3a810b30) at ../sysdeps/unix/sysv/linux/select.c:41
resultvar = 18446744073709551102
sc_cancel_oldtype = 0
sc_ret = <optimized out>
#1 0x000055997a75712e in network_listener_exec (args=<optimized out>) at network-listener.c:80
end_message = 0x55997a75d08f "PMDASTATSD_EXIT"
config = <optimized out>
network_listener_to_parser = 0x55997bf57780
hints = {ai_flags = 33, ai_family = 0, ai_socktype = 2, ai_protocol = 0, ai_addrlen = 0, ai_addr = 0x0, ai_canonname = 0x0, ai_next = 0x0}
readfds = {fds_bits = {8, 0 <repeats 15 times>}}
res = 0x7f2d340011e0
port_buffer = "8125\000"
err = <optimized out>
fd = <optimized out>
tv = {tv_sec = 0, tv_usec = 997461}
max_udp_packet_size = 1472
buffer = 0x7f2d34001420 ""
src_addr = {ss_family = 2, __ss_padding = "\277\320\177\000\000\001", '\000' <repeats 111 times>, __ss_align = 0}
src_addr_len = 16
rv = <optimized out>
datagram = <optimized out>
length = <optimized out>
#2 0x00007f2d40d501ca in start_thread (arg=<optimized out>) at pthread_create.c:479
ret = <optimized out>
pd = <optimized out>
unwind_buf = {cancel_jmp_buf = {{jmp_buf = {139832231794432, -7359956588598194139, 140731863178062, 140731863178063, 0, 139832231792000, 7459750404542107685, 7459843141556572197},
mask_was_saved = 0}}, priv = {pad = {0x0, 0x0, 0x0, 0x0}, data = {prev = 0x0, cleanup = 0x0, canceltype = 0}}}
not_first_call = <optimized out>
#3 0x00007f2d3f11b8d3 in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:95
No locals.
Thread 1 (Thread 0x7f2d4179d980 (LWP 1039627)):
#0 0x00007f2d3f984516 in pmdaRehash () from /lib64/libpcp_pmda.so.3
No symbol table info available.
--Type <RET> for more, q to quit, c to continue without paging--
#1 0x000055997a75926b in statsd_possible_reload (pmda=pmda@entry=0x55997bf54910) at pmda-callbacks.c:462
data = 0x55997a961b00 <data>
need_reload = <optimized out>
#2 0x000055997a759b4b in statsd_fetch (num_pm_id=340, pm_id_list=0x55997bfb452c, resp=0x7ffeb0b69b98, pmda=0x55997bf54910) at pmda-callbacks.c:685
data = 0x55997a961b00 <data>
#3 0x00007f2d3f985d69 in __pmdaMainPDU () from /lib64/libpcp_pmda.so.3
No symbol table info available.
#4 0x000055997a752d48 in main_PDU_loop (dispatch=0x7ffeb0b69c40) at pmdastatsd.c:250
No locals.
#5 main (argc=3, argv=0x7ffeb0b69ee8) at pmdastatsd.c:417
new_action = {__sigaction_handler = {sa_handler = 0x55997a75a380 <signal_handler>, sa_sigaction = 0x55997a75a380 <signal_handler>}, sa_mask = {__val = {0 <repeats 16 times>}},
sa_flags = 536870912, sa_restorer = 0xa30}
old_action = {__sigaction_handler = {sa_handler = 0x0, sa_sigaction = 0x0}, sa_mask = {__val = {0, 0, 0, 0, 1024, 0, 1725472837, 533999999, 1725472837, 533999999, 1725472837,
533999999, 0, 0, 0, 94117698027536}}, sa_flags = 0, sa_restorer = 0x0}
sep = <optimized out>
dispatch = {domain = 57, comm = {pmda_interface = 7, pmapi_version = 2, flags = 0}, status = 0, version = {any = {ext = 0x55997bf54910, profile = 0x7f2d3f981960 <pmdaProfile>,
fetch = 0x55997a759b20 <statsd_fetch>, desc = 0x55997a759870 <statsd_desc>, instance = 0x55997a759ae0 <statsd_instance>, text = 0x55997a7598a0 <statsd_text>,
store = 0x7f2d3f983530 <pmdaStore>}, two = {ext = 0x55997bf54910, profile = 0x7f2d3f981960 <pmdaProfile>, fetch = 0x55997a759b20 <statsd_fetch>,
desc = 0x55997a759870 <statsd_desc>, instance = 0x55997a759ae0 <statsd_instance>, text = 0x55997a7598a0 <statsd_text>, store = 0x7f2d3f983530 <pmdaStore>}, three = {
ext = 0x55997bf54910, profile = 0x7f2d3f981960 <pmdaProfile>, fetch = 0x55997a759b20 <statsd_fetch>, desc = 0x55997a759870 <statsd_desc>,
instance = 0x55997a759ae0 <statsd_instance>, text = 0x55997a7598a0 <statsd_text>, store = 0x7f2d3f983530 <pmdaStore>}, four = {ext = 0x55997bf54910,
profile = 0x7f2d3f981960 <pmdaProfile>, fetch = 0x55997a759b20 <statsd_fetch>, desc = 0x55997a759870 <statsd_desc>, instance = 0x55997a759ae0 <statsd_instance>,
text = 0x55997a7598a0 <statsd_text>, store = 0x7f2d3f983530 <pmdaStore>, pmid = 0x55997a759b90 <statsd_pmid>, name = 0x55997a759be0 <statsd_name>,
children = 0x55997a759c30 <statsd_children>}, five = {ext = 0x55997bf54910, profile = 0x7f2d3f981960 <pmdaProfile>, fetch = 0x55997a759b20 <statsd_fetch>,
desc = 0x55997a759870 <statsd_desc>, instance = 0x55997a759ae0 <statsd_instance>, text = 0x55997a7598a0 <statsd_text>, store = 0x7f2d3f983530 <pmdaStore>,
pmid = 0x55997a759b90 <statsd_pmid>, name = 0x55997a759be0 <statsd_name>, children = 0x55997a759c30 <statsd_children>}, six = {ext = 0x55997bf54910,
profile = 0x7f2d3f981960 <pmdaProfile>, fetch = 0x55997a759b20 <statsd_fetch>, desc = 0x55997a759870 <statsd_desc>, instance = 0x55997a759ae0 <statsd_instance>,
text = 0x55997a7598a0 <statsd_text>, store = 0x7f2d3f983530 <pmdaStore>, pmid = 0x55997a759b90 <statsd_pmid>, name = 0x55997a759be0 <statsd_name>,
children = 0x55997a759c30 <statsd_children>, attribute = 0x7f2d3f9835a0 <pmdaAttribute>}, seven = {ext = 0x55997bf54910, profile = 0x7f2d3f981960 <pmdaProfile>,
fetch = 0x55997a759b20 <statsd_fetch>, desc = 0x55997a759870 <statsd_desc>, instance = 0x55997a759ae0 <statsd_instance>, text = 0x55997a7598a0 <statsd_text>,
store = 0x7f2d3f983530 <pmdaStore>, pmid = 0x55997a759b90 <statsd_pmid>, name = 0x55997a759be0 <statsd_name>, children = 0x55997a759c30 <statsd_children>,
attribute = 0x7f2d3f9835a0 <pmdaAttribute>, label = 0x55997a759c90 <statsd_label>}}}
(gdb)
A coredump and access to my reproducer system will be provided in the next comment.
- links to
-
RHSA-2024:139624
pcp security update