Uploaded image for project: 'RHEL'
  1. RHEL
  2. RHEL-28781

Deadlock due to malloc call from signal handler

    • Icon: Bug Bug
    • Resolution: Unresolved
    • Icon: Undefined Undefined
    • None
    • rhel-7.9
    • java-1.8.0-openjdk
    • None
    • None
    • None
    • rhel-sst-java
    • None
    • False
    • Hide

      None

      Show
      None
    • None
    • Red Hat Enterprise Linux
    • None
    • None
    • None
    • All
    • None

      Abort was called due to deadlock:

      (gdb) thr 1
      [Switching to thread 1 (Thread 0x7f8cbbc85700 (LWP 32700))]
      #2  0x00007f8d003258a9 in os::die () at /usr/src/debug/java-1.8.0-openjdk-1.8.0.272.b10-1.el7_9.x86_64/openjdk/hotspot/src/os/linux/vm/os_linux.cpp:1581
      1581      ::abort();
      (gdb) bt
      #0  0x00007f8d00abd387 in __GI_raise (sig=sig@entry=6) at ../nptl/sysdeps/unix/sysv/linux/raise.c:55
      #1  0x00007f8d00abea78 in __GI_abort () at abort.c:90
      #2  0x00007f8d003258a9 in os::die () at /usr/src/debug/java-1.8.0-openjdk-1.8.0.272.b10-1.el7_9.x86_64/openjdk/hotspot/src/os/linux/vm/os_linux.cpp:1581
      #3  0x00007f8d004e56f3 in WatcherThread::run (this=0x560b7e091000) at /usr/src/debug/java-1.8.0-openjdk-1.8.0.272.b10-1.el7_9.x86_64/openjdk/hotspot/src/share/vm/runtime/thread.cpp:1380
      #4  0x00007f8d003247d2 in java_start (thread=0x560b7e091000) at /usr/src/debug/java-1.8.0-openjdk-1.8.0.272.b10-1.el7_9.x86_64/openjdk/hotspot/src/os/linux/vm/os_linux.cpp:847
      #5  0x00007f8d01485ea5 in start_thread (arg=0x7f8cbbc85700) at pthread_create.c:307
      #6  0x00007f8d00b85b0d in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:111
      (gdb) frame 3
      #3  0x00007f8d004e56f3 in WatcherThread::run (this=0x560b7e091000) at /usr/src/debug/java-1.8.0-openjdk-1.8.0.272.b10-1.el7_9.x86_64/openjdk/hotspot/src/share/vm/runtime/thread.cpp:1380
      1380                 os::die();
      (gdb) list
      1375             && Arguments::abort_hook() == NULL) {
      1376                 os::sleep(this, 2 * 60 * 1000, false);
      1377                 fdStream err(defaultStream::output_fd());
      1378                 err.print_raw_cr("# [ timer expired, abort... ]");
      1379                 // skip atexit/vm_exit/vm_abort hooks
      1380                 os::die();
      1381            }
      

        The deadlock backtrace is:

       (gdb) thr 54
      [Switching to thread 54 (Thread 0x7f8cb1589700 (LWP 33064))]
      #0  __lll_lock_wait_private () at ../nptl/sysdeps/unix/sysv/linux/x86_64/lowlevellock.S:95
      95      2:      movl    %edx, %eax
      (gdb) bt
      #0  __lll_lock_wait_private () at ../nptl/sysdeps/unix/sysv/linux/x86_64/lowlevellock.S:95
      #1  0x00007f8d00b0fba2 in _L_lock_16654 () from /lib64/libc.so.6
      #2  0x00007f8d00b0c7e3 in __GI___libc_malloc (bytes=140243582117728) at malloc.c:2903
      #3  0x00007f8d0031c4ad in os::malloc (size=<optimized out>, memflags=<optimized out>, stack=...) at /usr/src/debug/java-1.8.0-openjdk-1.8.0.272.b10-1.el7_9.x86_64/openjdk/hotspot/src/share/vm/runtime/os.cpp:626
      #4  0x00007f8cffef2d97 in AllocateHeap (alloc_failmode=AllocFailStrategy::RETURN_NULL, stack=..., flags=mtInternal, size=24)
          at /usr/src/debug/java-1.8.0-openjdk-1.8.0.272.b10-1.el7_9.x86_64/openjdk/hotspot/src/share/vm/memory/allocation.inline.hpp:56
      #5  CHeapObj<(MemoryType)7>::operator new (stack=..., nothrow_constant=..., size=24) at /usr/src/debug/java-1.8.0-openjdk-1.8.0.272.b10-1.el7_9.x86_64/openjdk/hotspot/src/share/vm/memory/allocation.inline.hpp:113
      #6  0x00007f8cffef2e0d in CHeapObj<(MemoryType)7>::operator new (nothrow_constant=..., size=24) at /usr/src/debug/java-1.8.0-openjdk-1.8.0.272.b10-1.el7_9.x86_64/openjdk/hotspot/src/share/vm/memory/allocation.inline.hpp:122
      #7  0x00007f8cffef322d in create_decoder () at /usr/src/debug/java-1.8.0-openjdk-1.8.0.272.b10-1.el7_9.x86_64/openjdk/hotspot/src/share/vm/utilities/decoder.cpp:74
      #8  get_error_handler_instance () at /usr/src/debug/java-1.8.0-openjdk-1.8.0.272.b10-1.el7_9.x86_64/openjdk/hotspot/src/share/vm/utilities/decoder.cpp:59
      #9  Decoder::decode (addr=0x82272 <Address 0x82272 out of bounds>, buf=0x7f8d00a81de0 <VMError::report(outputStream*)::buf> "/lib64/libc.so.6", buflen=2000, offset=0x7f8cb15867d0, modulepath=0x7f8d018afed0 "/lib64/libc.so.6")
          at /usr/src/debug/java-1.8.0-openjdk-1.8.0.272.b10-1.el7_9.x86_64/openjdk/hotspot/src/share/vm/utilities/decoder.cpp:108
      #10 0x00007f8d003259f6 in os::dll_address_to_function_name (addr=addr@entry=0x7f8d00b09272 <_int_malloc+754> "M\211y\020\017\204l\003", buf=buf@entry=0x7f8d00a81de0 <VMError::report(outputStream*)::buf> "/lib64/libc.so.6", 
          buflen=buflen@entry=2000, offset=offset@entry=0x7f8cb15867d0) at /usr/src/debug/java-1.8.0-openjdk-1.8.0.272.b10-1.el7_9.x86_64/openjdk/hotspot/src/os/linux/vm/os_linux.cpp:1724
      #11 0x00007f8cfff82ee2 in frame::print_C_frame (st=st@entry=0x7f8cb1586f10, buf=buf@entry=0x7f8d00a81de0 <VMError::report(outputStream*)::buf> "/lib64/libc.so.6", buflen=buflen@entry=2000, 
          pc=0x7f8d00b09272 <_int_malloc+754> "M\211y\020\017\204l\003") at /usr/src/debug/java-1.8.0-openjdk-1.8.0.272.b10-1.el7_9.x86_64/openjdk/hotspot/src/share/vm/runtime/frame.cpp:702
      #12 0x00007f8cfff831c0 in frame::print_on_error (this=this@entry=0x7f8cb1586890, st=st@entry=0x7f8cb1586f10, buf=buf@entry=0x7f8d00a81de0 <VMError::report(outputStream*)::buf> "/lib64/libc.so.6", buflen=buflen@entry=2000, 
          verbose=verbose@entry=false) at /usr/src/debug/java-1.8.0-openjdk-1.8.0.272.b10-1.el7_9.x86_64/openjdk/hotspot/src/share/vm/runtime/frame.cpp:768
      #13 0x00007f8d005422d2 in VMError::report (this=0x7f8cb1587010, st=st@entry=0x7f8cb1586f10) at /usr/src/debug/java-1.8.0-openjdk-1.8.0.272.b10-1.el7_9.x86_64/openjdk/hotspot/src/share/vm/utilities/vmError.cpp:507
      #14 0x00007f8d00543fd8 in VMError::report_and_die (this=this@entry=0x7f8cb1587010) at /usr/src/debug/java-1.8.0-openjdk-1.8.0.272.b10-1.el7_9.x86_64/openjdk/hotspot/src/share/vm/utilities/vmError.cpp:992
      #15 0x00007f8d0032fa35 in JVM_handle_linux_signal (sig=11, info=0x7f8cb15872b0, ucVoid=0x7f8cb1587180, abort_if_unrecognized=<optimized out>)
          at /usr/src/debug/java-1.8.0-openjdk-1.8.0.272.b10-1.el7_9.x86_64/openjdk/hotspot/src/os_cpu/linux_x86/vm/os_linux_x86.cpp:541
      #16 0x00007f8d00322a48 in signalHandler (sig=11, info=0x7f8cb15872b0, uc=0x7f8cb1587180) at /usr/src/debug/java-1.8.0-openjdk-1.8.0.272.b10-1.el7_9.x86_64/openjdk/hotspot/src/os/linux/vm/os_linux.cpp:4591
      #17 <signal handler called>
      #18 _int_malloc (av=av@entry=0x7f8d00e4e760 <main_arena>, bytes=bytes@entry=1952) at malloc.c:3513
      #19 0x00007f8d00b0c78c in __GI___libc_malloc (bytes=1952) at malloc.c:2905
      #20 0x00007f8d0031c4ad in os::malloc (size=<optimized out>, memflags=<optimized out>, stack=...) at /usr/src/debug/java-1.8.0-openjdk-1.8.0.272.b10-1.el7_9.x86_64/openjdk/hotspot/src/share/vm/runtime/os.cpp:626
      #21 0x00007f8d0053ab8c in AllocateHeap (alloc_failmode=AllocFailStrategy::EXIT_OOM, stack=..., flags=mtCompiler, size=1952)
          at /usr/src/debug/java-1.8.0-openjdk-1.8.0.272.b10-1.el7_9.x86_64/openjdk/hotspot/src/share/vm/memory/allocation.inline.hpp:56
      #22 AllocateHeap (alloc_failmode=AllocFailStrategy::EXIT_OOM, flags=mtCompiler, size=1952) at /usr/src/debug/java-1.8.0-openjdk-1.8.0.272.b10-1.el7_9.x86_64/openjdk/hotspot/src/share/vm/memory/allocation.inline.hpp:71
      #23 vframeArray::allocate (thread=thread@entry=0x560b85724800, frame_size=16, chunk=chunk@entry=0x560b85724df0, reg_map=reg_map@entry=0x7f8cb1587c00, sender=..., caller=..., self=..., realloc_failures=realloc_failures@entry=false)
          at /usr/src/debug/java-1.8.0-openjdk-1.8.0.272.b10-1.el7_9.x86_64/openjdk/hotspot/src/share/vm/runtime/vframeArray.cpp:454
      #24 0x00007f8cffefc8f1 in Deoptimization::create_vframeArray (thread=thread@entry=0x560b85724800, fr=..., reg_map=reg_map@entry=0x7f8cb1587c00, chunk=chunk@entry=0x560b85724df0, realloc_failures=realloc_failures@entry=false)
          at /usr/src/debug/java-1.8.0-openjdk-1.8.0.272.b10-1.el7_9.x86_64/openjdk/hotspot/src/share/vm/runtime/deoptimization.cpp:1085
      #25 0x00007f8cfff008f7 in Deoptimization::fetch_unroll_info_helper (thread=0x560b85724800) at /usr/src/debug/java-1.8.0-openjdk-1.8.0.272.b10-1.el7_9.x86_64/openjdk/hotspot/src/share/vm/runtime/deoptimization.cpp:303
      

      What caused the crash in malloc was:

       (gdb) frame 19
      #19 0x00007f8d00b0c78c in __GI___libc_malloc (bytes=1952) at malloc.c:2905
      2905      victim = _int_malloc(ar_ptr, bytes);
      (gdb) p victim
      $1 = (struct malloc_chunk *) 0x560b8afce670
      (gdb) p victim.bk
      $2 = (struct malloc_chunk *) 0xefcdf20400000000
      (gdb) ptype victim
      type = struct malloc_chunk {
          size_t prev_size;
          size_t size;
          struct malloc_chunk *fd;
          struct malloc_chunk *bk;
          struct malloc_chunk *fd_nextsize;
          struct malloc_chunk *bk_nextsize;
      } *
      (gdb) p *victim
      $3 = {prev_size = 240, size = 17425, fd = 0x7f8d00e4e7b8 <main_arena+88>, bk = 0xefcdf20400000000, fd_nextsize = 0x86d782da68091b27, bk_nextsize = 0xc5acdf76511000} 

      The code is somewhat optimized, but we can see the values:

                  bck = victim->bk;
                 size = chunksize (victim);
      ...
                 if (__glibc_unlikely (bck->fd != victim))
                   malloc_printerr ("malloc(): corrupted unsorted chunks 3");
                 unsorted_chunks (av)->bk = bck;
                 bck->fd = unsorted_chunks (av);

       The deadlock is certain in this setup because of the environment variable MALLOC_ARENA_MAX=1, but even with more arenas (this system has 18 CPUs, so the default limit would be 8 × 18 = 144 arenas) it could still deadlock if the crashing thread and the thread calling malloc from the signal handler were both using the same arena.

      A first hint of what code could be the culprit is:

      (gdb) info shared
      ...
                                              No          /app/JBOSS/jboss-slave/modules/system/layers/base/.overlays/layer-base-jboss-eap-7.3.2.CP/org/apache/activemq/artemis/journal/main/lib/linux-x86_64/libartemis-native-64.so
      0x00007f8cab1e95a0  0x00007f8cab1e977d  Yes         /lib64/libaio.so.1
                                              No          /tmp/libnetty_transport_native_epoll_x86_644761872524936407850.so
      

              rhn-engineering-ahughes Andrew Hughes
              rhn-support-pandrade Paulo Andrade
              Andrew Hughes Andrew Hughes
              David Kutalek David Kutalek
              Votes:
              0 Vote for this issue
              Watchers:
              3 Start watching this issue

                Created:
                Updated: