Uploaded image for project: 'RHEL'
  1. RHEL
  2. RHEL-41053

Dst vm crashed during storage migration with nbd+tls+iothread

    • Major
    • Regression
    • sst_virtualization_storage
    • ssg_virtualization
    • None
    • QE ack
    • False
    • Hide

      None

      Show
      None
    • None
    • Red Hat Enterprise Linux
    • None
    • None
    • None
    • x86_64
    • Unspecified
    • None

      What were you trying to do that didn't work?
      Dst vm crashed during storage migration with nbd+tls+iothread

      Crash info: (qemu) qemu-kvm: ../io/channel.c:534: void qio_channel_restart_read(void *): Assertion `qemu_get_current_aio_context() == qemu_coroutine_get_aio_context(co)' failed.

      Please provide the package NVR for which bug is seen:
      kernel version: 5.14.0-457.el9.x86_64
      qemu-kvm version: qemu-kvm-9.0.0-3.el9

      How reproducible:
      100%

      Steps to reproduce

       Prepare tls env before test:

      a. on both server and client, create tls dir for qemu
      (server)# mkdir -p /etc/pki/qemu
      (client)# mkdir -p /etc/pki/qemu
      
      b. Create keys/cert on server
      #cd /etc/pki/qemu
      #certtool --generate-privkey > ca-key.pem
      #cat ca.tmpl
      cn = $hostname_server
      ca
      cert_signing_key
      # certtool --generate-self-signed --load-privkey ca-key.pem --template /root/ca.tmpl --outfile ca-cert.pem
      
      # certtool --generate-privkey > server-key.pem
      #cat server.tmpl
       organization = GnuTLS test server
       cn = $hostname_server
       tls_www_server
       encryption_key
       signing_key
       dns_name = $shortname_server
       ip_address = $ip_server
      #certtool --generate-certificate --load-privkey server-key.pem --load-ca-certificate ca-cert.pem --load-ca-privkey ca-key.pem --template server.tmpl --outfile server-cert.pem
      #scp ca-cert.pem ca-key.pem root@$ip_client:/etc/pki/qemu
      
      c.Create keys/cert in client
      #cd /etc/pki/qemu
      #certtool --generate-privkey > client-key.pem
      #cat client.tmpl
      cn = $hostname_server
      tls_www_client
      encryption_key
      signing_key
      ip_address = $ip_server
      dns_name = $shortname_server
      #certtool --generate-certificate --load-privkey client-key.pem --load-ca-certificate ca-cert.pem --load-ca-privkey ca-key.pem --template client.tmpl --outfile client-cert.pem

      1. Start src guest with qemu cmdline (iothread enabled)

        /usr/libexec/qemu-kvm  \
       -name "mouse-vm" \
       -sandbox off \
       -machine q35,pflash0=drive_ovmf_code,pflash1=drive_ovmf_vars \
       -cpu Cascadelake-Server \
       -nodefaults  \
       -vga std \
       -chardev socket,id=qmp_id_qmpmonitor1,path=/var/tmp/monitor-qmpmonitor1,server=on,wait=off \
       -chardev socket,id=qmp_id_catch_monitor,path=/var/tmp/monitor-catch_monitor,server=on,wait=off \
       -mon chardev=qmp_id_qmpmonitor1,mode=control \
       -mon chardev=qmp_id_catch_monitor,mode=control \
       -device '{"driver":"pcie-root-port","id":"root0","multifunction":true,"addr":"0x2","chassis":1,"port":16,"bus":"pcie.0"}' \
       -device '{"driver":"pcie-root-port","id":"root1","addr":"0x2.0x1","chassis":2,"port":17,"bus":"pcie.0"}' \
       -device '{"driver":"pcie-root-port","id":"root2","addr":"0x2.0x2","chassis":3,"port":18,"bus":"pcie.0"}' \
       -device '{"driver":"pcie-root-port","id":"root3","addr":"0x2.0x3","chassis":4,"port":19,"bus":"pcie.0"}' \
       -device '{"driver":"pcie-root-port","id":"root4","addr":"0x2.0x4","chassis":5,"port":20,"bus":"pcie.0"}' \
       -device '{"driver":"pcie-root-port","id":"root5","addr":"0x2.0x5","chassis":6,"port":21,"bus":"pcie.0"}' \
       -device '{"driver":"pcie-root-port","id":"root6","addr":"0x2.0x6","chassis":7,"port":22,"bus":"pcie.0"}' \
       -device '{"driver":"pcie-root-port","id":"root7","addr":"0x2.0x7","chassis":8,"port":23,"bus":"pcie.0"}' \
       -device '{"driver":"nec-usb-xhci","id":"usb1","bus":"root0"}' \
       -device '{"driver":"virtio-scsi-pci","id":"virtio_scsi_pci0","bus":"root1","iothread":"iothread0"}' \
       -device '{"driver":"scsi-hd","id":"image1","drive":"drive_image1","bus":"virtio_scsi_pci0.0","channel":0,"scsi-id":0,"lun":0,"bootindex":0}' \
       -device '{"driver":"virtio-net-pci","mac":"9a:8a:8b:8c:8d:8e","id":"net0","netdev":"tap0","bus":"root2","vectors":4}' \
       -device '{"driver":"usb-tablet","id":"usb-tablet1","bus":"usb1.0","port":"1"}' \
       -blockdev '{"driver":"file","cache":{"direct":true,"no-flush":false},"filename":"/mnt/rhel950-64-virtio-scsi-ovmf.qcow2","node-name":"drive_sys1"}' \
       -blockdev '{"driver":"qcow2","node-name":"drive_image1","file":"drive_sys1"}' \
       -blockdev '{"node-name":"file_ovmf_code","driver":"file","filename":"/usr/share/OVMF/OVMF_CODE.secboot.fd","auto-read-only":true,"discard":"unmap"}' \
       -blockdev '{"node-name":"drive_ovmf_code","driver":"raw","read-only":true,"file":"file_ovmf_code"}' \
       -blockdev '{"node-name":"file_ovmf_vars","driver":"file","filename":"/mnt/rhel950-64-virtio-scsi-ovmf.qcow2_VARS.fd","auto-read-only":true,"discard":"unmap"}' \
       -blockdev '{"node-name":"drive_ovmf_vars","driver":"raw","read-only":false,"file":"file_ovmf_vars"}' \
       -netdev tap,id=tap0,vhost=on \
       -m 4096 \
       -object '{"qom-type":"memory-backend-ram","id":"mem-machine_mem","size":4294967296}' \
       -object '{"qom-type": "iothread", "id": "iothread0"}' \
       -smp 4,maxcpus=4,cores=2,threads=1,sockets=2 \
       -vnc :10 \
       -rtc base=utc,clock=host \
       -boot menu=off,strict=off,order=cdn,once=c \
       -enable-kvm  \
       -qmp tcp:0:3333,server=on,wait=off \
       -qmp tcp:0:9999,server=on,wait=off \
       -qmp tcp:0:9888,server=on,wait=off \
       -serial tcp:0:4444,server=on,wait=off \
       -monitor stdio \

      2.Start dst guest with iothread

        /usr/libexec/qemu-kvm  \
       -name "mouse-vm" \
       -sandbox off \
       -machine q35,pflash0=drive_ovmf_code,pflash1=drive_ovmf_vars \
       -cpu Cascadelake-Server \
       -nodefaults  \
       -vga std \
       -chardev socket,id=qmp_id_qmpmonitor1,path=/var/tmp/monitor-qmpmonitor1,server=on,wait=off \
       -chardev socket,id=qmp_id_catch_monitor,path=/var/tmp/monitor-catch_monitor,server=on,wait=off \
       -mon chardev=qmp_id_qmpmonitor1,mode=control \
       -mon chardev=qmp_id_catch_monitor,mode=control \
       -device '{"driver":"pcie-root-port","id":"root0","multifunction":true,"addr":"0x2","chassis":1,"port":16,"bus":"pcie.0"}' \
       -device '{"driver":"pcie-root-port","id":"root1","addr":"0x2.0x1","chassis":2,"port":17,"bus":"pcie.0"}' \
       -device '{"driver":"pcie-root-port","id":"root2","addr":"0x2.0x2","chassis":3,"port":18,"bus":"pcie.0"}' \
       -device '{"driver":"pcie-root-port","id":"root3","addr":"0x2.0x3","chassis":4,"port":19,"bus":"pcie.0"}' \
       -device '{"driver":"pcie-root-port","id":"root4","addr":"0x2.0x4","chassis":5,"port":20,"bus":"pcie.0"}' \
       -device '{"driver":"pcie-root-port","id":"root5","addr":"0x2.0x5","chassis":6,"port":21,"bus":"pcie.0"}' \
       -device '{"driver":"pcie-root-port","id":"root6","addr":"0x2.0x6","chassis":7,"port":22,"bus":"pcie.0"}' \
       -device '{"driver":"pcie-root-port","id":"root7","addr":"0x2.0x7","chassis":8,"port":23,"bus":"pcie.0"}' \
       -device '{"driver":"nec-usb-xhci","id":"usb1","bus":"root0"}' \
       -device '{"driver":"virtio-scsi-pci","id":"virtio_scsi_pci0","bus":"root1","iothread":"iothread0"}' \
       -device '{"driver":"scsi-hd","id":"image1","drive":"drive_image1","bus":"virtio_scsi_pci0.0","channel":0,"scsi-id":0,"lun":0,"bootindex":0}' \
       -device '{"driver":"virtio-net-pci","mac":"9a:8a:8b:8c:8d:8e","id":"net0","netdev":"tap0","bus":"root2","vectors":4}' \
       -device '{"driver":"usb-tablet","id":"usb-tablet1","bus":"usb1.0","port":"1"}' \
       -blockdev '{"driver":"file","cache":{"direct":true,"no-flush":false},"filename":"/home/mirror.qcow2","node-name":"drive_sys1"}' \
       -blockdev '{"driver":"qcow2","node-name":"drive_image1","file":"drive_sys1"}' \
       -blockdev '{"node-name":"file_ovmf_code","driver":"file","filename":"/usr/share/OVMF/OVMF_CODE.secboot.fd","auto-read-only":true,"discard":"unmap"}' \
       -blockdev '{"node-name":"drive_ovmf_code","driver":"raw","read-only":true,"file":"file_ovmf_code"}' \
       -blockdev '{"node-name":"file_ovmf_vars","driver":"file","filename":"/home/mirror.qcow2_VARS.fd","auto-read-only":true,"discard":"unmap"}' \
       -blockdev '{"node-name":"drive_ovmf_vars","driver":"raw","read-only":false,"file":"file_ovmf_vars"}' \
       -netdev tap,id=tap0,vhost=on \
       -m 4096 \
       -object '{"qom-type":"memory-backend-ram","id":"mem-machine_mem","size":4294967296}' \
       -object '{"qom-type": "iothread", "id": "iothread0"}' \
       -smp 4,maxcpus=4,cores=2,threads=1,sockets=2 \
       -vnc :10 \
       -rtc base=utc,clock=host \
       -boot menu=off,strict=off,order=cdn,once=c \
       -enable-kvm  \
       -qmp tcp:0:3333,server=on,wait=off \
       -qmp tcp:0:9999,server=on,wait=off \
       -qmp tcp:0:9888,server=on,wait=off \
       -serial tcp:0:4444,server=on,wait=off \
       -monitor stdio \
       -incoming defer \

      3. In dst, add tls creds, start nbd server and expose image

      {"execute": "object-add", "arguments": {"qom-type": "tls-creds-x509", "id": "tls0", "dir": "/etc/pki/qemu", "endpoint": "server", "verify-peer": true}, "id": "xDE3RFPN"}
      {"execute": "nbd-server-start", "arguments": {"addr": {"type": "inet", "data": {"host": "10.72.140.84", "port": "18180"}}, "tls-creds": "tls0"}, "id": "hqdKXuoi"}
      {"execute": "block-export-add", "arguments": {"node-name": "drive_image1", "type": "nbd", "writable": true, "id": "drive_image1"}, "id": "W50Mgctv"} 

      4. In src, add tls creds, add target node

      {"execute": "object-add", "arguments": {"qom-type": "tls-creds-x509", "id": "tls0", "dir": "/etc/pki/qemu", "endpoint": "client", "verify-peer": true}, "id": "a5LeD1ve"}
      {"execute": "blockdev-add", "arguments": {"node-name": "mirror", "driver": "nbd", "server": {"type": "inet", "host": "10.72.140.84", "port": "18180"}, "export": "drive_image1", "tls-creds": "tls0", "tls-hostname": "dell-per660-02"}, "id": "IRUVRv0u"} 

      5. Do mirror from src to dst

      {"execute": "blockdev-mirror", "arguments": {"sync": "full", "device": "drive_image1", "target": "mirror", "job-id": "drive_image1_53"}, "id": "NKGU8kX7"}
      {"timestamp": {"seconds": 1714470784, "microseconds": 903343}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "drive_image1_53"}}
      {"timestamp": {"seconds": 1714470784, "microseconds": 903421}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "drive_image1_53"}}
      {"timestamp": {"seconds": 1714470784, "microseconds": 951425}, "event": "BLOCK_JOB_ERROR", "data": {"device": "drive_image1_53", "operation": "write", "action": "report"}}
      {"timestamp": {"seconds": 1714470784, "microseconds": 951730}, "event": "BLOCK_JOB_ERROR", "data": {"device": "drive_image1_53", "operation": "write", "action": "report"}}
      {"timestamp": {"seconds": 1714470784, "microseconds": 952044}, "event": "BLOCK_JOB_COMPLETED", "data": {"device": "drive_image1_53", "len": 21474836480, "offset": 0, "speed": 0, "type": "mirror", "error": "Input/output error"}}
      {"timestamp": {"seconds": 1714470784, "microseconds": 952073}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "drive_image1_53"}}
      {"timestamp": {"seconds": 1714470784, "microseconds": 952091}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "drive_image1_53"}}
       

      Expected results

       Storage migration can be executed successfully in an nbd+tls+iothread environment.

      Actual results

        Mirror failed as dst VM crashed with info:  (qemu) qemu-kvm: ../io/channel.c:534: void qio_channel_restart_read(void *): Assertion `qemu_get_current_aio_context() == qemu_coroutine_get_aio_context(co)' failed.

       

      Coredump info:

      Message: Process 221611 (qemu-kvm) of user 0 dumped core.
                      
                      Stack trace of thread 221611:
                      #0  0x00007f01ede8b94c __pthread_kill_implementation (libc.so.6 + 0x8b94c)
                      #1  0x00007f01ede3e646 raise (libc.so.6 + 0x3e646)
                      #2  0x00007f01ede287f3 abort (libc.so.6 + 0x287f3)
                      #3  0x00007f01ede2871b __assert_fail_base.cold (libc.so.6 + 0x2871b)
                      #4  0x00007f01ede37386 __assert_fail (libc.so.6 + 0x37386)
                      #5  0x000055a1f5a173c2 qio_channel_restart_read.llvm.10971766403606733745 (qemu-kvm + 0x7d43c2)
                      #6  0x000055a1f5c58909 aio_dispatch_handler.llvm.16820080296529896906 (qemu-kvm + 0xa15909)
                      #7  0x000055a1f5c5767c aio_dispatch (qemu-kvm + 0xa1467c)
                      #8  0x000055a1f5c756ff aio_ctx_dispatch (qemu-kvm + 0xa326ff)
                      #9  0x00007f01ee343f4f g_main_context_dispatch (libglib-2.0.so.0 + 0x54f4f)
                      #10 0x000055a1f5c7665e main_loop_wait (qemu-kvm + 0xa3365e)
                      #11 0x000055a1f579d2e7 qemu_main_loop (qemu-kvm + 0x55a2e7)
                      #12 0x000055a1f55dedba qemu_default_main (qemu-kvm + 0x39bdba)
                      #13 0x00007f01ede29590 __libc_start_call_main (libc.so.6 + 0x29590)
                      #14 0x00007f01ede29640 __libc_start_main@@GLIBC_2.34 (libc.so.6 + 0x29640)
                      #15 0x000055a1f55de4d5 _start (qemu-kvm + 0x39b4d5)
                      
                      Stack trace of thread 221632:
                      #0  0x00007f01ede8679a __futex_abstimed_wait_common (libc.so.6 + 0x8679a)
                      #1  0x00007f01ede88fa0 pthread_cond_wait@@GLIBC_2.3.2 (libc.so.6 + 0x88fa0)
                      #2  0x000055a1f5c5be76 qemu_cond_wait_impl (qemu-kvm + 0xa18e76)
                      #3  0x000055a1f578e50b qemu_wait_io_event (qemu-kvm + 0x54b50b)
                      #4  0x000055a1f59e54e1 kvm_vcpu_thread_fn (qemu-kvm + 0x7a24e1)
                      #5  0x000055a1f5c5c88a qemu_thread_start.llvm.5861892803676372967 (qemu-kvm + 0xa1988a)
                      #6  0x00007f01ede89c02 start_thread (libc.so.6 + 0x89c02)
                      #7  0x00007f01edf0ec40 __clone3 (libc.so.6 + 0x10ec40)
                      
                      Stack trace of thread 221613:
                      #0  0x00007f01edf01afe ppoll (libc.so.6 + 0x101afe)
                      #1  0x000055a1f5c58bf2 fdmon_poll_wait.llvm.3046691903732803001 (qemu-kvm + 0xa15bf2)
                      #2  0x000055a1f5c58163 aio_poll (qemu-kvm + 0xa15163)
                      #3  0x000055a1f5a413f2 iothread_run (qemu-kvm + 0x7fe3f2)
                      #4  0x000055a1f5c5c88a qemu_thread_start.llvm.5861892803676372967 (qemu-kvm + 0xa1988a)
                      #5  0x00007f01ede89c02 start_thread (libc.so.6 + 0x89c02)
                      #6  0x00007f01edf0ec40 __clone3 (libc.so.6 + 0x10ec40)
                      
                      Stack trace of thread 221633:
                      #0  0x00007f01ede8679a __futex_abstimed_wait_common (libc.so.6 + 0x8679a)
                      #1  0x00007f01ede88fa0 pthread_cond_wait@@GLIBC_2.3.2 (libc.so.6 + 0x88fa0)
                      #2  0x000055a1f5c5be76 qemu_cond_wait_impl (qemu-kvm + 0xa18e76)
                      #3  0x000055a1f578e50b qemu_wait_io_event (qemu-kvm + 0x54b50b)
                      #4  0x000055a1f59e54e1 kvm_vcpu_thread_fn (qemu-kvm + 0x7a24e1)
                      #5  0x000055a1f5c5c88a qemu_thread_start.llvm.5861892803676372967 (qemu-kvm + 0xa1988a)
                      #6  0x00007f01ede89c02 start_thread (libc.so.6 + 0x89c02)
                      #7  0x00007f01edf0ec40 __clone3 (libc.so.6 + 0x10ec40)
                      
                      Stack trace of thread 221624:
                      #0  0x00007f01edf019ff __poll (libc.so.6 + 0x1019ff)
                      #1  0x00007f01ee3991fc g_main_context_iterate.constprop.0 (libglib-2.0.so.0 + 0xaa1fc)
                      #2  0x00007f01ee3435a3 g_main_loop_run (libglib-2.0.so.0 + 0x545a3)
                      #3  0x000055a1f5a4140f iothread_run (qemu-kvm + 0x7fe40f)
                      #4  0x000055a1f5c5c88a qemu_thread_start.llvm.5861892803676372967 (qemu-kvm + 0xa1988a)
                      #5  0x00007f01ede89c02 start_thread (libc.so.6 + 0x89c02)
                      #6  0x00007f01edf0ec40 __clone3 (libc.so.6 + 0x10ec40)
                      
                      Stack trace of thread 221634:
                      #0  0x00007f01ede8679a __futex_abstimed_wait_common (libc.so.6 + 0x8679a)
                      #1  0x00007f01ede88fa0 pthread_cond_wait@@GLIBC_2.3.2 (libc.so.6 + 0x88fa0)
                      #2  0x000055a1f5c5be76 qemu_cond_wait_impl (qemu-kvm + 0xa18e76)
                      #3  0x000055a1f578e50b qemu_wait_io_event (qemu-kvm + 0x54b50b)
                      #4  0x000055a1f59e54e1 kvm_vcpu_thread_fn (qemu-kvm + 0x7a24e1)
                      #5  0x000055a1f5c5c88a qemu_thread_start.llvm.5861892803676372967 (qemu-kvm + 0xa1988a)
                      #6  0x00007f01ede89c02 start_thread (libc.so.6 + 0x89c02)
                      #7  0x00007f01edf0ec40 __clone3 (libc.so.6 + 0x10ec40)
                      
                      Stack trace of thread 221625:
                      #0  0x00007f01ede8679a __futex_abstimed_wait_common (libc.so.6 + 0x8679a)
                      #1  0x00007f01ede88fa0 pthread_cond_wait@@GLIBC_2.3.2 (libc.so.6 + 0x88fa0)
                      #2  0x000055a1f5c5be76 qemu_cond_wait_impl (qemu-kvm + 0xa18e76)
                      #3  0x000055a1f578e50b qemu_wait_io_event (qemu-kvm + 0x54b50b)
                      #4  0x000055a1f59e54e1 kvm_vcpu_thread_fn (qemu-kvm + 0x7a24e1)
                      #5  0x000055a1f5c5c88a qemu_thread_start.llvm.5861892803676372967 (qemu-kvm + 0xa1988a)
                      #6  0x00007f01ede89c02 start_thread (libc.so.6 + 0x89c02)
                      #7  0x00007f01edf0ec40 __clone3 (libc.so.6 + 0x10ec40)
                      
                      Stack trace of thread 221612:
                      #0  0x00007f01edf0713d syscall (libc.so.6 + 0x10713d)
                      #1  0x000055a1f5c5c5ef qemu_event_wait (qemu-kvm + 0xa195ef)
                      #2  0x000055a1f5c6aa19 call_rcu_thread (qemu-kvm + 0xa27a19)
                      #3  0x000055a1f5c5c88a qemu_thread_start.llvm.5861892803676372967 (qemu-kvm + 0xa1988a)
                      #4  0x00007f01ede89c02 start_thread (libc.so.6 + 0x89c02)
                      #5  0x00007f01edf0ec40 __clone3 (libc.so.6 + 0x10ec40)
                      
                      Stack trace of thread 221642:
                      #0  0x00007f01ede8679a __futex_abstimed_wait_common (libc.so.6 + 0x8679a)
                      #1  0x00007f01ede88fa0 pthread_cond_wait@@GLIBC_2.3.2 (libc.so.6 + 0x88fa0)
                      #2  0x000055a1f5c5be76 qemu_cond_wait_impl (qemu-kvm + 0xa18e76)
                      #3  0x000055a1f561bf06 vnc_worker_thread.llvm.15138515012054421639 (qemu-kvm + 0x3d8f06)
                      #4  0x000055a1f5c5c88a qemu_thread_start.llvm.5861892803676372967 (qemu-kvm + 0xa1988a)
                      #5  0x00007f01ede89c02 start_thread (libc.so.6 + 0x89c02)
                      #6  0x00007f01edf0ec40 __clone3 (libc.so.6 + 0x10ec40)
                      ELF object binary architecture: AMD x86-64

       

      Coredump file at:

       http://fileshare.hosts.qa.psi.pek2.redhat.com/pub/section2/coredump/RHEL-34786/ 

      Note:

      •  Storage migration works well with iothread alone (without nbd+tls).
      •  Storage migration works well with nbd+tls alone (without iothread), except for RHEL-33440.
      •  This is a regression since qemu-kvm-8.2.0-1.el9; it works well on qemu-kvm-8.1.0-5.el9.

            eblake_redhat Eric Blake
            aliang@redhat.com Aihua Liang
            virt-maint virt-maint
            Aihua Liang Aihua Liang
            Votes:
            0 Vote for this issue
            Watchers:
            5 Start watching this issue

              Created:
              Updated:
              Resolved: