Uploaded image for project: 'OpenShift API for Data Protection'
  1. OpenShift API for Data Protection
  2. OADP-697

Restoring 5000 pods takes a long time (more than 10 hours)

XMLWordPrintable

    • False
    • Hide

      None

      Show
      None
    • False
    • QE - Ack
    • ToDo
    • 0
    • 0
    • Very Likely
    • 0
    • None
    • Unset
    • Unknown
    • No

      Description of problem:

      Testing backup and restore of 5000 pods in a single namespace.

      Pods were created on ceph-rbd, and the CSI plugin was used.

      backup 5K pods - 9Hrs
      restore 5K pods - 10.5Hrs

      Version-Release number of selected component (if applicable):

      Scale-Env: cloud33 (3 masters & 12 workers)

      OCP: 4.10.21
      OADP: 1.1.0-53 (iib 285237)

      How reproducible:

       

      Steps to Reproduce:
      1. Create single namespace with 5K pods over ceph-rbd (PV size - 32MiB)
      2. Create CSI backup 
      3. Delete the namespace
      4. Restore the namespace

      Actual results:

      restore finished after 10.5 Hrs

      Expected results:

      restore should finish after ~5 Hrs

      Additional info:

      DPA

      [root@f07-h28-000-r640 07Aug22_10-05-45]# oc get dpa example-velero -nopenshift-adp -oyaml
      apiVersion: oadp.openshift.io/v1alpha1
      kind: DataProtectionApplication
      metadata:
        creationTimestamp: "2022-08-01T20:49:31Z"
        generation: 1
        name: example-velero
        namespace: openshift-adp
        resourceVersion: "52822724"
        uid: 7675350b-463a-4e13-9992-3ed1f5f04e7b
      spec:
        backupLocations:
        - velero:
            config:
              insecureSkipTLSVerify: "true"
              profile: noobaa
              region: noobaa
              s3ForcePathStyle: "true"
              s3Url: https://s3-openshift-storage.apps.oadp305.cloud33mpqe.com
            credential:
              key: cloud
              name: cloud-credentials
            default: true
            objectStorage:
              bucket: oadp-bucket
              prefix: velero
            provider: aws
        configuration:
          restic:
            enable: true
            podConfig:
              resourceAllocations:
                limits:
                  cpu: 2
                  memory: 32768Mi
                requests:
                  cpu: 1
                  memory: 16384Mi
            timeout: 900m
          velero:
            defaultPlugins:
            - openshift
            - aws
            - csi
            podConfig:
              resourceAllocations:
                limits:
                  cpu: 4
                  memory: 32768Mi
                requests:
                  cpu: 2
                  memory: 16384Mi
      status:
        conditions:
        - lastTransitionTime: "2022-08-01T20:49:31Z"
          message: Reconcile complete
          reason: Complete
          status: "True"
          type: Reconciled
      [root@f07-h28-000-r640 07Aug22_10-05-45]#

       

      Backup
      [root@f07-h28-000-r640 07Aug22_10-05-45]# oc get backup backup-csi-ocs-5000pods -nopenshift-adp -oyaml
      apiVersion: velero.io/v1
      kind: Backup
      metadata:
        annotations:
          velero.io/source-cluster-k8s-gitversion: v1.23.5+3afdacb
          velero.io/source-cluster-k8s-major-version: "1"
          velero.io/source-cluster-k8s-minor-version: "23"
        creationTimestamp: "2022-08-04T03:38:14Z"
        generation: 5030
        labels:
          velero.io/storage-location: example-velero-1
        name: backup-csi-ocs-5000pods
        namespace: openshift-adp
        resourceVersion: "70652310"
        uid: a23d35c1-5d97-4c56-8d51-8331ffd1451e
      spec:
        defaultVolumesToRestic: false
        hooks: {}
        includedNamespaces:
        - busybox-perf-single-ns-5000-pods
        metadata: {}
        storageLocation: example-velero-1
        ttl: 720h0m0s
      status:
        completionTimestamp: "2022-08-04T12:42:16Z"
        csiVolumeSnapshotsAttempted: 5000
        csiVolumeSnapshotsCompleted: 5000
        expiration: "2022-09-03T03:38:14Z"
        formatVersion: 1.1.0
        phase: Completed
        progress:
          itemsBackedUp: 64965
          totalItems: 64965
        startTimestamp: "2022-08-04T03:38:14Z"
        version: 1

      Restore
      [root@f07-h28-000-r640 07Aug22_10-05-45]# oc get restore restore-csi-ocs-5000pods -nopenshift-adp -oyaml
      apiVersion: velero.io/v1
      kind: Restore
      metadata:
        creationTimestamp: "2022-08-04T15:18:39Z"
        generation: 8234
        name: restore-csi-ocs-5000pods
        namespace: openshift-adp
        resourceVersion: "74633507"
        uid: aff03c02-51f5-4553-bd61-d20b0a3e7ddc
      spec:
        backupName: backup-csi-ocs-5000pods
        excludedResources:
        - nodes
        - events
        - events.events.k8s.io
        - backups.velero.io
        - restores.velero.io
        - resticrepositories.velero.io
        hooks: {}
        includedNamespaces:
        - '*'
      status:
        completionTimestamp: "2022-08-05T01:49:11Z"
        phase: Completed
        progress:
          itemsRestored: 55026
          totalItems: 55026
        startTimestamp: "2022-08-04T15:18:39Z"
        warnings: 5004

      {+}
      {}{+}

        1. backup-csi-ocs-5000pods.csv
          0.2 kB
          David Vaanunu
        2. backup-csi-ocs-5000pods.json
          1 kB
          David Vaanunu
        3. backup-csi-ocs-5000pods.log.xz
          1.69 MB
          David Vaanunu
        4. restore.tar.gz
          1.00 MB
          David Vaanunu
        5. restore-csi-ocs-5000pods.csv
          0.2 kB
          David Vaanunu
        6. restore-csi-ocs-5000pods.json
          1.0 kB
          David Vaanunu
        7. restore-csi-ocs-5000pods.log.xz
          2.03 MB
          David Vaanunu
        8. Summary-1.jpeg
          143 kB
          David Vaanunu

              sseago Scott Seago
              dvaanunu@redhat.com David Vaanunu
              Tzahi Ashkenazi Tzahi Ashkenazi
              Votes:
              0 Vote for this issue
              Watchers:
              11 Start watching this issue

                Created:
                Updated:
                Resolved: