Found 93 jobs Found 6 distinct failure reasons Failure: SSH connection to smithi092 was lost: 'sudo /home/ubuntu/cephtest/cephadm --image quay.io/ceph/ceph:v16.2.0 shell -c /etc/ceph/ceph.conf -k /etc/ceph/ceph.client.admin.keyring --fsid a3b65e1a-0056-11ee-9b1d-001a4aab830c -e sha1=08ac799adababe12f1fdea7ed02eea3d776184b1 -- bash -c \'while ceph orch upgrade status | jq \'"\'"\'.in_progress\'"\'"\' | grep true && ! ceph orch upgrade status | jq \'"\'"\'.message\'"\'"\' | grep Error ; do ceph orch ps ; ceph versions ; ceph orch upgrade status ; ceph health detail ; sleep 30 ; done\'' 12 jobs: ['7293241', '7293190', '7293253', '7293260', '7293224', '7293263', '7293248', '7293201', '7293206', '7293233', '7293220', '7293275'] suites intersection: [] suites union: ['1-bootstrap/16.2.0', '1-bootstrap/16.2.4', '1-bootstrap/16.2.5', '1-ranks/1', '1-ranks/2', '1-start', '1-volume/{0-create', '2-allow_standby_replay/no', '2-allow_standby_replay/yes', '2-client', '2-nfs', '2-repo_digest/repo_digest', '3-inline/no', '3-inline/yes', '3-upgrade-mgr-staggered', '3-upgrade-with-workload', '3-upgrade/simple', '4-config-upgrade/{fail_fs}', '4-final}', '4-verify}', '4-wait', '5-upgrade-ls', '5-upgrade-with-workload', '6-verify}}', 'agent/off', 'agent/on', 'centos_8.stream_container_tools', 'conf/{client', 'fail_fs/no', 'fail_fs/yes', 'ignorelist_wrongly_marked_down', 'mds', 'mon', 'mon_election/classic}', 'mon_election/connectivity}', 'orch:cephadm/mds_upgrade_sequence/{bluestore-bitmap', 'orch:cephadm/mgr-nfs-upgrade/{0-centos_8.stream_container_tools', 'orch:cephadm/upgrade/{1-start-distro/1-start-centos_8.stream_container-tools', 'orch:cephadm/upgrade/{1-start-distro/1-start-ubuntu_20.04', 'osd}', 'overrides/{ignorelist_health', 'pg-warn', 'roles', 'syntax}', 'tasks/{0-from/pacific', 'tasks/{0-from/v16.2.4'] Failure: SSH connection to smithi110 was lost: 'sudo /home/ubuntu/cephtest/cephadm --image quay.io/ceph/ceph:v16.2.0 shell -c /etc/ceph/ceph.conf -k /etc/ceph/ceph.client.admin.keyring --fsid 91b07b16-0055-11ee-9b1d-001a4aab830c -e sha1=08ac799adababe12f1fdea7ed02eea3d776184b1 -- bash -c \'set -ex\n# setup rgw\nradosgw-admin realm create --rgw-realm=r --default\nradosgw-admin zonegroup create --rgw-zonegroup=default --master --default\nradosgw-admin zone create --rgw-zonegroup=default --rgw-zone=z --master --default\nradosgw-admin period update --rgw-realm=r --commit\nceph orch apply rgw foo --realm r --zone z --placement=2 --port=8000\n# setup iscsi\nceph osd pool create foo\nrbd pool init foo\nceph orch apply iscsi foo u p\nsleep 180\nceph config set mon mon_warn_on_insecure_global_id_reclaim false --force\nceph config set mon mon_warn_on_insecure_global_id_reclaim_allowed false --force\nceph config set global log_to_journald false --force\n# get some good info on the state of things pre-upgrade. Useful for debugging\nceph orch ps\nceph versions\nceph -s\nceph orch ls\n# collect the target id for the container we are upgrading to\nTARGET_ID="$(ceph orch upgrade check --image quay.ceph.io/ceph-ci/ceph:$sha1 | jq -r \'"\'"\'.target_id\'"\'"\')"\necho "$TARGET_ID"\n# doing staggered upgrade requires mgr daemons being on a version that contains the staggered upgrade code\n# until there is a stable version that contains it, we can test by manually upgrading a mgr daemon\nceph orch daemon redeploy "mgr.$(ceph mgr dump -f json | jq .standbys | jq .[] | jq -r .name)" --image quay.ceph.io/ceph-ci/ceph:$sha1\nceph orch ps --refresh\nsleep 180\n# gather more possible debugging info\nceph orch ps\nceph versions\nceph -s\nceph health detail\n# verify we have upgraded exactly 1 of the 2 mgr daemons to the new image id\nmatching_daemon_count=$(ceph orch ps --format json | jq --arg TARGET_ID "$TARGET_ID" -e \'"\'"\'.[] | select(.daemon_type=="mgr") | select(.container_image_id==$TARGET_ID)\'"\'"\' | grep "container_image_id" | wc -l)\nif [ "$matching_daemon_count" != "1" ]; then echo "Found unexpected number of upgraded manager daemons"; exit 1; else echo "Matched 1 mgr with new container image id"; fi\n# verify exactly 1 mgr is not upgraded\nmatching_daemon_count=$(ceph orch ps --format json | jq --arg TARGET_ID "$TARGET_ID" -e \'"\'"\'.[] | select(.daemon_type=="mgr") | select(.container_image_id!=$TARGET_ID)\'"\'"\' | grep "container_image_id" | wc -l)\nif [ "$matching_daemon_count" != "1" ]; then echo "Found unexpected number of upgraded manager daemons"; exit 1; else echo "Matched 1 mgr with old container image id"; fi\nceph mgr fail\nsleep 180\n# now try upgrading the other mgr\nceph orch daemon redeploy "mgr.$(ceph mgr dump -f json | jq .standbys | jq .[] | jq -r .name)" --image quay.ceph.io/ceph-ci/ceph:$sha1\nceph orch ps --refresh\nsleep 180\n# gather more possible debugging info\nceph orch ps\nceph versions\nceph health detail\nceph -s\nceph mgr fail\nsleep 180\n# gather more debugging info\nceph orch ps\nceph versions\nceph -s\nceph health detail\n# now that both mgrs should have been redeployed with the new version, so should find 2 daemons\n# when matching against mgr daemons on the correct image id\nmatching_daemon_count=$(ceph orch ps --format json | jq --arg TARGET_ID "$TARGET_ID" -e \'"\'"\'.[] | select(.daemon_type=="mgr") | select(.container_image_id==$TARGET_ID)\'"\'"\' | grep "container_image_id" | wc -l)\nif [ "$matching_daemon_count" != "2" ]; then echo "Found unexpected number of upgraded manager daemons"; exit 1; else echo "Matched 2 mgr with new container image id"; fi\nceph mgr fail\nsleep 180\n# debugging info\nceph orch ps\nceph orch ls\nceph versions\n# to make sure mgr daemons upgrade is fully completed, including being deployed by a mgr on new version\n# also serves as an early failure if manually upgrading the mgrs failed as --daemon-types won\'"\'"\'t be recognized\nceph orch upgrade start --image quay.ceph.io/ceph-ci/ceph:$sha1 --daemon-types mgr\nwhile ceph orch upgrade status | jq \'"\'"\'.in_progress\'"\'"\' | grep true && ! ceph orch upgrade status | jq \'"\'"\'.message\'"\'"\' | grep Error ; do ceph orch ps ; ceph versions ; ceph orch upgrade status ; sleep 30 ; done\n# verify 2 mgr daemons both on the new container image id\nmatching_daemon_count=$(ceph orch ps --format json | jq --arg TARGET_ID "$TARGET_ID" -e \'"\'"\'.[] | select(.daemon_type=="mgr") | select(.container_image_id==$TARGET_ID)\'"\'"\' | grep "container_image_id" | wc -l)\nif [ "$matching_daemon_count" != "2" ]; then echo "Found unexpected number of upgraded manager daemons"; exit 1; else echo "Matched 2 mgr with new container image id"; fi\n# verify non-mgr daemons are still on old image id to make sure --daemon-types was respected\n! ceph orch ps --format json | jq -e \'"\'"\'.[] | select(.daemon_type!="mgr") | .container_image_id\'"\'"\' | grep $TARGET_ID\n# check that exactly two daemons have been upgraded to the new image (our 2 mgr daemons)\nceph orch upgrade check quay.ceph.io/ceph-ci/ceph:$sha1 | jq -e \'"\'"\'.up_to_date | length == 2\'"\'"\'\nceph orch upgrade status\nceph health detail\n# upgrade only the mons on one of the two hosts\nceph orch upgrade start --image quay.ceph.io/ceph-ci/ceph:$sha1 --daemon-types mon --hosts $(ceph orch ps | grep mgr.x | awk \'"\'"\'{print $2}\'"\'"\')\nwhile ceph orch upgrade status | jq \'"\'"\'.in_progress\'"\'"\' | grep true && ! ceph orch upgrade status | jq \'"\'"\'.message\'"\'"\' | grep Error ; do ceph orch ps ; ceph versions ; ceph orch upgrade status ; sleep 30 ; done\nceph orch ps\n# verify exactly 1 off the 2 mon daemons was upgraded\nmatching_daemon_count=$(ceph orch ps --format json | jq --arg TARGET_ID "$TARGET_ID" -e \'"\'"\'.[] | select(.daemon_type=="mon") | select(.container_image_id==$TARGET_ID)\'"\'"\' | grep "container_image_id" | wc -l)\nif [ "$matching_daemon_count" != "1" ]; then echo "Found unexpected number of upgraded mon daemons"; exit 1; else echo "Matched 1 mon with new container image id"; fi\nceph orch upgrade status\nceph health detail\n# upgrade mons on the other hosts\nceph orch upgrade start --image quay.ceph.io/ceph-ci/ceph:$sha1 --daemon-types mon --hosts $(ceph orch ps | grep mgr.y | awk \'"\'"\'{print $2}\'"\'"\')\nwhile ceph orch upgrade status | jq \'"\'"\'.in_progress\'"\'"\' | grep true && ! ceph orch upgrade status | jq \'"\'"\'.message\'"\'"\' | grep Error ; do ceph orch ps ; ceph versions ; ceph orch upgrade status ; sleep 30 ; done\nceph orch ps\n# verify all mons (3) now on same version and version hash matches what we are upgrading to\nmatching_daemon_count=$(ceph orch ps --format json | jq --arg TARGET_ID "$TARGET_ID" -e \'"\'"\'.[] | select(.daemon_type=="mon") | select(.container_image_id==$TARGET_ID)\'"\'"\' | grep "container_image_id" | wc -l)\nif [ "$matching_daemon_count" != "3" ]; then echo "Found unexpected number of upgraded mon daemons"; exit 1; else echo "Matched 3 mon with new container image id"; fi\n# verify exactly 5 daemons are now upgraded (2 mgrs, 3 mons)\nceph orch upgrade check quay.ceph.io/ceph-ci/ceph:$sha1 | jq -e \'"\'"\'.up_to_date | length == 5\'"\'"\'\nceph orch upgrade status\nceph health detail\n# upgrade exactly 2 osd daemons\nceph orch upgrade start --image quay.ceph.io/ceph-ci/ceph:$sha1 --daemon-types osd --limit 2\nwhile ceph orch upgrade status | jq \'"\'"\'.in_progress\'"\'"\' | grep true && ! ceph orch upgrade status | jq \'"\'"\'.message\'"\'"\' | grep Error ; do ceph orch ps ; ceph versions ; ceph orch upgrade status ; sleep 30 ; done\nceph orch ps\n# verify exactly 2 of the 8 OSDs were upgraded\nmatching_daemon_count=$(ceph orch ps --format json | jq --arg TARGET_ID "$TARGET_ID" -e \'"\'"\'.[] | select(.daemon_type=="osd") | select(.container_image_id==$TARGET_ID)\'"\'"\' | grep "container_image_id" | wc -l)\nif [ "$matching_daemon_count" != "2" ]; then echo "Found unexpected number of upgraded osd daemons"; exit 1; else echo "Matched 2 osd with new container image id"; fi\n# verify exactly 7 daemons have been upgraded (2 mgrs, 3 mons, 2 osds)\nceph orch upgrade check quay.ceph.io/ceph-ci/ceph:$sha1 | jq -e \'"\'"\'.up_to_date | length == 7\'"\'"\'\nceph orch upgrade status\nceph health detail\n# upgrade one more osd\nceph orch upgrade start --image quay.ceph.io/ceph-ci/ceph:$sha1 --daemon-types crash,osd --limit 1\nwhile ceph orch upgrade status | jq \'"\'"\'.in_progress\'"\'"\' | grep true && ! ceph orch upgrade status | jq \'"\'"\'.message\'"\'"\' | grep Error ; do ceph orch ps ; ceph versions ; ceph orch upgrade status ; sleep 30 ; done\nceph orch ps\n# verify 3 osd daemons have been upgraded\nmatching_daemon_count=$(ceph orch ps --format json | jq --arg TARGET_ID "$TARGET_ID" -e \'"\'"\'.[] | select(.daemon_type=="osd") | select(.container_image_id==$TARGET_ID)\'"\'"\' | grep "container_image_id" | wc -l)\nif [ "$matching_daemon_count" != "3" ]; then echo "Found unexpected number of upgraded osd daemons"; exit 1; else echo "Matched 3 osd with new container image id"; fi\n# verify now 8 daemons have been upgraded\nceph orch upgrade check quay.ceph.io/ceph-ci/ceph:$sha1 | jq -e \'"\'"\'.up_to_date | length == 8\'"\'"\'\n# upgrade the rest of the osds\n# use this opportunity to check we can set osd flags properly\nceph orch upgrade status\nceph health detail\n# make sure noout is listed as a flag to be set as that is what we\'"\'"\'ll test with\nceph config get mgr mgr/cephadm/upgrade_osd_flags | grep noout\n# upgrade osds and crash daemons.\nceph orch upgrade start --image quay.ceph.io/ceph-ci/ceph:$sha1 --daemon-types crash,osd\n# wait for upgrade to be started and in progress to check for osd flags\n# To test noout being set during upgrade, want to loop here until either the upgrade completes,\n# fails with an error, or noout is set, but in the noout case, we need to do something to mark\n# that that was the condition that we stopped looping on. Doing that here by having\n# it create a file whose existence we can check for once the loop is over\nwhile ceph orch upgrade status | jq \'"\'"\'.in_progress\'"\'"\' | grep true && ! ceph orch upgrade status | jq \'"\'"\'.message\'"\'"\' | grep Error ; do if ceph osd dump -f json | jq \'"\'"\'.flags_set\'"\'"\' | grep noout; then touch saw_noout.txt; break; else echo "no noout yet"; fi; sleep 1; done\nls | grep saw_noout\n# wait for upgrade to complete\nwhile ceph orch upgrade status | jq \'"\'"\'.in_progress\'"\'"\' | grep true && ! ceph orch upgrade status | jq \'"\'"\'.message\'"\'"\' | grep Error ; do ceph orch ps ; ceph versions ; ceph orch upgrade status ; sleep 30 ; done\nceph orch ps\n# verify noout was unset once upgrade completed\nif ceph osd dump -f json | jq \'"\'"\'.flags_set\'"\'"\' | grep noout; then (exit 1); else (exit 0); fi\n# verify all 8 osds are on the new container image id\nmatching_daemon_count=$(ceph orch ps --format json | jq --arg TARGET_ID "$TARGET_ID" -e \'"\'"\'.[] | select(.daemon_type=="osd") | select(.container_image_id==$TARGET_ID)\'"\'"\' | grep "container_image_id" | wc -l)\nif [ "$matching_daemon_count" != "8" ]; then echo "Found unexpected number of upgraded osd daemons"; exit 1; else echo "Matched 8 osd with new container image id"; fi\nceph orch upgrade status\nceph health detail\n# upgrade the rgw daemons using --services\nceph orch upgrade start --image quay.ceph.io/ceph-ci/ceph:$sha1 --services rgw.foo\nwhile ceph orch upgrade status | jq \'"\'"\'.in_progress\'"\'"\' | grep true && ! ceph orch upgrade status | jq \'"\'"\'.message\'"\'"\' | grep Error ; do ceph orch ps ; ceph versions ; ceph orch upgrade status ; sleep 30 ; done\nceph orch ps\n# verify all 2 rgw daemons were upgraded\nmatching_daemon_count=$(ceph orch ps --format json | jq --arg TARGET_ID "$TARGET_ID" -e \'"\'"\'.[] | select(.daemon_type=="rgw") | select(.container_image_id==$TARGET_ID)\'"\'"\' | grep "container_image_id" | wc -l)\nif [ "$matching_daemon_count" != "2" ]; then echo "Found unexpected number of upgraded rgw daemons"; exit 1; else echo "Matched 2 rgw with new container image id"; fi\nceph orch upgrade status\nceph health detail\n# run upgrade one more time with no filter parameters to make sure anything left gets upgraded\nceph orch upgrade start --image quay.ceph.io/ceph-ci/ceph:$sha1\n\'' 1 jobs: ['7293221'] suites: ['2-repo_digest/defaut', '3-upgrade/staggered', '4-wait', '5-upgrade-ls', 'agent/on', 'mon_election/classic}', 'orch:cephadm/upgrade/{1-start-distro/1-start-centos_8.stream_container-tools'] Failure: Command failed on smithi153 with status 95: 'sudo /home/ubuntu/cephtest/cephadm --image quay-quay-quay.apps.os.sepia.ceph.com/ceph-ci/ceph:08ac799adababe12f1fdea7ed02eea3d776184b1 shell -c /etc/ceph/ceph.conf -k /etc/ceph/ceph.client.admin.keyring --fsid 29f39f1a-0057-11ee-9b1d-001a4aab830c -- ceph orch ls -f json' 1 jobs: ['7293247'] suites: ['0-nvme-loop', '1-start', '2-services/mirror', '3-final}', 'orch:cephadm/smoke-roleless/{0-distro/rhel_8.6_container_tools_3.0'] Failure: timeout expired in wait_until_healthy 1 jobs: ['7293215'] suites: ['0-nvme-loop', 'agent/on', 'fixed-2', 'mon_election/connectivity', 'orch:cephadm/smoke/{0-distro/ubuntu_20.04', 'start}'] Failure: Test failure: test_cephfs_mirror (tasks.cephadm_cases.test_cli.TestCephadmCLI) 2 jobs: ['7293270', '7293205'] suites intersection: ['agent/off', 'mon_election/classic', 'task/test_orch_cli}'] suites union: ['agent/off', 'mon_election/classic', 'orch:cephadm/workunits/{0-distro/centos_8.stream_container_tools', 'orch:cephadm/workunits/{0-distro/ubuntu_20.04', 'task/test_orch_cli}'] Failure: reached maximum tries (301) after waiting for 300 seconds 1 jobs: ['7293244'] suites: ['0-nvme-loop', '1-start', '2-services/jaeger', '3-final}', 'orch:cephadm/smoke-roleless/{0-distro/centos_8.stream_container_tools_crun']