Found 39 jobs Found 9 distinct failure reasons Failure: Command failed on trial139 with status 4: 'sudo /home/ubuntu/cephtest/cephadm --image quay.ceph.io/ceph-ci/ceph:f8acada04a9facc6b0d4adc891b97eb878425a98 shell -c /etc/ceph/ceph.conf -k /etc/ceph/ceph.client.admin.keyring --fsid e04aaea5-0c2f-11f1-91f2-d404e6e7d460 -- bash -c \'set -e\nset -x\nceph orch apply node-exporter\nceph orch apply grafana\nceph orch apply alertmanager\nceph orch apply prometheus\nsleep 240\nceph orch ls\nceph orch ps\nceph orch host ls\nMON_DAEMON=$(ceph orch ps --daemon-type mon -f json | jq -r \'"\'"\'last | .daemon_name\'"\'"\')\nGRAFANA_HOST=$(ceph orch ps --daemon-type grafana -f json | jq -e \'"\'"\'.[]\'"\'"\' | jq -r \'"\'"\'.hostname\'"\'"\')\nPROM_HOST=$(ceph orch ps --daemon-type prometheus -f json | jq -e \'"\'"\'.[]\'"\'"\' | jq -r \'"\'"\'.hostname\'"\'"\')\nALERTM_HOST=$(ceph orch ps --daemon-type alertmanager -f json | jq -e \'"\'"\'.[]\'"\'"\' | jq -r \'"\'"\'.hostname\'"\'"\')\nGRAFANA_IP=$(ceph orch host ls -f json | jq -r --arg GRAFANA_HOST "$GRAFANA_HOST" \'"\'"\'.[] | select(.hostname==$GRAFANA_HOST) | .addr\'"\'"\')\nPROM_IP=$(ceph orch host ls -f json | jq -r --arg PROM_HOST "$PROM_HOST" \'"\'"\'.[] | select(.hostname==$PROM_HOST) | .addr\'"\'"\')\nALERTM_IP=$(ceph orch host ls -f json | jq -r --arg ALERTM_HOST "$ALERTM_HOST" \'"\'"\'.[] | select(.hostname==$ALERTM_HOST) | .addr\'"\'"\')\n# check each host node-exporter metrics endpoint is responsive\nALL_HOST_IPS=$(ceph orch host ls -f json | jq -r \'"\'"\'.[] | .addr\'"\'"\')\nfor ip in $ALL_HOST_IPS; do\n curl -s http://${ip}:9100/metric\ndone\n# check grafana endpoints are responsive and database health is okay\ncurl -k -s https://${GRAFANA_IP}:3000/api/health\ncurl -k -s https://${GRAFANA_IP}:3000/api/health | jq -e \'"\'"\'.database == "ok"\'"\'"\'\n# stop mon daemon in order to trigger an alert\nceph orch daemon stop $MON_DAEMON\nsleep 120\n# check prometheus endpoints are responsive and mon down alert is firing\ncurl -s http://${PROM_IP}:9095/api/v1/status/config\ncurl -s http://${PROM_IP}:9095/api/v1/status/config | jq -e \'"\'"\'.status == "success"\'"\'"\'\ncurl -s http://${PROM_IP}:9095/api/v1/alerts\ncurl -s http://${PROM_IP}:9095/api/v1/alerts | jq -e \'"\'"\'.data | .alerts | .[] | select(.labels | .alertname == "CephMonDown") | .state == "firing"\'"\'"\'\n# check alertmanager endpoints are responsive and mon down alert is active\ncurl -s http://${ALERTM_IP}:9093/api/v2/status\ncurl -s http://${ALERTM_IP}:9093/api/v2/alerts\ncurl -s http://${ALERTM_IP}:9093/api/v2/alerts | jq -e \'"\'"\'.[] | select(.labels | .alertname == "CephMonDown") | .status | .state == "active"\'"\'"\'\n# check prometheus metrics endpoint is not empty and make sure we can get metrics\nMETRICS_URL=$(ceph mgr services | jq -r .prometheus)\n[ -n "$METRICS_URL" ] || exit 1\ncurl -s "${METRICS_URL}metrics" | grep -q \'"\'"\'^ceph_health_status\'"\'"\'\n\'' 1 jobs: ['51594'] suites: ['agent/on', 'mon_election/connectivity', 'orch:cephadm/workunits/{0-distro/ubuntu_22.04', 'task/test_monitoring_stack_basic}'] Failure: "2026-02-17T18:39:14.799090+0000 mon.a (mon.0) 148 : cluster [WRN] Health check failed: Failed to apply 1 service(s): mon (CEPHADM_APPLY_SPEC_FAIL)" in cluster log 20 jobs: ['51604', '51595', '51587', '51612', '51585', '51598', '51600', '51580', '51614', '51588', '51605', '51596', '51591', '51607', '51606', '51582', '51602', '51576', '51597', '51613'] suites intersection: [] suites union: ['0-nvme-loop', '1-start', '2-ops/repave-all}', '2-ops/rm-zap-flag}', '2-ops/rm-zap-wait}', '2-services/basic', '2-services/jaeger', '2-services/nfs-ingress-rgw-bucket', '2-services/nfs-ingress-rgw-user', '2-services/nfs-ingress2', '2-services/nfs-keepalive-only', '2-services/nfs2', '2-services/nvmeof', '3-final}', 'agent/off', 'fixed-2', 'mode/packaged', 'mode/root', 'mon_election/classic', 'mon_election/connectivity', 'msgr/async-v1only', 'orch:cephadm/no-agent-workunits/{0-distro/centos_9.stream', 'orch:cephadm/no-agent-workunits/{0-distro/centos_9.stream_runc', 'orch:cephadm/no-agent-workunits/{0-distro/ubuntu_22.04', 'orch:cephadm/osds/{0-distro/ubuntu_22.04', 'orch:cephadm/smb/{0-distro/ubuntu_22.04', 'orch:cephadm/smoke-roleless/{0-distro/ubuntu_22.04', 'orch:cephadm/with-work/{0-distro/ubuntu_22.04', 'orch:cephadm/workunits/{0-distro/ubuntu_22.04', 'start', 'task/test_ca_signed_key}', 'task/test_cephadm_timeout}', 'task/test_host_drain}', 'task/test_orch_cli_mon}', 'tasks/deploy_smb_mgr_clustering_ips}', 'tasks/deploy_smb_mgr_ctdb_res_ips}', 'tasks/rados_api_tests}', 'tasks/rotate-keys}'] Failure: Command failed on trial049 with status 1: 'sudo /home/ubuntu/cephtest/cephadm --image quay.ceph.io/ceph-ci/ceph:f8acada04a9facc6b0d4adc891b97eb878425a98 shell -c /etc/ceph/ceph.conf -k /etc/ceph/ceph.client.admin.keyring --fsid e849371f-0c2f-11f1-88c2-d404e6e7d460 -- bash -c \'set -ex\n# since we don\'"\'"\'t know the real hostnames before the test, the next\n# bit is in order to replace the fake hostnames "host.a/b/c" with\n# the actual names cephadm knows the host by within the mon spec\nceph orch host ls --format json | jq -r \'"\'"\'.[] | .hostname\'"\'"\' > realnames\necho $\'"\'"\'host.a\\nhost.b\\nhost.c\'"\'"\' > fakenames\necho $\'"\'"\'a\\nb\\nc\'"\'"\' > mon_ids\necho $\'"\'"\'{datacenter=a}\\n{datacenter=b,rack=2}\\n{datacenter=a,rack=3}\'"\'"\' > crush_locs\nceph orch ls --service-name mon --export > mon.yaml\nMONSPEC=`cat mon.yaml`\necho "$MONSPEC"\nwhile read realname <&3 && read fakename <&4; do\n MONSPEC="${MONSPEC//$fakename/$realname}"\ndone 3 mon.yaml\ncat mon.yaml\n# now the spec should have the real hostnames, so let\'"\'"\'s re-apply\nceph orch apply -i mon.yaml\nsleep 90\nceph orch ps --refresh\nceph orch ls --service-name mon --export > mon.yaml; ceph orch apply -i mon.yaml\nsleep 90\nceph mon dump\nceph mon dump --format json\n# verify all the crush locations got set from "ceph mon dump" output\nwhile read monid <&3 && read crushloc <&4; do\n ceph mon dump --format json | jq --arg monid "$monid" --arg crushloc "$crushloc" -e \'"\'"\'.mons | .[] | select(.name == $monid) | .crush_location == $crushloc\'"\'"\'\ndone 3