---
# Teuthology job configuration (job 47838) for the suite fragment named in
# `description`: rados/cephadm/workunits, test_monitoring_stack_basic.
# NOTE(review): this looks like a machine-generated job description; keys are
# kept in their original (alphabetical) order and sequences use the flush
# "- " style throughout.

archive_path: /home/teuthworker/mnt/teuthology/skanta-2026-02-13_01:43:11-rados-wip-bharath17-testing-2026-02-12-0618-tentacle-distro-default-trial/47838
branch: wip-bharath17-testing-2026-02-12-0618-tentacle
description: rados/cephadm/workunits/{0-distro/ubuntu_22.04 agent/on mon_election/connectivity task/test_monitoring_stack_basic}
email: skanta@redhat.com
first_in_suite: false
flavor: default
# Quoted so the numeric-looking id stays a string.
job_id: '47838'
# Anchored so the `kernel` entry in `tasks` can alias the same mapping.
kernel: &id001
  branch: distro
  kdb: 1
  sha1: distro
ktype: distro
last_in_suite: false
machine_type: trial
name: skanta-2026-02-13_01:43:11-rados-wip-bharath17-testing-2026-02-12-0618-tentacle-distro-default-trial
no_nested_subset: false
os_type: ubuntu
# Quoted so the version is not read as the float 22.04.
os_version: '22.04'

# Per-task overrides.
overrides:
  admin_socket:
    branch: wip-bharath17-testing-2026-02-12-0618-tentacle
  ceph:
    conf:
      global:
        mon election default strategy: 3
      mgr:
        debug mgr: 20
        debug ms: 1
        mgr/cephadm/use_agent: true
      mon:
        debug mon: 20
        debug ms: 1
        debug paxos: 20
      osd:
        debug ms: 1
        debug osd: 20
    flavor: default
    # Single-quoted so the backslashes in the patterns stay literal.
    log-ignorelist:
    - '\(MDS_ALL_DOWN\)'
    - '\(MDS_UP_LESS_THAN_MAX\)'
    - MON_DOWN
    - mons down
    - mon down
    - out of quorum
    - CEPHADM_STRAY_DAEMON
    - CEPHADM_FAILED_DAEMON
    log-only-match:
    - CEPHADM_
    sha1: 3b40640f18f2d34a761ee9b6dba2dfbb2a3118eb
  ceph-deploy:
    conf:
      client:
        log file: /var/log/ceph/ceph-$name.$pid.log
      mon: {}
  install:
    ceph:
      flavor: default
      sha1: 3b40640f18f2d34a761ee9b6dba2dfbb2a3118eb
  workunit:
    branch: wip-bharath17-testing-2026-02-12-0618-tentacle
    sha1: 3b40640f18f2d34a761ee9b6dba2dfbb2a3118eb

owner: scheduled_skanta@soko04.front.sepia.ceph.com
priority: 80
repo: https://github.com/ceph/ceph-ci.git

# One inner list of roles per test node (three nodes).
roles:
- - host.a
  - mon.a
  - mgr.a
  - osd.0
- - host.b
  - mon.b
  - mgr.b
  - osd.1
- - host.c
  - mon.c
  - osd.2

seed: 2723
sha1: 3b40640f18f2d34a761ee9b6dba2dfbb2a3118eb
sleep_before_teardown: 0
subset: 111/120000
suite: rados
suite_branch: wip-bharath17-testing-2026-02-12-0618-tentacle
suite_path: /home/teuthworker/src/github.com_ceph_ceph-c_3b40640f18f2d34a761ee9b6dba2dfbb2a3118eb/qa
suite_relpath: qa
suite_repo: https://github.com/ceph/ceph-ci.git
suite_sha1: 3b40640f18f2d34a761ee9b6dba2dfbb2a3118eb

# Target machines; the values are presumably their SSH host public keys.
targets:
  trial034.front.sepia.ceph.com: ecdsa-sha2-nistp256 AAAAE2VjZHNhLXNoYTItbmlzdHAyNTYAAAAIbmlzdHAyNTYAAABBBA7EpTXm34MRp7Xe3V7BUOr8z+BiNSrIRBRxUPC3DmE2PABmIzkOf5CMQCf3MkPGC0s26X5GU8BwsNRVZjpHwZk=
  trial039.front.sepia.ceph.com: ecdsa-sha2-nistp256 AAAAE2VjZHNhLXNoYTItbmlzdHAyNTYAAAAIbmlzdHAyNTYAAABBBKSdTtqO599YwK+Ql+6k2uw6enAerD4OpEDaz9dWklEQIJPMDGOiK06fzm7v/NqHOpFwq3MTIJ8S7AQDSRc4YtY=
  trial043.front.sepia.ceph.com: ecdsa-sha2-nistp256 AAAAE2VjZHNhLXNoYTItbmlzdHAyNTYAAAAIbmlzdHAyNTYAAABBBGkG9o+utK7RzFs4TRAnXM1gTCscA70e4wuVeFiL/oQrDBCpP6uMwjN4kFFFaswuABcxabHGsonhWoXV+a1Rm5M=

# Ordered task list; `null` means the task takes no arguments.
tasks:
- internal.check_packages: null
- internal.buildpackages_prep: null
- internal.save_config: null
- internal.check_lock: null
- internal.add_remotes: null
- console_log: null
- internal.connect: null
- internal.push_inventory: null
- internal.serialize_remote_roles: null
- internal.check_conflict: null
- internal.check_ceph_data: null
- internal.vm_setup: null
- kernel: *id001
- internal.base: null
- internal.archive_upload: null
- internal.archive: null
- internal.coredump: null
- internal.sudo: null
- internal.syslog: null
- internal.timer: null
- pcp: null
- selinux: null
- ansible.cephlab: null
- clock: null
- install: null
- cephadm: null
- cephadm.shell:
    host.a:
    # Monitoring-stack smoke test, run on host.a. Written as a literal block
    # scalar (one trailing newline kept) instead of an escaped one-line string.
    # The node-exporter probe requests /metrics (the path node_exporter serves
    # by default) rather than /metric. Because curl is invoked without --fail,
    # that probe still only detects connection errors, not HTTP error codes.
    - |
      set -e
      set -x
      ceph orch apply node-exporter
      ceph orch apply grafana
      ceph orch apply alertmanager
      ceph orch apply prometheus
      sleep 240
      ceph orch ls
      ceph orch ps
      ceph orch host ls
      MON_DAEMON=$(ceph orch ps --daemon-type mon -f json | jq -r 'last | .daemon_name')
      GRAFANA_HOST=$(ceph orch ps --daemon-type grafana -f json | jq -e '.[]' | jq -r '.hostname')
      PROM_HOST=$(ceph orch ps --daemon-type prometheus -f json | jq -e '.[]' | jq -r '.hostname')
      ALERTM_HOST=$(ceph orch ps --daemon-type alertmanager -f json | jq -e '.[]' | jq -r '.hostname')
      GRAFANA_IP=$(ceph orch host ls -f json | jq -r --arg GRAFANA_HOST "$GRAFANA_HOST" '.[] | select(.hostname==$GRAFANA_HOST) | .addr')
      PROM_IP=$(ceph orch host ls -f json | jq -r --arg PROM_HOST "$PROM_HOST" '.[] | select(.hostname==$PROM_HOST) | .addr')
      ALERTM_IP=$(ceph orch host ls -f json | jq -r --arg ALERTM_HOST "$ALERTM_HOST" '.[] | select(.hostname==$ALERTM_HOST) | .addr')
      # check each host node-exporter metrics endpoint is responsive
      ALL_HOST_IPS=$(ceph orch host ls -f json | jq -r '.[] | .addr')
      for ip in $ALL_HOST_IPS; do
        curl -s http://${ip}:9100/metrics
      done
      # check grafana endpoints are responsive and database health is okay
      curl -k -s https://${GRAFANA_IP}:3000/api/health
      curl -k -s https://${GRAFANA_IP}:3000/api/health | jq -e '.database == "ok"'
      # stop mon daemon in order to trigger an alert
      ceph orch daemon stop $MON_DAEMON
      sleep 120
      # check prometheus endpoints are responsive and mon down alert is firing
      curl -s http://${PROM_IP}:9095/api/v1/status/config
      curl -s http://${PROM_IP}:9095/api/v1/status/config | jq -e '.status == "success"'
      curl -s http://${PROM_IP}:9095/api/v1/alerts
      curl -s http://${PROM_IP}:9095/api/v1/alerts | jq -e '.data | .alerts | .[] | select(.labels | .alertname == "CephMonDown") | .state == "firing"'
      # check alertmanager endpoints are responsive and mon down alert is active
      curl -s http://${ALERTM_IP}:9093/api/v2/status
      curl -s http://${ALERTM_IP}:9093/api/v2/alerts
      curl -s http://${ALERTM_IP}:9093/api/v2/alerts | jq -e '.[] | select(.labels | .alertname == "CephMonDown") | .status | .state == "active"'
      # check prometheus metrics endpoint is not empty and make sure we can get metrics
      METRICS_URL=$(ceph mgr services | jq -r .prometheus)
      [ -n "$METRICS_URL" ] || exit 1
      curl -s "${METRICS_URL}metrics" | grep -q '^ceph_health_status'

teuthology:
  fragments_dropped: []
  meta: {}
  postmerge: []
teuthology_branch: main
teuthology_sha1: 8bec0da71becad44414c54979f64c9ef0e7099c6
# Quoted because the value is date-like; the "_" separator means it is a plain
# string, not a YAML timestamp.
timestamp: '2026-02-13_01:43:11'
tube: trial
user: skanta
verbose: true
worker_log: /home/teuthworker/mnt/teuthology/worker_logs/dispatcher.trial.3932482