[ceph-users] scrub errors

solarflow99 solarflow99 at gmail.com
Mon Mar 25 16:07:44 PDT 2019


Yes, I know it's old.  I intend to have it replaced, but that's a few months
away and I was hoping to get past this.  The other OSDs appear to be OK; I
see them up and in.  Why, do you see something wrong?

On Mon, Mar 25, 2019 at 4:00 PM Brad Hubbard <bhubbard at redhat.com> wrote:

> Hammer is no longer supported.
>
> What's the status of osds 7 and 17?
>
> On Tue, Mar 26, 2019 at 8:56 AM solarflow99 <solarflow99 at gmail.com> wrote:
> >
> > hi, thanks.  It's still using Hammer.  Here's the output from the pg
> > query; the last command you gave doesn't work at all, it must be too old.
> >
> >
> > # ceph pg 10.2a query
> > {
> >     "state": "active+clean+inconsistent",
> >     "snap_trimq": "[]",
> >     "epoch": 23265,
> >     "up": [
> >         41,
> >         38,
> >         8
> >     ],
> >     "acting": [
> >         41,
> >         38,
> >         8
> >     ],
> >     "actingbackfill": [
> >         "8",
> >         "38",
> >         "41"
> >     ],
> >     "info": {
> >         "pgid": "10.2a",
> >         "last_update": "23265'20886859",
> >         "last_complete": "23265'20886859",
> >         "log_tail": "23265'20883809",
> >         "last_user_version": 20886859,
> >         "last_backfill": "MAX",
> >         "purged_snaps": "[]",
> >         "history": {
> >             "epoch_created": 8200,
> >             "last_epoch_started": 21481,
> >             "last_epoch_clean": 21487,
> >             "last_epoch_split": 0,
> >             "same_up_since": 21472,
> >             "same_interval_since": 21474,
> >             "same_primary_since": 8244,
> >             "last_scrub": "23265'20864209",
> >             "last_scrub_stamp": "2019-03-22 22:39:13.930673",
> >             "last_deep_scrub": "23265'20864209",
> >             "last_deep_scrub_stamp": "2019-03-22 22:39:13.930673",
> >             "last_clean_scrub_stamp": "2019-03-15 01:33:21.447438"
> >         },
> >         "stats": {
> >             "version": "23265'20886859",
> >             "reported_seq": "10109937",
> >             "reported_epoch": "23265",
> >             "state": "active+clean+inconsistent",
> >             "last_fresh": "2019-03-25 15:52:53.720768",
> >             "last_change": "2019-03-22 22:39:13.931038",
> >             "last_active": "2019-03-25 15:52:53.720768",
> >             "last_peered": "2019-03-25 15:52:53.720768",
> >             "last_clean": "2019-03-25 15:52:53.720768",
> >             "last_became_active": "0.000000",
> >             "last_became_peered": "0.000000",
> >             "last_unstale": "2019-03-25 15:52:53.720768",
> >             "last_undegraded": "2019-03-25 15:52:53.720768",
> >             "last_fullsized": "2019-03-25 15:52:53.720768",
> >             "mapping_epoch": 21472,
> >             "log_start": "23265'20883809",
> >             "ondisk_log_start": "23265'20883809",
> >             "created": 8200,
> >             "last_epoch_clean": 21487,
> >             "parent": "0.0",
> >             "parent_split_bits": 0,
> >             "last_scrub": "23265'20864209",
> >             "last_scrub_stamp": "2019-03-22 22:39:13.930673",
> >             "last_deep_scrub": "23265'20864209",
> >             "last_deep_scrub_stamp": "2019-03-22 22:39:13.930673",
> >             "last_clean_scrub_stamp": "2019-03-15 01:33:21.447438",
> >             "log_size": 3050,
> >             "ondisk_log_size": 3050,
> >             "stats_invalid": "0",
> >             "stat_sum": {
> >                 "num_bytes": 8220278746,
> >                 "num_objects": 345034,
> >                 "num_object_clones": 0,
> >                 "num_object_copies": 1035102,
> >                 "num_objects_missing_on_primary": 0,
> >                 "num_objects_degraded": 0,
> >                 "num_objects_misplaced": 0,
> >                 "num_objects_unfound": 0,
> >                 "num_objects_dirty": 345034,
> >                 "num_whiteouts": 0,
> >                 "num_read": 7904350,
> >                 "num_read_kb": 58116568,
> >                 "num_write": 8753504,
> >                 "num_write_kb": 85104263,
> >                 "num_scrub_errors": 47,
> >                 "num_shallow_scrub_errors": 47,
> >                 "num_deep_scrub_errors": 0,
> >                 "num_objects_recovered": 167138,
> >                 "num_bytes_recovered": 5193543924,
> >                 "num_keys_recovered": 0,
> >                 "num_objects_omap": 0,
> >                 "num_objects_hit_set_archive": 0,
> >                 "num_bytes_hit_set_archive": 0
> >             },
> >             "up": [
> >                 41,
> >                 38,
> >                 8
> >             ],
> >             "acting": [
> >                 41,
> >                 38,
> >                 8
> >             ],
> >             "blocked_by": [],
> >             "up_primary": 41,
> >             "acting_primary": 41
> >         },
> >         "empty": 0,
> >         "dne": 0,
> >         "incomplete": 0,
> >         "last_epoch_started": 21481,
> >         "hit_set_history": {
> >             "current_last_update": "0'0",
> >             "current_last_stamp": "0.000000",
> >             "current_info": {
> >                 "begin": "0.000000",
> >                 "end": "0.000000",
> >                 "version": "0'0",
> >                 "using_gmt": "0"
> >             },
> >             "history": []
> >         }
> >     },
> >     "peer_info": [
> >         {
> >             "peer": "8",
> >             "pgid": "10.2a",
> >             "last_update": "23265'20886859",
> >             "last_complete": "23265'20886859",
> >             "log_tail": "21395'11840466",
> >             "last_user_version": 11843648,
> >             "last_backfill": "MAX",
> >             "purged_snaps": "[]",
> >             "history": {
> >                 "epoch_created": 8200,
> >                 "last_epoch_started": 21481,
> >                 "last_epoch_clean": 21487,
> >                 "last_epoch_split": 0,
> >                 "same_up_since": 21472,
> >                 "same_interval_since": 21474,
> >                 "same_primary_since": 8244,
> >                 "last_scrub": "23265'20864209",
> >                 "last_scrub_stamp": "2019-03-22 22:39:13.930673",
> >                 "last_deep_scrub": "23265'20864209",
> >                 "last_deep_scrub_stamp": "2019-03-22 22:39:13.930673",
> >                 "last_clean_scrub_stamp": "2019-03-15 01:33:21.447438"
> >             },
> >             "stats": {
> >                 "version": "21471'11843647",
> >                 "reported_seq": "7670875",
> >                 "reported_epoch": "21471",
> >                 "state": "active+undersized+degraded+remapped+wait_backfill",
> >                 "last_fresh": "2018-09-22 07:07:23.061013",
> >                 "last_change": "2018-09-22 06:39:32.487204",
> >                 "last_active": "2018-09-22 07:07:23.061013",
> >                 "last_peered": "2018-09-22 07:07:23.061013",
> >                 "last_clean": "2018-09-22 06:33:47.246063",
> >                 "last_became_active": "0.000000",
> >                 "last_became_peered": "0.000000",
> >                 "last_unstale": "2018-09-22 07:07:23.061013",
> >                 "last_undegraded": "2018-09-22 06:39:13.626445",
> >                 "last_fullsized": "2018-09-22 06:39:13.626445",
> >                 "mapping_epoch": 21472,
> >                 "log_start": "21395'11840466",
> >                 "ondisk_log_start": "21395'11840466",
> >                 "created": 8200,
> >                 "last_epoch_clean": 21397,
> >                 "parent": "0.0",
> >                 "parent_split_bits": 0,
> >                 "last_scrub": "21395'11835365",
> >                 "last_scrub_stamp": "2018-09-21 12:11:47.230141",
> >                 "last_deep_scrub": "21395'11835365",
> >                 "last_deep_scrub_stamp": "2018-09-21 12:11:47.230141",
> >                 "last_clean_scrub_stamp": "2018-09-21 12:11:47.230141",
> >                 "log_size": 3181,
> >                 "ondisk_log_size": 3181,
> >                 "stats_invalid": "0",
> >                 "stat_sum": {
> >                     "num_bytes": 6406027390,
> >                     "num_objects": 241710,
> >                     "num_object_clones": 0,
> >                     "num_object_copies": 966844,
> >                     "num_objects_missing_on_primary": 0,
> >                     "num_objects_degraded": 241715,
> >                     "num_objects_misplaced": 725133,
> >                     "num_objects_unfound": 0,
> >                     "num_objects_dirty": 241710,
> >                     "num_whiteouts": 0,
> >                     "num_read": 5638025,
> >                     "num_read_kb": 48736266,
> >                     "num_write": 6789818,
> >                     "num_write_kb": 67680335,
> >                     "num_scrub_errors": 0,
> >                     "num_shallow_scrub_errors": 0,
> >                     "num_deep_scrub_errors": 0,
> >                     "num_objects_recovered": 167079,
> >                     "num_bytes_recovered": 5191625476,
> >                     "num_keys_recovered": 0,
> >                     "num_objects_omap": 0,
> >                     "num_objects_hit_set_archive": 0,
> >                     "num_bytes_hit_set_archive": 0
> >                 },
> >                 "up": [
> >                     41,
> >                     38,
> >                     8
> >                 ],
> >                 "acting": [
> >                     41,
> >                     38,
> >                     8
> >                 ],
> >                 "blocked_by": [],
> >                 "up_primary": 41,
> >                 "acting_primary": 41
> >             },
> >             "empty": 0,
> >             "dne": 0,
> >             "incomplete": 0,
> >             "last_epoch_started": 21481,
> >             "hit_set_history": {
> >                 "current_last_update": "0'0",
> >                 "current_last_stamp": "0.000000",
> >                 "current_info": {
> >                     "begin": "0.000000",
> >                     "end": "0.000000",
> >                     "version": "0'0",
> >                     "using_gmt": "0"
> >                 },
> >                 "history": []
> >             }
> >         },
> >         {
> >             "peer": "38",
> >             "pgid": "10.2a",
> >             "last_update": "23265'20886859",
> >             "last_complete": "21395'11843517",
> >             "log_tail": "21395'11840466",
> >             "last_user_version": 11843517,
> >             "last_backfill": "MAX",
> >             "purged_snaps": "[]",
> >             "history": {
> >                 "epoch_created": 8200,
> >                 "last_epoch_started": 21481,
> >                 "last_epoch_clean": 21487,
> >                 "last_epoch_split": 0,
> >                 "same_up_since": 21472,
> >                 "same_interval_since": 21474,
> >                 "same_primary_since": 8244,
> >                 "last_scrub": "23265'20864209",
> >                 "last_scrub_stamp": "2019-03-22 22:39:13.930673",
> >                 "last_deep_scrub": "23265'20864209",
> >                 "last_deep_scrub_stamp": "2019-03-22 22:39:13.930673",
> >                 "last_clean_scrub_stamp": "2019-03-15 01:33:21.447438"
> >             },
> >             "stats": {
> >                 "version": "21395'11843516",
> >                 "reported_seq": "7670719",
> >                 "reported_epoch": "21395",
> >                 "state": "active+clean",
> >                 "last_fresh": "2018-09-22 06:33:14.791334",
> >                 "last_change": "2018-09-21 12:11:47.230557",
> >                 "last_active": "2018-09-22 06:33:14.791334",
> >                 "last_peered": "2018-09-22 06:33:14.791334",
> >                 "last_clean": "2018-09-22 06:33:14.791334",
> >                 "last_became_active": "0.000000",
> >                 "last_became_peered": "0.000000",
> >                 "last_unstale": "2018-09-22 06:33:14.791334",
> >                 "last_undegraded": "2018-09-22 06:33:14.791334",
> >                 "last_fullsized": "2018-09-22 06:33:14.791334",
> >                 "mapping_epoch": 21472,
> >                 "log_start": "21395'11840466",
> >                 "ondisk_log_start": "21395'11840466",
> >                 "created": 8200,
> >                 "last_epoch_clean": 20840,
> >                 "parent": "0.0",
> >                 "parent_split_bits": 0,
> >                 "last_scrub": "21395'11835365",
> >                 "last_scrub_stamp": "2018-09-21 12:11:47.230141",
> >                 "last_deep_scrub": "21395'11835365",
> >                 "last_deep_scrub_stamp": "2018-09-21 12:11:47.230141",
> >                 "last_clean_scrub_stamp": "2018-09-21 12:11:47.230141",
> >                 "log_size": 3050,
> >                 "ondisk_log_size": 3050,
> >                 "stats_invalid": "0",
> >                 "stat_sum": {
> >                     "num_bytes": 6405126628,
> >                     "num_objects": 241711,
> >                     "num_object_clones": 0,
> >                     "num_object_copies": 725130,
> >                     "num_objects_missing_on_primary": 0,
> >                     "num_objects_degraded": 0,
> >                     "num_objects_misplaced": 0,
> >                     "num_objects_unfound": 0,
> >                     "num_objects_dirty": 241711,
> >                     "num_whiteouts": 0,
> >                     "num_read": 5637862,
> >                     "num_read_kb": 48735376,
> >                     "num_write": 6789687,
> >                     "num_write_kb": 67678402,
> >                     "num_scrub_errors": 0,
> >                     "num_shallow_scrub_errors": 0,
> >                     "num_deep_scrub_errors": 0,
> >                     "num_objects_recovered": 167079,
> >                     "num_bytes_recovered": 5191625476,
> >                     "num_keys_recovered": 0,
> >                     "num_objects_omap": 0,
> >                     "num_objects_hit_set_archive": 0,
> >                     "num_bytes_hit_set_archive": 0
> >                 },
> >                 "up": [
> >                     41,
> >                     38,
> >                     8
> >                 ],
> >                 "acting": [
> >                     41,
> >                     38,
> >                     8
> >                 ],
> >                 "blocked_by": [],
> >                 "up_primary": 41,
> >                 "acting_primary": 41
> >             },
> >             "empty": 0,
> >             "dne": 0,
> >             "incomplete": 0,
> >             "last_epoch_started": 21481,
> >             "hit_set_history": {
> >                 "current_last_update": "0'0",
> >                 "current_last_stamp": "0.000000",
> >                 "current_info": {
> >                     "begin": "0.000000",
> >                     "end": "0.000000",
> >                     "version": "0'0",
> >                     "using_gmt": "0"
> >                 },
> >                 "history": []
> >             }
> >         }
> >     ],
> >     "recovery_state": [
> >         {
> >             "name": "Started\/Primary\/Active",
> >             "enter_time": "2018-09-22 07:07:48.637248",
> >             "might_have_unfound": [
> >                 {
> >                     "osd": "7",
> >                     "status": "not queried"
> >                 },
> >                 {
> >                     "osd": "8",
> >                     "status": "already probed"
> >                 },
> >                 {
> >                     "osd": "17",
> >                     "status": "not queried"
> >                 },
> >                 {
> >                     "osd": "38",
> >                     "status": "already probed"
> >                 }
> >             ],
> >             "recovery_progress": {
> >                 "backfill_targets": [],
> >                 "waiting_on_backfill": [],
> >                 "last_backfill_started": "-1\/0\/\/0",
> >                 "backfill_info": {
> >                     "begin": "-1\/0\/\/0",
> >                     "end": "-1\/0\/\/0",
> >                     "objects": []
> >                 },
> >                 "peer_backfill_info": [],
> >                 "backfills_in_flight": [],
> >                 "recovering": [],
> >                 "pg_backend": {
> >                     "pull_from_peer": [],
> >                     "pushing": []
> >                 }
> >             },
> >             "scrub": {
> >                 "scrubber.epoch_start": "21474",
> >                 "scrubber.active": 0,
> >                 "scrubber.waiting_on": 0,
> >                 "scrubber.waiting_on_whom": []
> >             }
> >         },
> >         {
> >             "name": "Started",
> >             "enter_time": "2018-09-22 07:07:42.138358"
> >         }
> >     ],
> >     "agent_state": {}
> > }
> >
> >
> > On Mon, Mar 25, 2019 at 3:46 PM Brad Hubbard <bhubbard at redhat.com> wrote:
> >>
> >> It would help to know what version you are running but, to begin with,
> >> could you post the output of the following?
> >>
> >> $ sudo ceph pg 10.2a query
> >> $ sudo rados list-inconsistent-obj 10.2a --format=json-pretty
> >>
> >> Also, have a read of
> >>
> >> http://docs.ceph.com/docs/mimic/rados/troubleshooting/troubleshooting-pg/
> >> (adjust the URL for your release).
> >>
> >> On Tue, Mar 26, 2019 at 8:19 AM solarflow99 <solarflow99 at gmail.com> wrote:
> >> >
> >> > I noticed my cluster has scrub errors but the deep-scrub command
> >> > doesn't show any errors.  Is there any way to know what it takes to fix it?
> >> >
> >> >
> >> >
> >> > # ceph health detail
> >> > HEALTH_ERR 1 pgs inconsistent; 47 scrub errors
> >> > pg 10.2a is active+clean+inconsistent, acting [41,38,8]
> >> > 47 scrub errors
> >> >
> >> > # zgrep 10.2a /var/log/ceph/ceph.log*
> >> > /var/log/ceph/ceph.log-20190323.gz:2019-03-22 16:20:18.148299 osd.41
> 192.168.4.19:6809/30077 54885 : cluster [INF] 10.2a deep-scrub starts
> >> > /var/log/ceph/ceph.log-20190323.gz:2019-03-22 18:29:02.024040 osd.41
> 192.168.4.19:6809/30077 54886 : cluster [ERR] 10.2a shard 38 missing
> 10/24083d2a/ec50777d-cc99-46a8-8610-4492213f412f/head
> >> > /var/log/ceph/ceph.log-20190323.gz:2019-03-22 18:29:02.024049 osd.41
> 192.168.4.19:6809/30077 54887 : cluster [ERR] 10.2a shard 38 missing
> 10/ff183d2a/fce859b9-61a9-46cb-82f1-4b4af31c10db/head
> >> > /var/log/ceph/ceph.log-20190323.gz:2019-03-22 18:29:02.024074 osd.41
> 192.168.4.19:6809/30077 54888 : cluster [ERR] 10.2a shard 38 missing
> 10/34283d2a/4b7c96cb-c494-4637-8669-e42049bd0e1c/head
> >> > /var/log/ceph/ceph.log-20190323.gz:2019-03-22 18:29:02.024076 osd.41
> 192.168.4.19:6809/30077 54889 : cluster [ERR] 10.2a shard 38 missing
> 10/df283d2a/bbe61149-99f8-4b83-a42b-b208d18094a8/head
> >> > /var/log/ceph/ceph.log-20190323.gz:2019-03-22 18:29:02.024077 osd.41
> 192.168.4.19:6809/30077 54890 : cluster [ERR] 10.2a shard 38 missing
> 10/35383d2a/60e8ed9b-bd04-5a43-8917-6f29eba28a66:0014/head
> >> > /var/log/ceph/ceph.log-20190323.gz:2019-03-22 18:29:02.024078 osd.41
> 192.168.4.19:6809/30077 54891 : cluster [ERR] 10.2a shard 38 missing
> 10/d5383d2a/2bdeb186-561b-4151-b87e-fe7c2e217d41/head
> >> > /var/log/ceph/ceph.log-20190323.gz:2019-03-22 18:29:02.024080 osd.41
> 192.168.4.19:6809/30077 54892 : cluster [ERR] 10.2a shard 38 missing
> 10/a7383d2a/b6b9d21d-2f4f-4550-8928-52552349db7d/head
> >> > /var/log/ceph/ceph.log-20190323.gz:2019-03-22 18:29:02.024081 osd.41
> 192.168.4.19:6809/30077 54893 : cluster [ERR] 10.2a shard 38 missing
> 10/9c383d2a/5b552687-c709-4e87-b773-1cce5b262754/head
> >> > /var/log/ceph/ceph.log-20190323.gz:2019-03-22 18:29:02.024082 osd.41
> 192.168.4.19:6809/30077 54894 : cluster [ERR] 10.2a shard 38 missing
> 10/5d383d2a/cb1a2ea8-0872-4de9-8b93-5ea8d9d8e613/head
> >> > /var/log/ceph/ceph.log-20190323.gz:2019-03-22 18:29:02.024083 osd.41
> 192.168.4.19:6809/30077 54895 : cluster [ERR] 10.2a shard 38 missing
> 10/8f483d2a/74c7a2b9-f00a-4c89-afbd-c1b8439234ac/head
> >> > /var/log/ceph/ceph.log-20190323.gz:2019-03-22 18:29:02.024085 osd.41
> 192.168.4.19:6809/30077 54896 : cluster [ERR] 10.2a shard 38 missing
> 10/b1583d2a/b3f00768-82a2-4637-91d1-164f3a51312a/head
> >> > /var/log/ceph/ceph.log-20190323.gz:2019-03-22 18:29:02.024086 osd.41
> 192.168.4.19:6809/30077 54897 : cluster [ERR] 10.2a shard 38 missing
> 10/35583d2a/e347aff4-7b71-476e-863a-310e767e4160/head
> >> > /var/log/ceph/ceph.log-20190323.gz:2019-03-22 18:29:02.024088 osd.41
> 192.168.4.19:6809/30077 54898 : cluster [ERR] 10.2a shard 38 missing
> 10/69583d2a/0805d07a-49d1-44cb-87c7-3bd73a0ce692/head
> >> > /var/log/ceph/ceph.log-20190323.gz:2019-03-22 18:29:02.024122 osd.41
> 192.168.4.19:6809/30077 54899 : cluster [ERR] 10.2a shard 38 missing
> 10/1a583d2a/d65bcf6a-9457-46c3-8fbc-432ebbaad89a/head
> >> > /var/log/ceph/ceph.log-20190323.gz:2019-03-22 18:29:02.024123 osd.41
> 192.168.4.19:6809/30077 54900 : cluster [ERR] 10.2a shard 38 missing
> 10/6d583d2a/5592f7d6-a131-4eb2-a3dd-b2d96691dd7e/head
> >> > /var/log/ceph/ceph.log-20190323.gz:2019-03-22 18:29:02.024124 osd.41
> 192.168.4.19:6809/30077 54901 : cluster [ERR] 10.2a shard 38 missing
> 10/f0683d2a/81897399-4cb0-59b3-b9ae-bf043a272137:0003/head
> >> >
> >> >
> >> >
> >> > # ceph pg deep-scrub 10.2a
> >> > instructing pg 10.2a on osd.41 to deep-scrub
> >> >
> >> >
> >> > # ceph -w | grep 10.2a
> >> >
> >> >
> >> > _______________________________________________
> >> > ceph-users mailing list
> >> > ceph-users at lists.ceph.com
> >> > http://lists.ceph.com/listinfo.cgi/ceph-users-ceph.com
> >>
> >>
> >>
> >> --
> >> Cheers,
> >> Brad
>
>
>
> --
> Cheers,
> Brad
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.ceph.com/pipermail/ceph-users-ceph.com/attachments/20190325/dc8b1f89/attachment.html>


More information about the ceph-users mailing list