[ceph-users] scrub errors

Brad Hubbard bhubbard at redhat.com
Tue Mar 26 16:52:53 PDT 2019


http://docs.ceph.com/docs/hammer/rados/troubleshooting/troubleshooting-pg/

Did you try repairing the pg?


On Tue, Mar 26, 2019 at 9:08 AM solarflow99 <solarflow99 at gmail.com> wrote:
>
> Yes, I know it's old.  I intend to have it replaced, but that's a few months away and I was hoping to get past this.  The other OSDs appear to be OK; I see them up and in.  Why, do you see something wrong?
>
> On Mon, Mar 25, 2019 at 4:00 PM Brad Hubbard <bhubbard at redhat.com> wrote:
>>
>> Hammer is no longer supported.
>>
>> What's the status of osds 7 and 17?
>>
>> On Tue, Mar 26, 2019 at 8:56 AM solarflow99 <solarflow99 at gmail.com> wrote:
>> >
>> > Hi, thanks.  It's still using Hammer.  Here's the output from the pg query; the last command you gave doesn't work at all, it must be too old.
>> >
>> >
>> > # ceph pg 10.2a query
>> > {
>> >     "state": "active+clean+inconsistent",
>> >     "snap_trimq": "[]",
>> >     "epoch": 23265,
>> >     "up": [
>> >         41,
>> >         38,
>> >         8
>> >     ],
>> >     "acting": [
>> >         41,
>> >         38,
>> >         8
>> >     ],
>> >     "actingbackfill": [
>> >         "8",
>> >         "38",
>> >         "41"
>> >     ],
>> >     "info": {
>> >         "pgid": "10.2a",
>> >         "last_update": "23265'20886859",
>> >         "last_complete": "23265'20886859",
>> >         "log_tail": "23265'20883809",
>> >         "last_user_version": 20886859,
>> >         "last_backfill": "MAX",
>> >         "purged_snaps": "[]",
>> >         "history": {
>> >             "epoch_created": 8200,
>> >             "last_epoch_started": 21481,
>> >             "last_epoch_clean": 21487,
>> >             "last_epoch_split": 0,
>> >             "same_up_since": 21472,
>> >             "same_interval_since": 21474,
>> >             "same_primary_since": 8244,
>> >             "last_scrub": "23265'20864209",
>> >             "last_scrub_stamp": "2019-03-22 22:39:13.930673",
>> >             "last_deep_scrub": "23265'20864209",
>> >             "last_deep_scrub_stamp": "2019-03-22 22:39:13.930673",
>> >             "last_clean_scrub_stamp": "2019-03-15 01:33:21.447438"
>> >         },
>> >         "stats": {
>> >             "version": "23265'20886859",
>> >             "reported_seq": "10109937",
>> >             "reported_epoch": "23265",
>> >             "state": "active+clean+inconsistent",
>> >             "last_fresh": "2019-03-25 15:52:53.720768",
>> >             "last_change": "2019-03-22 22:39:13.931038",
>> >             "last_active": "2019-03-25 15:52:53.720768",
>> >             "last_peered": "2019-03-25 15:52:53.720768",
>> >             "last_clean": "2019-03-25 15:52:53.720768",
>> >             "last_became_active": "0.000000",
>> >             "last_became_peered": "0.000000",
>> >             "last_unstale": "2019-03-25 15:52:53.720768",
>> >             "last_undegraded": "2019-03-25 15:52:53.720768",
>> >             "last_fullsized": "2019-03-25 15:52:53.720768",
>> >             "mapping_epoch": 21472,
>> >             "log_start": "23265'20883809",
>> >             "ondisk_log_start": "23265'20883809",
>> >             "created": 8200,
>> >             "last_epoch_clean": 21487,
>> >             "parent": "0.0",
>> >             "parent_split_bits": 0,
>> >             "last_scrub": "23265'20864209",
>> >             "last_scrub_stamp": "2019-03-22 22:39:13.930673",
>> >             "last_deep_scrub": "23265'20864209",
>> >             "last_deep_scrub_stamp": "2019-03-22 22:39:13.930673",
>> >             "last_clean_scrub_stamp": "2019-03-15 01:33:21.447438",
>> >             "log_size": 3050,
>> >             "ondisk_log_size": 3050,
>> >             "stats_invalid": "0",
>> >             "stat_sum": {
>> >                 "num_bytes": 8220278746,
>> >                 "num_objects": 345034,
>> >                 "num_object_clones": 0,
>> >                 "num_object_copies": 1035102,
>> >                 "num_objects_missing_on_primary": 0,
>> >                 "num_objects_degraded": 0,
>> >                 "num_objects_misplaced": 0,
>> >                 "num_objects_unfound": 0,
>> >                 "num_objects_dirty": 345034,
>> >                 "num_whiteouts": 0,
>> >                 "num_read": 7904350,
>> >                 "num_read_kb": 58116568,
>> >                 "num_write": 8753504,
>> >                 "num_write_kb": 85104263,
>> >                 "num_scrub_errors": 47,
>> >                 "num_shallow_scrub_errors": 47,
>> >                 "num_deep_scrub_errors": 0,
>> >                 "num_objects_recovered": 167138,
>> >                 "num_bytes_recovered": 5193543924,
>> >                 "num_keys_recovered": 0,
>> >                 "num_objects_omap": 0,
>> >                 "num_objects_hit_set_archive": 0,
>> >                 "num_bytes_hit_set_archive": 0
>> >             },
>> >             "up": [
>> >                 41,
>> >                 38,
>> >                 8
>> >             ],
>> >             "acting": [
>> >                 41,
>> >                 38,
>> >                 8
>> >             ],
>> >             "blocked_by": [],
>> >             "up_primary": 41,
>> >             "acting_primary": 41
>> >         },
>> >         "empty": 0,
>> >         "dne": 0,
>> >         "incomplete": 0,
>> >         "last_epoch_started": 21481,
>> >         "hit_set_history": {
>> >             "current_last_update": "0'0",
>> >             "current_last_stamp": "0.000000",
>> >             "current_info": {
>> >                 "begin": "0.000000",
>> >                 "end": "0.000000",
>> >                 "version": "0'0",
>> >                 "using_gmt": "0"
>> >             },
>> >             "history": []
>> >         }
>> >     },
>> >     "peer_info": [
>> >         {
>> >             "peer": "8",
>> >             "pgid": "10.2a",
>> >             "last_update": "23265'20886859",
>> >             "last_complete": "23265'20886859",
>> >             "log_tail": "21395'11840466",
>> >             "last_user_version": 11843648,
>> >             "last_backfill": "MAX",
>> >             "purged_snaps": "[]",
>> >             "history": {
>> >                 "epoch_created": 8200,
>> >                 "last_epoch_started": 21481,
>> >                 "last_epoch_clean": 21487,
>> >                 "last_epoch_split": 0,
>> >                 "same_up_since": 21472,
>> >                 "same_interval_since": 21474,
>> >                 "same_primary_since": 8244,
>> >                 "last_scrub": "23265'20864209",
>> >                 "last_scrub_stamp": "2019-03-22 22:39:13.930673",
>> >                 "last_deep_scrub": "23265'20864209",
>> >                 "last_deep_scrub_stamp": "2019-03-22 22:39:13.930673",
>> >                 "last_clean_scrub_stamp": "2019-03-15 01:33:21.447438"
>> >             },
>> >             "stats": {
>> >                 "version": "21471'11843647",
>> >                 "reported_seq": "7670875",
>> >                 "reported_epoch": "21471",
>> >                 "state": "active+undersized+degraded+remapped+wait_backfill",
>> >                 "last_fresh": "2018-09-22 07:07:23.061013",
>> >                 "last_change": "2018-09-22 06:39:32.487204",
>> >                 "last_active": "2018-09-22 07:07:23.061013",
>> >                 "last_peered": "2018-09-22 07:07:23.061013",
>> >                 "last_clean": "2018-09-22 06:33:47.246063",
>> >                 "last_became_active": "0.000000",
>> >                 "last_became_peered": "0.000000",
>> >                 "last_unstale": "2018-09-22 07:07:23.061013",
>> >                 "last_undegraded": "2018-09-22 06:39:13.626445",
>> >                 "last_fullsized": "2018-09-22 06:39:13.626445",
>> >                 "mapping_epoch": 21472,
>> >                 "log_start": "21395'11840466",
>> >                 "ondisk_log_start": "21395'11840466",
>> >                 "created": 8200,
>> >                 "last_epoch_clean": 21397,
>> >                 "parent": "0.0",
>> >                 "parent_split_bits": 0,
>> >                 "last_scrub": "21395'11835365",
>> >                 "last_scrub_stamp": "2018-09-21 12:11:47.230141",
>> >                 "last_deep_scrub": "21395'11835365",
>> >                 "last_deep_scrub_stamp": "2018-09-21 12:11:47.230141",
>> >                 "last_clean_scrub_stamp": "2018-09-21 12:11:47.230141",
>> >                 "log_size": 3181,
>> >                 "ondisk_log_size": 3181,
>> >                 "stats_invalid": "0",
>> >                 "stat_sum": {
>> >                     "num_bytes": 6406027390,
>> >                     "num_objects": 241710,
>> >                     "num_object_clones": 0,
>> >                     "num_object_copies": 966844,
>> >                     "num_objects_missing_on_primary": 0,
>> >                     "num_objects_degraded": 241715,
>> >                     "num_objects_misplaced": 725133,
>> >                     "num_objects_unfound": 0,
>> >                     "num_objects_dirty": 241710,
>> >                     "num_whiteouts": 0,
>> >                     "num_read": 5638025,
>> >                     "num_read_kb": 48736266,
>> >                     "num_write": 6789818,
>> >                     "num_write_kb": 67680335,
>> >                     "num_scrub_errors": 0,
>> >                     "num_shallow_scrub_errors": 0,
>> >                     "num_deep_scrub_errors": 0,
>> >                     "num_objects_recovered": 167079,
>> >                     "num_bytes_recovered": 5191625476,
>> >                     "num_keys_recovered": 0,
>> >                     "num_objects_omap": 0,
>> >                     "num_objects_hit_set_archive": 0,
>> >                     "num_bytes_hit_set_archive": 0
>> >                 },
>> >                 "up": [
>> >                     41,
>> >                     38,
>> >                     8
>> >                 ],
>> >                 "acting": [
>> >                     41,
>> >                     38,
>> >                     8
>> >                 ],
>> >                 "blocked_by": [],
>> >                 "up_primary": 41,
>> >                 "acting_primary": 41
>> >             },
>> >             "empty": 0,
>> >             "dne": 0,
>> >             "incomplete": 0,
>> >             "last_epoch_started": 21481,
>> >             "hit_set_history": {
>> >                 "current_last_update": "0'0",
>> >                 "current_last_stamp": "0.000000",
>> >                 "current_info": {
>> >                     "begin": "0.000000",
>> >                     "end": "0.000000",
>> >                     "version": "0'0",
>> >                     "using_gmt": "0"
>> >                 },
>> >                 "history": []
>> >             }
>> >         },
>> >         {
>> >             "peer": "38",
>> >             "pgid": "10.2a",
>> >             "last_update": "23265'20886859",
>> >             "last_complete": "21395'11843517",
>> >             "log_tail": "21395'11840466",
>> >             "last_user_version": 11843517,
>> >             "last_backfill": "MAX",
>> >             "purged_snaps": "[]",
>> >             "history": {
>> >                 "epoch_created": 8200,
>> >                 "last_epoch_started": 21481,
>> >                 "last_epoch_clean": 21487,
>> >                 "last_epoch_split": 0,
>> >                 "same_up_since": 21472,
>> >                 "same_interval_since": 21474,
>> >                 "same_primary_since": 8244,
>> >                 "last_scrub": "23265'20864209",
>> >                 "last_scrub_stamp": "2019-03-22 22:39:13.930673",
>> >                 "last_deep_scrub": "23265'20864209",
>> >                 "last_deep_scrub_stamp": "2019-03-22 22:39:13.930673",
>> >                 "last_clean_scrub_stamp": "2019-03-15 01:33:21.447438"
>> >             },
>> >             "stats": {
>> >                 "version": "21395'11843516",
>> >                 "reported_seq": "7670719",
>> >                 "reported_epoch": "21395",
>> >                 "state": "active+clean",
>> >                 "last_fresh": "2018-09-22 06:33:14.791334",
>> >                 "last_change": "2018-09-21 12:11:47.230557",
>> >                 "last_active": "2018-09-22 06:33:14.791334",
>> >                 "last_peered": "2018-09-22 06:33:14.791334",
>> >                 "last_clean": "2018-09-22 06:33:14.791334",
>> >                 "last_became_active": "0.000000",
>> >                 "last_became_peered": "0.000000",
>> >                 "last_unstale": "2018-09-22 06:33:14.791334",
>> >                 "last_undegraded": "2018-09-22 06:33:14.791334",
>> >                 "last_fullsized": "2018-09-22 06:33:14.791334",
>> >                 "mapping_epoch": 21472,
>> >                 "log_start": "21395'11840466",
>> >                 "ondisk_log_start": "21395'11840466",
>> >                 "created": 8200,
>> >                 "last_epoch_clean": 20840,
>> >                 "parent": "0.0",
>> >                 "parent_split_bits": 0,
>> >                 "last_scrub": "21395'11835365",
>> >                 "last_scrub_stamp": "2018-09-21 12:11:47.230141",
>> >                 "last_deep_scrub": "21395'11835365",
>> >                 "last_deep_scrub_stamp": "2018-09-21 12:11:47.230141",
>> >                 "last_clean_scrub_stamp": "2018-09-21 12:11:47.230141",
>> >                 "log_size": 3050,
>> >                 "ondisk_log_size": 3050,
>> >                 "stats_invalid": "0",
>> >                 "stat_sum": {
>> >                     "num_bytes": 6405126628,
>> >                     "num_objects": 241711,
>> >                     "num_object_clones": 0,
>> >                     "num_object_copies": 725130,
>> >                     "num_objects_missing_on_primary": 0,
>> >                     "num_objects_degraded": 0,
>> >                     "num_objects_misplaced": 0,
>> >                     "num_objects_unfound": 0,
>> >                     "num_objects_dirty": 241711,
>> >                     "num_whiteouts": 0,
>> >                     "num_read": 5637862,
>> >                     "num_read_kb": 48735376,
>> >                     "num_write": 6789687,
>> >                     "num_write_kb": 67678402,
>> >                     "num_scrub_errors": 0,
>> >                     "num_shallow_scrub_errors": 0,
>> >                     "num_deep_scrub_errors": 0,
>> >                     "num_objects_recovered": 167079,
>> >                     "num_bytes_recovered": 5191625476,
>> >                     "num_keys_recovered": 0,
>> >                     "num_objects_omap": 0,
>> >                     "num_objects_hit_set_archive": 0,
>> >                     "num_bytes_hit_set_archive": 0
>> >                 },
>> >                 "up": [
>> >                     41,
>> >                     38,
>> >                     8
>> >                 ],
>> >                 "acting": [
>> >                     41,
>> >                     38,
>> >                     8
>> >                 ],
>> >                 "blocked_by": [],
>> >                 "up_primary": 41,
>> >                 "acting_primary": 41
>> >             },
>> >             "empty": 0,
>> >             "dne": 0,
>> >             "incomplete": 0,
>> >             "last_epoch_started": 21481,
>> >             "hit_set_history": {
>> >                 "current_last_update": "0'0",
>> >                 "current_last_stamp": "0.000000",
>> >                 "current_info": {
>> >                     "begin": "0.000000",
>> >                     "end": "0.000000",
>> >                     "version": "0'0",
>> >                     "using_gmt": "0"
>> >                 },
>> >                 "history": []
>> >             }
>> >         }
>> >     ],
>> >     "recovery_state": [
>> >         {
>> >             "name": "Started\/Primary\/Active",
>> >             "enter_time": "2018-09-22 07:07:48.637248",
>> >             "might_have_unfound": [
>> >                 {
>> >                     "osd": "7",
>> >                     "status": "not queried"
>> >                 },
>> >                 {
>> >                     "osd": "8",
>> >                     "status": "already probed"
>> >                 },
>> >                 {
>> >                     "osd": "17",
>> >                     "status": "not queried"
>> >                 },
>> >                 {
>> >                     "osd": "38",
>> >                     "status": "already probed"
>> >                 }
>> >             ],
>> >             "recovery_progress": {
>> >                 "backfill_targets": [],
>> >                 "waiting_on_backfill": [],
>> >                 "last_backfill_started": "-1\/0\/\/0",
>> >                 "backfill_info": {
>> >                     "begin": "-1\/0\/\/0",
>> >                     "end": "-1\/0\/\/0",
>> >                     "objects": []
>> >                 },
>> >                 "peer_backfill_info": [],
>> >                 "backfills_in_flight": [],
>> >                 "recovering": [],
>> >                 "pg_backend": {
>> >                     "pull_from_peer": [],
>> >                     "pushing": []
>> >                 }
>> >             },
>> >             "scrub": {
>> >                 "scrubber.epoch_start": "21474",
>> >                 "scrubber.active": 0,
>> >                 "scrubber.waiting_on": 0,
>> >                 "scrubber.waiting_on_whom": []
>> >             }
>> >         },
>> >         {
>> >             "name": "Started",
>> >             "enter_time": "2018-09-22 07:07:42.138358"
>> >         }
>> >     ],
>> >     "agent_state": {}
>> > }
>> >
>> >
>> > On Mon, Mar 25, 2019 at 3:46 PM Brad Hubbard <bhubbard at redhat.com> wrote:
>> >>
>> >> It would help to know what version you are running but, to begin with,
>> >> could you post the output of the following?
>> >>
>> >> $ sudo ceph pg 10.2a query
>> >> $ sudo rados list-inconsistent-obj 10.2a --format=json-pretty
>> >>
>> >> Also, have a read of
>> >> http://docs.ceph.com/docs/mimic/rados/troubleshooting/troubleshooting-pg/
>> >> (adjust the URL for your release).
>> >>
>> >> On Tue, Mar 26, 2019 at 8:19 AM solarflow99 <solarflow99 at gmail.com> wrote:
>> >> >
>> >> > I noticed my cluster has scrub errors but the deep-scrub command doesn't show any errors.  Is there any way to know what it takes to fix it?
>> >> >
>> >> >
>> >> >
>> >> > # ceph health detail
>> >> > HEALTH_ERR 1 pgs inconsistent; 47 scrub errors
>> >> > pg 10.2a is active+clean+inconsistent, acting [41,38,8]
>> >> > 47 scrub errors
>> >> >
>> >> > # zgrep 10.2a /var/log/ceph/ceph.log*
>> >> > /var/log/ceph/ceph.log-20190323.gz:2019-03-22 16:20:18.148299 osd.41 192.168.4.19:6809/30077 54885 : cluster [INF] 10.2a deep-scrub starts
>> >> > /var/log/ceph/ceph.log-20190323.gz:2019-03-22 18:29:02.024040 osd.41 192.168.4.19:6809/30077 54886 : cluster [ERR] 10.2a shard 38 missing 10/24083d2a/ec50777d-cc99-46a8-8610-4492213f412f/head
>> >> > /var/log/ceph/ceph.log-20190323.gz:2019-03-22 18:29:02.024049 osd.41 192.168.4.19:6809/30077 54887 : cluster [ERR] 10.2a shard 38 missing 10/ff183d2a/fce859b9-61a9-46cb-82f1-4b4af31c10db/head
>> >> > /var/log/ceph/ceph.log-20190323.gz:2019-03-22 18:29:02.024074 osd.41 192.168.4.19:6809/30077 54888 : cluster [ERR] 10.2a shard 38 missing 10/34283d2a/4b7c96cb-c494-4637-8669-e42049bd0e1c/head
>> >> > /var/log/ceph/ceph.log-20190323.gz:2019-03-22 18:29:02.024076 osd.41 192.168.4.19:6809/30077 54889 : cluster [ERR] 10.2a shard 38 missing 10/df283d2a/bbe61149-99f8-4b83-a42b-b208d18094a8/head
>> >> > /var/log/ceph/ceph.log-20190323.gz:2019-03-22 18:29:02.024077 osd.41 192.168.4.19:6809/30077 54890 : cluster [ERR] 10.2a shard 38 missing 10/35383d2a/60e8ed9b-bd04-5a43-8917-6f29eba28a66:0014/head
>> >> > /var/log/ceph/ceph.log-20190323.gz:2019-03-22 18:29:02.024078 osd.41 192.168.4.19:6809/30077 54891 : cluster [ERR] 10.2a shard 38 missing 10/d5383d2a/2bdeb186-561b-4151-b87e-fe7c2e217d41/head
>> >> > /var/log/ceph/ceph.log-20190323.gz:2019-03-22 18:29:02.024080 osd.41 192.168.4.19:6809/30077 54892 : cluster [ERR] 10.2a shard 38 missing 10/a7383d2a/b6b9d21d-2f4f-4550-8928-52552349db7d/head
>> >> > /var/log/ceph/ceph.log-20190323.gz:2019-03-22 18:29:02.024081 osd.41 192.168.4.19:6809/30077 54893 : cluster [ERR] 10.2a shard 38 missing 10/9c383d2a/5b552687-c709-4e87-b773-1cce5b262754/head
>> >> > /var/log/ceph/ceph.log-20190323.gz:2019-03-22 18:29:02.024082 osd.41 192.168.4.19:6809/30077 54894 : cluster [ERR] 10.2a shard 38 missing 10/5d383d2a/cb1a2ea8-0872-4de9-8b93-5ea8d9d8e613/head
>> >> > /var/log/ceph/ceph.log-20190323.gz:2019-03-22 18:29:02.024083 osd.41 192.168.4.19:6809/30077 54895 : cluster [ERR] 10.2a shard 38 missing 10/8f483d2a/74c7a2b9-f00a-4c89-afbd-c1b8439234ac/head
>> >> > /var/log/ceph/ceph.log-20190323.gz:2019-03-22 18:29:02.024085 osd.41 192.168.4.19:6809/30077 54896 : cluster [ERR] 10.2a shard 38 missing 10/b1583d2a/b3f00768-82a2-4637-91d1-164f3a51312a/head
>> >> > /var/log/ceph/ceph.log-20190323.gz:2019-03-22 18:29:02.024086 osd.41 192.168.4.19:6809/30077 54897 : cluster [ERR] 10.2a shard 38 missing 10/35583d2a/e347aff4-7b71-476e-863a-310e767e4160/head
>> >> > /var/log/ceph/ceph.log-20190323.gz:2019-03-22 18:29:02.024088 osd.41 192.168.4.19:6809/30077 54898 : cluster [ERR] 10.2a shard 38 missing 10/69583d2a/0805d07a-49d1-44cb-87c7-3bd73a0ce692/head
>> >> > /var/log/ceph/ceph.log-20190323.gz:2019-03-22 18:29:02.024122 osd.41 192.168.4.19:6809/30077 54899 : cluster [ERR] 10.2a shard 38 missing 10/1a583d2a/d65bcf6a-9457-46c3-8fbc-432ebbaad89a/head
>> >> > /var/log/ceph/ceph.log-20190323.gz:2019-03-22 18:29:02.024123 osd.41 192.168.4.19:6809/30077 54900 : cluster [ERR] 10.2a shard 38 missing 10/6d583d2a/5592f7d6-a131-4eb2-a3dd-b2d96691dd7e/head
>> >> > /var/log/ceph/ceph.log-20190323.gz:2019-03-22 18:29:02.024124 osd.41 192.168.4.19:6809/30077 54901 : cluster [ERR] 10.2a shard 38 missing 10/f0683d2a/81897399-4cb0-59b3-b9ae-bf043a272137:0003/head
>> >> >
>> >> >
>> >> >
>> >> > # ceph pg deep-scrub 10.2a
>> >> > instructing pg 10.2a on osd.41 to deep-scrub
>> >> >
>> >> >
>> >> > # ceph -w | grep 10.2a
>> >> >
>> >> >
>> >> > _______________________________________________
>> >> > ceph-users mailing list
>> >> > ceph-users at lists.ceph.com
>> >> > http://lists.ceph.com/listinfo.cgi/ceph-users-ceph.com
>> >>
>> >>
>> >>
>> >> --
>> >> Cheers,
>> >> Brad
>>
>>
>>
>> --
>> Cheers,
>> Brad



--
Cheers,
Brad


More information about the ceph-users mailing list