ReactOS  0.4.14-dev-614-gbfd8a84
scrub.c
Go to the documentation of this file.
1 /* Copyright (c) Mark Harmstone 2017
2  *
3  * This file is part of WinBtrfs.
4  *
5  * WinBtrfs is free software: you can redistribute it and/or modify
6  * it under the terms of the GNU Lesser General Public Licence as published by
7  * the Free Software Foundation, either version 3 of the Licence, or
8  * (at your option) any later version.
9  *
10  * WinBtrfs is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  * GNU Lesser General Public Licence for more details.
14  *
15  * You should have received a copy of the GNU Lesser General Public Licence
16  * along with WinBtrfs. If not, see <http://www.gnu.org/licenses/>. */
17 
18 #include "btrfs_drv.h"
19 
20 #define SCRUB_UNIT 0x100000 // 1 MB
21 
22 struct _scrub_context;
23 
24 typedef struct {
31  bool csum_error;
34 
35 typedef struct _scrub_context {
40 
41 typedef struct {
45 } path_part;
46 
48  LIST_ENTRY *le, parts;
49  root* r = NULL;
50  KEY searchkey;
52  uint64_t dir;
53  bool orig_subvol = true, not_in_tree = false;
57  ULONG utf16len;
58 
59  le = Vcb->roots.Flink;
60  while (le != &Vcb->roots) {
62 
63  if (r2->id == subvol) {
64  r = r2;
65  break;
66  }
67 
68  le = le->Flink;
69  }
70 
71  if (!r) {
72  ERR("could not find subvol %I64x\n", subvol);
73  return;
74  }
75 
77 
78  dir = inode;
79 
80  while (true) {
81  if (dir == r->root_item.objid) {
82  if (r == Vcb->root_fileref->fcb->subvol)
83  break;
84 
85  searchkey.obj_id = r->id;
86  searchkey.obj_type = TYPE_ROOT_BACKREF;
87  searchkey.offset = 0xffffffffffffffff;
88 
89  Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, false, NULL);
90  if (!NT_SUCCESS(Status)) {
91  ERR("find_item returned %08x\n", Status);
92  goto end;
93  }
94 
95  if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == searchkey.obj_type) {
96  ROOT_REF* rr = (ROOT_REF*)tp.item->data;
97  path_part* pp;
98 
99  if (tp.item->size < sizeof(ROOT_REF)) {
100  ERR("(%I64x,%x,%I64x) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(ROOT_REF));
101  goto end;
102  }
103 
104  if (tp.item->size < offsetof(ROOT_REF, name[0]) + rr->n) {
105  ERR("(%I64x,%x,%I64x) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset,
106  tp.item->size, offsetof(ROOT_REF, name[0]) + rr->n);
107  goto end;
108  }
109 
111  if (!pp) {
112  ERR("out of memory\n");
113  goto end;
114  }
115 
116  pp->name.Buffer = rr->name;
117  pp->name.Length = pp->name.MaximumLength = rr->n;
118  pp->orig_subvol = false;
119 
120  InsertTailList(&parts, &pp->list_entry);
121 
122  r = NULL;
123 
124  le = Vcb->roots.Flink;
125  while (le != &Vcb->roots) {
127 
128  if (r2->id == tp.item->key.offset) {
129  r = r2;
130  break;
131  }
132 
133  le = le->Flink;
134  }
135 
136  if (!r) {
137  ERR("could not find subvol %I64x\n", tp.item->key.offset);
138  goto end;
139  }
140 
141  dir = rr->dir;
142  orig_subvol = false;
143  } else {
144  not_in_tree = true;
145  break;
146  }
147  } else {
148  searchkey.obj_id = dir;
149  searchkey.obj_type = TYPE_INODE_EXTREF;
150  searchkey.offset = 0xffffffffffffffff;
151 
152  Status = find_item(Vcb, r, &tp, &searchkey, false, NULL);
153  if (!NT_SUCCESS(Status)) {
154  ERR("find_item returned %08x\n", Status);
155  goto end;
156  }
157 
158  if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == TYPE_INODE_REF) {
159  INODE_REF* ir = (INODE_REF*)tp.item->data;
160  path_part* pp;
161 
162  if (tp.item->size < sizeof(INODE_REF)) {
163  ERR("(%I64x,%x,%I64x) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(INODE_REF));
164  goto end;
165  }
166 
167  if (tp.item->size < offsetof(INODE_REF, name[0]) + ir->n) {
168  ERR("(%I64x,%x,%I64x) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset,
169  tp.item->size, offsetof(INODE_REF, name[0]) + ir->n);
170  goto end;
171  }
172 
174  if (!pp) {
175  ERR("out of memory\n");
176  goto end;
177  }
178 
179  pp->name.Buffer = ir->name;
180  pp->name.Length = pp->name.MaximumLength = ir->n;
181  pp->orig_subvol = orig_subvol;
182 
183  InsertTailList(&parts, &pp->list_entry);
184 
185  if (dir == tp.item->key.offset)
186  break;
187 
188  dir = tp.item->key.offset;
189  } else if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == TYPE_INODE_EXTREF) {
191  path_part* pp;
192 
193  if (tp.item->size < sizeof(INODE_EXTREF)) {
194  ERR("(%I64x,%x,%I64x) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset,
195  tp.item->size, sizeof(INODE_EXTREF));
196  goto end;
197  }
198 
199  if (tp.item->size < offsetof(INODE_EXTREF, name[0]) + ier->n) {
200  ERR("(%I64x,%x,%I64x) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset,
201  tp.item->size, offsetof(INODE_EXTREF, name[0]) + ier->n);
202  goto end;
203  }
204 
206  if (!pp) {
207  ERR("out of memory\n");
208  goto end;
209  }
210 
211  pp->name.Buffer = ier->name;
212  pp->name.Length = pp->name.MaximumLength = ier->n;
213  pp->orig_subvol = orig_subvol;
214 
215  InsertTailList(&parts, &pp->list_entry);
216 
217  if (dir == ier->dir)
218  break;
219 
220  dir = ier->dir;
221  } else {
222  ERR("could not find INODE_REF for inode %I64x in subvol %I64x\n", dir, r->id);
223  goto end;
224  }
225  }
226  }
227 
228  fn.MaximumLength = 0;
229 
230  if (not_in_tree) {
231  le = parts.Blink;
232  while (le != &parts) {
234  LIST_ENTRY* le2 = le->Blink;
235 
236  if (pp->orig_subvol)
237  break;
238 
240  ExFreePool(pp);
241 
242  le = le2;
243  }
244  }
245 
246  le = parts.Flink;
247  while (le != &parts) {
249 
250  fn.MaximumLength += pp->name.Length + 1;
251 
252  le = le->Flink;
253  }
254 
255  fn.Buffer = ExAllocatePoolWithTag(PagedPool, fn.MaximumLength, ALLOC_TAG);
256  if (!fn.Buffer) {
257  ERR("out of memory\n");
258  goto end;
259  }
260 
261  fn.Length = 0;
262 
263  le = parts.Blink;
264  while (le != &parts) {
266 
267  fn.Buffer[fn.Length] = '\\';
268  fn.Length++;
269 
270  RtlCopyMemory(&fn.Buffer[fn.Length], pp->name.Buffer, pp->name.Length);
271  fn.Length += pp->name.Length;
272 
273  le = le->Blink;
274  }
275 
276  if (not_in_tree)
277  ERR("subvol %I64x, %.*s, offset %I64x\n", subvol, fn.Length, fn.Buffer, offset);
278  else
279  ERR("%.*s, offset %I64x\n", fn.Length, fn.Buffer, offset);
280 
281  Status = utf8_to_utf16(NULL, 0, &utf16len, fn.Buffer, fn.Length);
282  if (!NT_SUCCESS(Status)) {
283  ERR("utf8_to_utf16 1 returned %08x\n", Status);
284  ExFreePool(fn.Buffer);
285  goto end;
286  }
287 
289  if (!err) {
290  ERR("out of memory\n");
291  ExFreePool(fn.Buffer);
292  goto end;
293  }
294 
295  err->address = addr;
296  err->device = devid;
297  err->recovered = false;
298  err->is_metadata = false;
299  err->parity = false;
300 
301  err->data.subvol = not_in_tree ? subvol : 0;
302  err->data.offset = offset;
303  err->data.filename_length = (uint16_t)utf16len;
304 
305  Status = utf8_to_utf16(err->data.filename, utf16len, &utf16len, fn.Buffer, fn.Length);
306  if (!NT_SUCCESS(Status)) {
307  ERR("utf8_to_utf16 2 returned %08x\n", Status);
308  ExFreePool(fn.Buffer);
309  ExFreePool(err);
310  goto end;
311  }
312 
313  ExAcquireResourceExclusiveLite(&Vcb->scrub.stats_lock, true);
314 
315  Vcb->scrub.num_errors++;
316  InsertTailList(&Vcb->scrub.errors, &err->list_entry);
317 
318  ExReleaseResourceLite(&Vcb->scrub.stats_lock);
319 
320  ExFreePool(fn.Buffer);
321 
322 end:
323  while (!IsListEmpty(&parts)) {
325 
326  ExFreePool(pp);
327  }
328 }
329 
331  tree_header* tree;
333  leaf_node* ln;
334  ULONG i;
335 
336  tree = ExAllocatePoolWithTag(PagedPool, Vcb->superblock.node_size, ALLOC_TAG);
337  if (!tree) {
338  ERR("out of memory\n");
339  return;
340  }
341 
342  Status = read_data(Vcb, treeaddr, Vcb->superblock.node_size, NULL, true, (uint8_t*)tree, NULL, NULL, NULL, 0, false, NormalPagePriority);
343  if (!NT_SUCCESS(Status)) {
344  ERR("read_data returned %08x\n", Status);
345  goto end;
346  }
347 
348  if (tree->level != 0) {
349  ERR("tree level was %x, expected 0\n", tree->level);
350  goto end;
351  }
352 
353  ln = (leaf_node*)&tree[1];
354 
355  for (i = 0; i < tree->num_items; i++) {
356  if (ln[i].key.obj_type == TYPE_EXTENT_DATA && ln[i].size >= sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2)) {
357  EXTENT_DATA* ed = (EXTENT_DATA*)((uint8_t*)tree + sizeof(tree_header) + ln[i].offset);
359 
360  if (ed->type == EXTENT_TYPE_REGULAR && ed2->size != 0 && ed2->address == addr)
361  log_file_checksum_error(Vcb, addr, devid, tree->tree_id, ln[i].key.obj_id, ln[i].key.offset + addr - extent);
362  }
363  }
364 
365 end:
366  ExFreePool(tree);
367 }
368 
370  scrub_error* err;
371 
373  if (!err) {
374  ERR("out of memory\n");
375  return;
376  }
377 
378  err->address = addr;
379  err->device = devid;
380  err->recovered = false;
381  err->is_metadata = true;
382  err->parity = false;
383 
384  err->metadata.root = root;
385  err->metadata.level = level;
386 
387  if (firstitem) {
388  ERR("root %I64x, level %u, first item (%I64x,%x,%I64x)\n", root, level, firstitem->obj_id,
389  firstitem->obj_type, firstitem->offset);
390 
391  err->metadata.firstitem = *firstitem;
392  } else {
393  ERR("root %I64x, level %u\n", root, level);
394 
395  RtlZeroMemory(&err->metadata.firstitem, sizeof(KEY));
396  }
397 
398  ExAcquireResourceExclusiveLite(&Vcb->scrub.stats_lock, true);
399 
400  Vcb->scrub.num_errors++;
401  InsertTailList(&Vcb->scrub.errors, &err->list_entry);
402 
403  ExReleaseResourceLite(&Vcb->scrub.stats_lock);
404 }
405 
407  tree_header* tree;
409  internal_node* in;
410  ULONG i;
411 
412  tree = ExAllocatePoolWithTag(PagedPool, Vcb->superblock.node_size, ALLOC_TAG);
413  if (!tree) {
414  ERR("out of memory\n");
415  return;
416  }
417 
418  Status = read_data(Vcb, offset, Vcb->superblock.node_size, NULL, true, (uint8_t*)tree, NULL, NULL, NULL, 0, false, NormalPagePriority);
419  if (!NT_SUCCESS(Status)) {
420  ERR("read_data returned %08x\n", Status);
421  goto end;
422  }
423 
424  if (tree->level == 0) {
425  ERR("tree level was 0\n");
426  goto end;
427  }
428 
429  in = (internal_node*)&tree[1];
430 
431  for (i = 0; i < tree->num_items; i++) {
432  if (in[i].address == address) {
433  log_tree_checksum_error(Vcb, address, devid, tree->tree_id, tree->level - 1, &in[i].key);
434  break;
435  }
436  }
437 
438 end:
439  ExFreePool(tree);
440 }
441 
443  KEY searchkey;
446  EXTENT_ITEM* ei;
447  EXTENT_ITEM2* ei2 = NULL;
448  uint8_t* ptr;
449  ULONG len;
450  uint64_t rc;
451 
452  // FIXME - still log even if rest of this function fails
453 
454  searchkey.obj_id = address;
455  searchkey.obj_type = TYPE_METADATA_ITEM;
456  searchkey.offset = 0xffffffffffffffff;
457 
458  Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, false, NULL);
459  if (!NT_SUCCESS(Status)) {
460  ERR("find_item returned %08x\n", Status);
461  return;
462  }
463 
465  tp.item->key.obj_id >= address + Vcb->superblock.sector_size ||
467  (tp.item->key.obj_type == TYPE_METADATA_ITEM && tp.item->key.obj_id + Vcb->superblock.node_size <= address)
468  )
469  return;
470 
471  if (tp.item->size < sizeof(EXTENT_ITEM)) {
472  ERR("(%I64x,%x,%I64x) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_ITEM));
473  return;
474  }
475 
476  ei = (EXTENT_ITEM*)tp.item->data;
477  ptr = (uint8_t*)&ei[1];
478  len = tp.item->size - sizeof(EXTENT_ITEM);
479 
481  if (tp.item->size < sizeof(EXTENT_ITEM) + sizeof(EXTENT_ITEM2)) {
482  ERR("(%I64x,%x,%I64x) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset,
483  tp.item->size, sizeof(EXTENT_ITEM) + sizeof(EXTENT_ITEM2));
484  return;
485  }
486 
487  ei2 = (EXTENT_ITEM2*)ptr;
488 
489  ptr += sizeof(EXTENT_ITEM2);
490  len -= sizeof(EXTENT_ITEM2);
491  }
492 
493  rc = 0;
494 
495  while (len > 0) {
496  uint8_t type = *ptr;
497 
498  ptr++;
499  len--;
500 
501  if (type == TYPE_TREE_BLOCK_REF) {
502  TREE_BLOCK_REF* tbr;
503 
504  if (len < sizeof(TREE_BLOCK_REF)) {
505  ERR("TREE_BLOCK_REF takes up %u bytes, but only %u remaining\n", sizeof(TREE_BLOCK_REF), len);
506  break;
507  }
508 
509  tbr = (TREE_BLOCK_REF*)ptr;
510 
511  log_tree_checksum_error(Vcb, address, devid, tbr->offset, ei2 ? ei2->level : (uint8_t)tp.item->key.offset, ei2 ? &ei2->firstitem : NULL);
512 
513  rc++;
514 
515  ptr += sizeof(TREE_BLOCK_REF);
516  len -= sizeof(TREE_BLOCK_REF);
517  } else if (type == TYPE_EXTENT_DATA_REF) {
518  EXTENT_DATA_REF* edr;
519 
520  if (len < sizeof(EXTENT_DATA_REF)) {
521  ERR("EXTENT_DATA_REF takes up %u bytes, but only %u remaining\n", sizeof(EXTENT_DATA_REF), len);
522  break;
523  }
524 
525  edr = (EXTENT_DATA_REF*)ptr;
526 
527  log_file_checksum_error(Vcb, address, devid, edr->root, edr->objid, edr->offset + address - tp.item->key.obj_id);
528 
529  rc += edr->count;
530 
531  ptr += sizeof(EXTENT_DATA_REF);
532  len -= sizeof(EXTENT_DATA_REF);
533  } else if (type == TYPE_SHARED_BLOCK_REF) {
534  SHARED_BLOCK_REF* sbr;
535 
536  if (len < sizeof(SHARED_BLOCK_REF)) {
537  ERR("SHARED_BLOCK_REF takes up %u bytes, but only %u remaining\n", sizeof(SHARED_BLOCK_REF), len);
538  break;
539  }
540 
541  sbr = (SHARED_BLOCK_REF*)ptr;
542 
544 
545  rc++;
546 
547  ptr += sizeof(SHARED_BLOCK_REF);
548  len -= sizeof(SHARED_BLOCK_REF);
549  } else if (type == TYPE_SHARED_DATA_REF) {
550  SHARED_DATA_REF* sdr;
551 
552  if (len < sizeof(SHARED_DATA_REF)) {
553  ERR("SHARED_DATA_REF takes up %u bytes, but only %u remaining\n", sizeof(SHARED_DATA_REF), len);
554  break;
555  }
556 
557  sdr = (SHARED_DATA_REF*)ptr;
558 
560 
561  rc += sdr->count;
562 
563  ptr += sizeof(SHARED_DATA_REF);
564  len -= sizeof(SHARED_DATA_REF);
565  } else {
566  ERR("unknown extent type %x\n", type);
567  break;
568  }
569  }
570 
571  if (rc < ei->refcount) {
572  do {
573  traverse_ptr next_tp;
574 
575  if (find_next_item(Vcb, &tp, &next_tp, false, NULL))
576  tp = next_tp;
577  else
578  break;
579 
580  if (tp.item->key.obj_id == address) {
582  log_tree_checksum_error(Vcb, address, devid, tp.item->key.offset, ei2 ? ei2->level : (uint8_t)tp.item->key.offset, ei2 ? &ei2->firstitem : NULL);
583  else if (tp.item->key.obj_type == TYPE_EXTENT_DATA_REF) {
584  EXTENT_DATA_REF* edr;
585 
586  if (tp.item->size < sizeof(EXTENT_DATA_REF)) {
587  ERR("(%I64x,%x,%I64x) was %u bytes, expected %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset,
588  tp.item->size, sizeof(EXTENT_DATA_REF));
589  break;
590  }
591 
592  edr = (EXTENT_DATA_REF*)tp.item->data;
593 
594  log_file_checksum_error(Vcb, address, devid, edr->root, edr->objid, edr->offset + address - tp.item->key.obj_id);
595  } else if (tp.item->key.obj_type == TYPE_SHARED_BLOCK_REF)
597  else if (tp.item->key.obj_type == TYPE_SHARED_DATA_REF)
599  } else
600  break;
601  } while (true);
602  }
603 }
604 
// Records one scrub error found at address `addr` on device `devid`.
//
// recoverable == true:  prints a "recovering from ..." message chosen by
//   `parity` and `metadata`, fills in a scrub_error (recovered = true, with
//   the metadata or data member zeroed) and, while holding
//   Vcb->scrub.stats_lock exclusively, increments Vcb->scrub.num_errors and
//   appends the record to Vcb->scrub.errors.
// recoverable == false: prints an "unrecoverable ..." message chosen by
//   `metadata`; no other statement is visible on that path in this listing.
//
// Parameters:
//   Vcb         - device extension whose scrub counter and error list are updated
//   addr        - address printed in the message and stored in err->address
//   devid       - device id; printed and stored in err->device on the
//                 recoverable path, not used by the visible unrecoverable path
//   metadata    - true for a metadata error, false for a data error
//   recoverable - selects between the two paths described above
//   parity      - stored in err->parity; when set, the recoverable message is
//                 the parity one regardless of `metadata`
//
// Returns nothing. If `err` is NULL, "out of memory" is printed and the error
// is not added to the list.
605 static void log_error(device_extension* Vcb, uint64_t addr, uint64_t devid, bool metadata, bool recoverable, bool parity) {
606  if (recoverable) {
607  scrub_error* err;
608 
609  if (parity) {
610  ERR("recovering from parity error at %I64x on device %I64x\n", addr, devid);
611  } else {
612  if (metadata)
613  ERR("recovering from metadata checksum error at %I64x on device %I64x\n", addr, devid);
614  else
615  ERR("recovering from data checksum error at %I64x on device %I64x\n", addr, devid);
616  }
617 
// NOTE(review): no assignment to `err` is visible in this listing (its
// embedded numbering jumps from 617 to 619); presumably the allocation sits
// on the missing line - confirm against the full file.
619  if (!err) {
620  ERR("out of memory\n");
621  return;
622  }
623 
// Describe the error; `recovered` is always true on this path.
624  err->address = addr;
625  err->device = devid;
626  err->recovered = true;
627  err->is_metadata = metadata;
628  err->parity = parity;
629 
// Only the member matching the error kind is cleared; no per-tree or
// per-file details are filled in here.
630  if (metadata)
631  RtlZeroMemory(&err->metadata, sizeof(err->metadata));
632  else
633  RtlZeroMemory(&err->data, sizeof(err->data));
634 
// Update the error counter and the error list while holding stats_lock
// exclusively (second argument true = wait until the lock is acquired).
635  ExAcquireResourceExclusiveLite(&Vcb->scrub.stats_lock, true);
636 
637  Vcb->scrub.num_errors++;
638  InsertTailList(&Vcb->scrub.errors, &err->list_entry);
639 
640  ExReleaseResourceLite(&Vcb->scrub.stats_lock);
641  } else {
642  if (metadata)
643  ERR("unrecoverable metadata checksum error at %I64x\n", addr);
644  else
645  ERR("unrecoverable data checksum error at %I64x\n", addr);
646 
// NOTE(review): the listing skips its embedded line 647 here; whatever
// statement it holds is not visible - confirm against the full file.
648  }
649 }
650 
// I/O completion routine for a scrub stripe request.
//
// conptr is the scrub_context_stripe the request was issued for. The routine
// atomically decrements context->stripes_left, copies the IRP's final status
// block into stripe->iosb, and sets context->Event once the decremented count
// reaches zero.
//
// DeviceObject is not used by any line visible here.
//
// NOTE(review): `context` is used but not declared in the visible lines, and
// no return statement is visible (the listing's embedded numbering skips 654,
// 657 and 664) - confirm the missing statements against the full file.
651 _Function_class_(IO_COMPLETION_ROUTINE)
652 static NTSTATUS __stdcall scrub_read_completion(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID conptr) {
653  scrub_context_stripe* stripe = conptr;
// InterlockedDecrement returns the value after the decrement, so `left` is
// the number of stripes still outstanding.
655  ULONG left = InterlockedDecrement(&context->stripes_left);
656 
658 
// Keep the request's final status so it can be read from the stripe later.
659  stripe->iosb = Irp->IoStatus;
660 
// Only the completion that brings the count to zero signals the event.
661  if (left == 0)
662  KeSetEvent(&context->Event, 0, false);
663 
665 }
666 
669  bool csum_error = false;
670  ULONG i;
671  CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&c->chunk_item[1];
672  uint16_t present_devices = 0;
673 
674  if (csum) {
675  ULONG good_stripe = 0xffffffff;
676 
677  for (i = 0; i < c->chunk_item->num_stripes; i++) {
678  if (c->devices[i]->devobj) {
679  present_devices++;
680 
681  // if first stripe is okay, we only need to check that the others are identical to it
682  if (good_stripe != 0xffffffff) {
683  if (RtlCompareMemory(context->stripes[i].buf, context->stripes[good_stripe].buf,
684  context->stripes[good_stripe].length) != context->stripes[i].length) {
685  context->stripes[i].csum_error = true;
686  csum_error = true;
688  }
689  } else {
690  Status = check_csum(Vcb, context->stripes[i].buf, context->stripes[i].length / Vcb->superblock.sector_size, csum);
691  if (Status == STATUS_CRC_ERROR) {
692  context->stripes[i].csum_error = true;
693  csum_error = true;
695  } else if (!NT_SUCCESS(Status)) {
696  ERR("check_csum returned %08x\n", Status);
697  return Status;
698  } else
699  good_stripe = i;
700  }
701  }
702  }
703  } else {
704  ULONG good_stripe = 0xffffffff;
705 
706  for (i = 0; i < c->chunk_item->num_stripes; i++) {
707  ULONG j;
708 
709  if (c->devices[i]->devobj) {
710  // if first stripe is okay, we only need to check that the others are identical to it
711  if (good_stripe != 0xffffffff) {
712  if (RtlCompareMemory(context->stripes[i].buf, context->stripes[good_stripe].buf,
713  context->stripes[good_stripe].length) != context->stripes[i].length) {
714  context->stripes[i].csum_error = true;
715  csum_error = true;
717  }
718  } else {
719  for (j = 0; j < context->stripes[i].length / Vcb->superblock.node_size; j++) {
720  tree_header* th = (tree_header*)&context->stripes[i].buf[j * Vcb->superblock.node_size];
721  uint32_t crc32 = ~calc_crc32c(0xffffffff, (uint8_t*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum));
722 
723  if (crc32 != *((uint32_t*)th->csum) || th->address != offset + UInt32x32To64(j, Vcb->superblock.node_size)) {
724  context->stripes[i].csum_error = true;
725  csum_error = true;
727  }
728  }
729 
730  if (!context->stripes[i].csum_error)
731  good_stripe = i;
732  }
733  }
734  }
735  }
736 
737  if (!csum_error)
738  return STATUS_SUCCESS;
739 
740  // handle checksum error
741 
742  for (i = 0; i < c->chunk_item->num_stripes; i++) {
743  if (context->stripes[i].csum_error) {
744  if (csum) {
745  context->stripes[i].bad_csums = ExAllocatePoolWithTag(PagedPool, context->stripes[i].length * sizeof(uint32_t) / Vcb->superblock.sector_size, ALLOC_TAG);
746  if (!context->stripes[i].bad_csums) {
747  ERR("out of memory\n");
749  }
750 
751  Status = calc_csum(Vcb, context->stripes[i].buf, context->stripes[i].length / Vcb->superblock.sector_size, context->stripes[i].bad_csums);
752  if (!NT_SUCCESS(Status)) {
753  ERR("calc_csum returned %08x\n", Status);
754  return Status;
755  }
756  } else {
757  ULONG j;
758 
759  context->stripes[i].bad_csums = ExAllocatePoolWithTag(PagedPool, context->stripes[i].length * sizeof(uint32_t) / Vcb->superblock.node_size, ALLOC_TAG);
760  if (!context->stripes[i].bad_csums) {
761  ERR("out of memory\n");
763  }
764 
765  for (j = 0; j < context->stripes[i].length / Vcb->superblock.node_size; j++) {
766  tree_header* th = (tree_header*)&context->stripes[i].buf[j * Vcb->superblock.node_size];
767  uint32_t crc32 = ~calc_crc32c(0xffffffff, (uint8_t*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum));
768 
769  context->stripes[i].bad_csums[j] = crc32;
770  }
771  }
772  }
773  }
774 
775  if (present_devices > 1) {
776  ULONG good_stripe = 0xffffffff;
777 
778  for (i = 0; i < c->chunk_item->num_stripes; i++) {
779  if (c->devices[i]->devobj && !context->stripes[i].csum_error) {
780  good_stripe = i;
781  break;
782  }
783  }
784 
785  if (good_stripe != 0xffffffff) {
786  // log
787 
788  for (i = 0; i < c->chunk_item->num_stripes; i++) {
789  if (context->stripes[i].csum_error) {
790  ULONG j;
791 
792  if (csum) {
793  for (j = 0; j < context->stripes[i].length / Vcb->superblock.sector_size; j++) {
794  if (context->stripes[i].bad_csums[j] != csum[j]) {
795  uint64_t addr = offset + UInt32x32To64(j, Vcb->superblock.sector_size);
796 
797  log_error(Vcb, addr, c->devices[i]->devitem.dev_id, false, true, false);
799  }
800  }
801  } else {
802  for (j = 0; j < context->stripes[i].length / Vcb->superblock.node_size; j++) {
803  tree_header* th = (tree_header*)&context->stripes[i].buf[j * Vcb->superblock.node_size];
804  uint64_t addr = offset + UInt32x32To64(j, Vcb->superblock.node_size);
805 
806  if (context->stripes[i].bad_csums[j] != *((uint32_t*)th->csum) || th->address != addr) {
807  log_error(Vcb, addr, c->devices[i]->devitem.dev_id, true, true, false);
809  }
810  }
811  }
812  }
813  }
814 
815  // write good data over bad
816 
817  for (i = 0; i < c->chunk_item->num_stripes; i++) {
818  if (context->stripes[i].csum_error && !c->devices[i]->readonly) {
819  Status = write_data_phys(c->devices[i]->devobj, c->devices[i]->fileobj, cis[i].offset + offset - c->offset,
820  context->stripes[good_stripe].buf, context->stripes[i].length);
821 
822  if (!NT_SUCCESS(Status)) {
823  ERR("write_data_phys returned %08x\n", Status);
825  return Status;
826  }
827  }
828  }
829 
830  return STATUS_SUCCESS;
831  }
832 
833  // if csum errors on all stripes, check sector by sector
834 
835  for (i = 0; i < c->chunk_item->num_stripes; i++) {
836  ULONG j;
837 
838  if (c->devices[i]->devobj) {
839  if (csum) {
840  for (j = 0; j < context->stripes[i].length / Vcb->superblock.sector_size; j++) {
841  if (context->stripes[i].bad_csums[j] != csum[j]) {
842  ULONG k;
843  uint64_t addr = offset + UInt32x32To64(j, Vcb->superblock.sector_size);
844  bool recovered = false;
845 
846  for (k = 0; k < c->chunk_item->num_stripes; k++) {
847  if (i != k && c->devices[k]->devobj && context->stripes[k].bad_csums[j] == csum[j]) {
848  log_error(Vcb, addr, c->devices[i]->devitem.dev_id, false, true, false);
850 
851  RtlCopyMemory(context->stripes[i].buf + (j * Vcb->superblock.sector_size),
852  context->stripes[k].buf + (j * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
853 
854  recovered = true;
855  break;
856  }
857  }
858 
859  if (!recovered) {
860  log_error(Vcb, addr, c->devices[i]->devitem.dev_id, false, false, false);
862  }
863  }
864  }
865  } else {
866  for (j = 0; j < context->stripes[i].length / Vcb->superblock.node_size; j++) {
867  tree_header* th = (tree_header*)&context->stripes[i].buf[j * Vcb->superblock.node_size];
868  uint64_t addr = offset + UInt32x32To64(j, Vcb->superblock.node_size);
869 
870  if (context->stripes[i].bad_csums[j] != *((uint32_t*)th->csum) || th->address != addr) {
871  ULONG k;
872  bool recovered = false;
873 
874  for (k = 0; k < c->chunk_item->num_stripes; k++) {
875  if (i != k && c->devices[k]->devobj) {
876  tree_header* th2 = (tree_header*)&context->stripes[k].buf[j * Vcb->superblock.node_size];
877 
878  if (context->stripes[k].bad_csums[j] == *((uint32_t*)th2->csum) && th2->address == addr) {
879  log_error(Vcb, addr, c->devices[i]->devitem.dev_id, true, true, false);
881 
882  RtlCopyMemory(th, th2, Vcb->superblock.node_size);
883 
884  recovered = true;
885  break;
886  }
887  }
888  }
889 
890  if (!recovered) {
891  log_error(Vcb, addr, c->devices[i]->devitem.dev_id, true, false, false);
893  }
894  }
895  }
896  }
897  }
898  }
899 
900  // write good data over bad
901 
902  for (i = 0; i < c->chunk_item->num_stripes; i++) {
903  if (c->devices[i]->devobj && !c->devices[i]->readonly) {
904  Status = write_data_phys(c->devices[i]->devobj, c->devices[i]->fileobj, cis[i].offset + offset - c->offset,
905  context->stripes[i].buf, context->stripes[i].length);
906  if (!NT_SUCCESS(Status)) {
907  ERR("write_data_phys returned %08x\n", Status);
909  return Status;
910  }
911  }
912  }
913 
914  return STATUS_SUCCESS;
915  }
916 
917  for (i = 0; i < c->chunk_item->num_stripes; i++) {
918  if (c->devices[i]->devobj) {
919  ULONG j;
920 
921  if (csum) {
922  for (j = 0; j < context->stripes[i].length / Vcb->superblock.sector_size; j++) {
923  if (context->stripes[i].bad_csums[j] != csum[j]) {
924  uint64_t addr = offset + UInt32x32To64(j, Vcb->superblock.sector_size);
925 
926  log_error(Vcb, addr, c->devices[i]->devitem.dev_id, false, false, false);
927  }
928  }
929  } else {
930  for (j = 0; j < context->stripes[i].length / Vcb->superblock.node_size; j++) {
931  tree_header* th = (tree_header*)&context->stripes[i].buf[j * Vcb->superblock.node_size];
932  uint64_t addr = offset + UInt32x32To64(j, Vcb->superblock.node_size);
933 
934  if (context->stripes[i].bad_csums[j] != *((uint32_t*)th->csum) || th->address != addr)
935  log_error(Vcb, addr, c->devices[i]->devitem.dev_id, true, false, false);
936  }
937  }
938  }
939  }
940 
941  return STATUS_SUCCESS;
942 }
943 
945  ULONG j;
947  uint32_t pos, *stripeoff;
948 
949  pos = 0;
950  stripeoff = ExAllocatePoolWithTag(NonPagedPool, sizeof(uint32_t) * c->chunk_item->num_stripes, ALLOC_TAG);
951  if (!stripeoff) {
952  ERR("out of memory\n");
954  }
955 
956  RtlZeroMemory(stripeoff, sizeof(uint32_t) * c->chunk_item->num_stripes);
957 
958  stripe = startoffstripe;
959  while (pos < length) {
960  uint32_t readlen;
961 
962  if (pos == 0)
963  readlen = (uint32_t)min(context->stripes[stripe].length, c->chunk_item->stripe_length - (context->stripes[stripe].start % c->chunk_item->stripe_length));
964  else
965  readlen = min(length - pos, (uint32_t)c->chunk_item->stripe_length);
966 
967  if (csum) {
968  for (j = 0; j < readlen; j += Vcb->superblock.sector_size) {
969  uint32_t crc32 = ~calc_crc32c(0xffffffff, context->stripes[stripe].buf + stripeoff[stripe], Vcb->superblock.sector_size);
970 
971  if (crc32 != csum[pos / Vcb->superblock.sector_size]) {
972  uint64_t addr = offset + pos;
973 
974  log_error(Vcb, addr, c->devices[stripe]->devitem.dev_id, false, false, false);
976  }
977 
978  pos += Vcb->superblock.sector_size;
979  stripeoff[stripe] += Vcb->superblock.sector_size;
980  }
981  } else {
982  for (j = 0; j < readlen; j += Vcb->superblock.node_size) {
983  tree_header* th = (tree_header*)(context->stripes[stripe].buf + stripeoff[stripe]);
984  uint32_t crc32 = ~calc_crc32c(0xffffffff, (uint8_t*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum));
985  uint64_t addr = offset + pos;
986 
987  if (crc32 != *((uint32_t*)th->csum) || th->address != addr) {
988  log_error(Vcb, addr, c->devices[stripe]->devitem.dev_id, true, false, false);
990  }
991 
992  pos += Vcb->superblock.node_size;
993  stripeoff[stripe] += Vcb->superblock.node_size;
994  }
995  }
996 
997  stripe = (stripe + 1) % c->chunk_item->num_stripes;
998  }
999 
1000  ExFreePool(stripeoff);
1001 
1002  return STATUS_SUCCESS;
1003 }
1004 
1006  ULONG j;
1007  uint16_t stripe, sub_stripes = max(c->chunk_item->sub_stripes, 1);
1008  uint32_t pos, *stripeoff;
1009  bool csum_error = false;
1010  NTSTATUS Status;
1011 
1012  pos = 0;
1013  stripeoff = ExAllocatePoolWithTag(NonPagedPool, sizeof(uint32_t) * c->chunk_item->num_stripes / sub_stripes, ALLOC_TAG);
1014  if (!stripeoff) {
1015  ERR("out of memory\n");
1017  }
1018 
1019  RtlZeroMemory(stripeoff, sizeof(uint32_t) * c->chunk_item->num_stripes / sub_stripes);
1020 
1021  stripe = startoffstripe;
1022  while (pos < length) {
1023  uint32_t readlen;
1024 
1025  if (pos == 0)
1026  readlen = (uint32_t)min(context->stripes[stripe * sub_stripes].length,
1027  c->chunk_item->stripe_length - (context->stripes[stripe * sub_stripes].start % c->chunk_item->stripe_length));
1028  else
1029  readlen = min(length - pos, (uint32_t)c->chunk_item->stripe_length);
1030 
1031  if (csum) {
1032  ULONG good_stripe = 0xffffffff;
1033  uint16_t k;
1034 
1035  for (k = 0; k < sub_stripes; k++) {
1036  if (c->devices[(stripe * sub_stripes) + k]->devobj) {
1037  // if first stripe is okay, we only need to check that the others are identical to it
1038  if (good_stripe != 0xffffffff) {
1039  if (RtlCompareMemory(context->stripes[(stripe * sub_stripes) + k].buf + stripeoff[stripe],
1040  context->stripes[(stripe * sub_stripes) + good_stripe].buf + stripeoff[stripe],
1041  readlen) != readlen) {
1042  context->stripes[(stripe * sub_stripes) + k].csum_error = true;
1043  csum_error = true;
1044  log_device_error(Vcb, c->devices[(stripe * sub_stripes) + k], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
1045  }
1046  } else {
1047  for (j = 0; j < readlen; j += Vcb->superblock.sector_size) {
1048  uint32_t crc32 = ~calc_crc32c(0xffffffff, context->stripes[(stripe * sub_stripes) + k].buf + stripeoff[stripe] + j, Vcb->superblock.sector_size);
1049 
1050  if (crc32 != csum[(pos + j) / Vcb->superblock.sector_size]) {
1051  csum_error = true;
1052  context->stripes[(stripe * sub_stripes) + k].csum_error = true;
1053  log_device_error(Vcb, c->devices[(stripe * sub_stripes) + k], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
1054  break;
1055  }
1056  }
1057 
1058  if (!context->stripes[(stripe * sub_stripes) + k].csum_error)
1059  good_stripe = k;
1060  }
1061  }
1062  }
1063 
1064  pos += readlen;
1065  stripeoff[stripe] += readlen;
1066  } else {
1067  ULONG good_stripe = 0xffffffff;
1068  uint16_t k;
1069 
1070  for (k = 0; k < sub_stripes; k++) {
1071  if (c->devices[(stripe * sub_stripes) + k]->devobj) {
1072  // if first stripe is okay, we only need to check that the others are identical to it
1073  if (good_stripe != 0xffffffff) {
1074  if (RtlCompareMemory(context->stripes[(stripe * sub_stripes) + k].buf + stripeoff[stripe],
1075  context->stripes[(stripe * sub_stripes) + good_stripe].buf + stripeoff[stripe],
1076  readlen) != readlen) {
1077  context->stripes[(stripe * sub_stripes) + k].csum_error = true;
1078  csum_error = true;
1079  log_device_error(Vcb, c->devices[(stripe * sub_stripes) + k], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
1080  }
1081  } else {
1082  for (j = 0; j < readlen; j += Vcb->superblock.node_size) {
1083  tree_header* th = (tree_header*)(context->stripes[(stripe * sub_stripes) + k].buf + stripeoff[stripe] + j);
1084  uint32_t crc32 = ~calc_crc32c(0xffffffff, (uint8_t*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum));
1085  uint64_t addr = offset + pos + j;
1086 
1087  if (crc32 != *((uint32_t*)th->csum) || th->address != addr) {
1088  csum_error = true;
1089  context->stripes[(stripe * sub_stripes) + k].csum_error = true;
1090  log_device_error(Vcb, c->devices[(stripe * sub_stripes) + k], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
1091  break;
1092  }
1093  }
1094 
1095  if (!context->stripes[(stripe * sub_stripes) + k].csum_error)
1096  good_stripe = k;
1097  }
1098  }
1099  }
1100 
1101  pos += readlen;
1102  stripeoff[stripe] += readlen;
1103  }
1104 
1105  stripe = (stripe + 1) % (c->chunk_item->num_stripes / sub_stripes);
1106  }
1107 
1108  if (!csum_error) {
1110  goto end;
1111  }
1112 
1113  for (j = 0; j < c->chunk_item->num_stripes; j += sub_stripes) {
1114  ULONG goodstripe = 0xffffffff;
1115  uint16_t k;
1116  bool hasbadstripe = false;
1117 
1118  if (context->stripes[j].length == 0)
1119  continue;
1120 
1121  for (k = 0; k < sub_stripes; k++) {
1122  if (c->devices[j + k]->devobj) {
1123  if (!context->stripes[j + k].csum_error)
1124  goodstripe = k;
1125  else
1126  hasbadstripe = true;
1127  }
1128  }
1129 
1130  if (hasbadstripe) {
1131  if (goodstripe != 0xffffffff) {
1132  for (k = 0; k < sub_stripes; k++) {
1133  if (c->devices[j + k]->devobj && context->stripes[j + k].csum_error) {
1134  uint32_t so = 0;
1135  bool recovered = false;
1136 
1137  pos = 0;
1138 
1139  stripe = startoffstripe;
1140  while (pos < length) {
1141  uint32_t readlen;
1142 
1143  if (pos == 0)
1144  readlen = (uint32_t)min(context->stripes[stripe * sub_stripes].length,
1145  c->chunk_item->stripe_length - (context->stripes[stripe * sub_stripes].start % c->chunk_item->stripe_length));
1146  else
1147  readlen = min(length - pos, (uint32_t)c->chunk_item->stripe_length);
1148 
1149  if (stripe == j / sub_stripes) {
1150  if (csum) {
1151  ULONG l;
1152 
1153  for (l = 0; l < readlen; l += Vcb->superblock.sector_size) {
1154  if (RtlCompareMemory(context->stripes[j + k].buf + so,
1155  context->stripes[j + goodstripe].buf + so,
1156  Vcb->superblock.sector_size) != Vcb->superblock.sector_size) {
1157  uint64_t addr = offset + pos;
1158 
1159  log_error(Vcb, addr, c->devices[j + k]->devitem.dev_id, false, true, false);
1160 
1161  recovered = true;
1162  }
1163 
1164  pos += Vcb->superblock.sector_size;
1165  so += Vcb->superblock.sector_size;
1166  }
1167  } else {
1168  ULONG l;
1169 
1170  for (l = 0; l < readlen; l += Vcb->superblock.node_size) {
1171  if (RtlCompareMemory(context->stripes[j + k].buf + so,
1172  context->stripes[j + goodstripe].buf + so,
1173  Vcb->superblock.node_size) != Vcb->superblock.node_size) {
1174  uint64_t addr = offset + pos;
1175 
1176  log_error(Vcb, addr, c->devices[j + k]->devitem.dev_id, true, true, false);
1177 
1178  recovered = true;
1179  }
1180 
1181  pos += Vcb->superblock.node_size;
1182  so += Vcb->superblock.node_size;
1183  }
1184  }
1185  } else
1186  pos += readlen;
1187 
1188  stripe = (stripe + 1) % (c->chunk_item->num_stripes / sub_stripes);
1189  }
1190 
1191  if (recovered) {
1192  // write good data over bad
1193 
1194  if (!c->devices[j + k]->readonly) {
1195  CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&c->chunk_item[1];
1196 
1197  Status = write_data_phys(c->devices[j + k]->devobj, c->devices[j + k]->fileobj, cis[j + k].offset + offset - c->offset,
1198  context->stripes[j + goodstripe].buf, context->stripes[j + goodstripe].length);
1199 
1200  if (!NT_SUCCESS(Status)) {
1201  ERR("write_data_phys returned %08x\n", Status);
1203  goto end;
1204  }
1205  }
1206  }
1207  }
1208  }
1209  } else {
1210  uint32_t so = 0;
1211  bool recovered = false;
1212 
1213  if (csum) {
1214  for (k = 0; k < sub_stripes; k++) {
1215  if (c->devices[j + k]->devobj) {
1216  context->stripes[j + k].bad_csums = ExAllocatePoolWithTag(PagedPool, context->stripes[j + k].length * sizeof(uint32_t) / Vcb->superblock.sector_size, ALLOC_TAG);
1217  if (!context->stripes[j + k].bad_csums) {
1218  ERR("out of memory\n");
1220  goto end;
1221  }
1222 
1223  Status = calc_csum(Vcb, context->stripes[j + k].buf, context->stripes[j + k].length / Vcb->superblock.sector_size, context->stripes[j + k].bad_csums);
1224  if (!NT_SUCCESS(Status)) {
1225  ERR("calc_csum returned %08x\n", Status);
1226  goto end;
1227  }
1228  }
1229  }
1230  } else {
1231  for (k = 0; k < sub_stripes; k++) {
1232  if (c->devices[j + k]->devobj) {
1233  ULONG l;
1234 
1235  context->stripes[j + k].bad_csums = ExAllocatePoolWithTag(PagedPool, context->stripes[j + k].length * sizeof(uint32_t) / Vcb->superblock.node_size, ALLOC_TAG);
1236  if (!context->stripes[j + k].bad_csums) {
1237  ERR("out of memory\n");
1239  goto end;
1240  }
1241 
1242  for (l = 0; l < context->stripes[j + k].length / Vcb->superblock.node_size; l++) {
1243  tree_header* th = (tree_header*)&context->stripes[j + k].buf[l * Vcb->superblock.node_size];
1244  uint32_t crc32 = ~calc_crc32c(0xffffffff, (uint8_t*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum));
1245 
1246  context->stripes[j + k].bad_csums[l] = crc32;
1247  }
1248  }
1249  }
1250  }
1251 
1252  pos = 0;
1253 
1254  stripe = startoffstripe;
1255  while (pos < length) {
1256  uint32_t readlen;
1257 
1258  if (pos == 0)
1259  readlen = (uint32_t)min(context->stripes[stripe * sub_stripes].length,
1260  c->chunk_item->stripe_length - (context->stripes[stripe * sub_stripes].start % c->chunk_item->stripe_length));
1261  else
1262  readlen = min(length - pos, (uint32_t)c->chunk_item->stripe_length);
1263 
1264  if (stripe == j / sub_stripes) {
1265  ULONG l;
1266 
1267  if (csum) {
1268  for (l = 0; l < readlen; l += Vcb->superblock.sector_size) {
1269  uint32_t crc32 = csum[pos / Vcb->superblock.sector_size];
1270  bool has_error = false;
1271 
1272  goodstripe = 0xffffffff;
1273  for (k = 0; k < sub_stripes; k++) {
1274  if (c->devices[j + k]->devobj) {
1275  if (context->stripes[j + k].bad_csums[so / Vcb->superblock.sector_size] != crc32)
1276  has_error = true;
1277  else
1278  goodstripe = k;
1279  }
1280  }
1281 
1282  if (has_error) {
1283  if (goodstripe != 0xffffffff) {
1284  for (k = 0; k < sub_stripes; k++) {
1285  if (c->devices[j + k]->devobj && context->stripes[j + k].bad_csums[so / Vcb->superblock.sector_size] != crc32) {
1286  uint64_t addr = offset + pos;
1287 
1288  log_error(Vcb, addr, c->devices[j + k]->devitem.dev_id, false, true, false);
1289 
1290  recovered = true;
1291 
1292  RtlCopyMemory(context->stripes[j + k].buf + so, context->stripes[j + goodstripe].buf + so,
1293  Vcb->superblock.sector_size);
1294  }
1295  }
1296  } else {
1297  uint64_t addr = offset + pos;
1298 
1299  for (k = 0; k < sub_stripes; k++) {
1300  if (c->devices[j + j]->devobj) {
1301  log_error(Vcb, addr, c->devices[j + k]->devitem.dev_id, false, false, false);
1303  }
1304  }
1305  }
1306  }
1307 
1308  pos += Vcb->superblock.sector_size;
1309  so += Vcb->superblock.sector_size;
1310  }
1311  } else {
1312  for (l = 0; l < readlen; l += Vcb->superblock.node_size) {
1313  for (k = 0; k < sub_stripes; k++) {
1314  if (c->devices[j + k]->devobj) {
1315  tree_header* th = (tree_header*)&context->stripes[j + k].buf[so];
1316  uint64_t addr = offset + pos;
1317 
1318  if (context->stripes[j + k].bad_csums[so / Vcb->superblock.node_size] != *((uint32_t*)th->csum) || th->address != addr) {
1319  ULONG m;
1320 
1321  recovered = false;
1322 
1323  for (m = 0; m < sub_stripes; m++) {
1324  if (m != k) {
1325  tree_header* th2 = (tree_header*)&context->stripes[j + m].buf[so];
1326 
1327  if (context->stripes[j + m].bad_csums[so / Vcb->superblock.node_size] == *((uint32_t*)th2->csum) && th2->address == addr) {
1328  log_error(Vcb, addr, c->devices[j + k]->devitem.dev_id, true, true, false);
1329 
1330  RtlCopyMemory(th, th2, Vcb->superblock.node_size);
1331 
1332  recovered = true;
1333  break;
1334  } else
1336  }
1337  }
1338 
1339  if (!recovered)
1340  log_error(Vcb, addr, c->devices[j + k]->devitem.dev_id, true, false, false);
1341  }
1342  }
1343  }
1344 
1345  pos += Vcb->superblock.node_size;
1346  so += Vcb->superblock.node_size;
1347  }
1348  }
1349  } else
1350  pos += readlen;
1351 
1352  stripe = (stripe + 1) % (c->chunk_item->num_stripes / sub_stripes);
1353  }
1354 
1355  if (recovered) {
1356  // write good data over bad
1357 
1358  for (k = 0; k < sub_stripes; k++) {
1359  if (c->devices[j + k]->devobj && !c->devices[j + k]->readonly) {
1360  CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&c->chunk_item[1];
1361 
1362  Status = write_data_phys(c->devices[j + k]->devobj, c->devices[j + k]->fileobj, cis[j + k].offset + offset - c->offset,
1363  context->stripes[j + k].buf, context->stripes[j + k].length);
1364 
1365  if (!NT_SUCCESS(Status)) {
1366  ERR("write_data_phys returned %08x\n", Status);
1368  goto end;
1369  }
1370  }
1371  }
1372  }
1373  }
1374  }
1375  }
1376 
1378 
1379 end:
1380  ExFreePool(stripeoff);
1381 
1382  return Status;
1383 }
1384 
// Body of the extent-scrubbing routine. Its signature line is absent from this listing;
// the ERR messages below and the caller's call site suggest it is scrub_extent.
// Visible flow:
//   1. allocate and zero one scrub_context_stripe per stripe of chunk c;
//   2. for RAID0 / RAID10, derive each stripe's start and length from offset and size;
//   3. for every present device with a non-zero length, allocate a buffer and a read IRP;
//   4. send the IRPs, then bail out if any stripe's iosb.Status is a failure;
//   5. hand the data to scrub_extent_raid0 / scrub_extent_raid10 according to type;
//   6. at `end`, release MDLs, IRPs, buffers, bad_csums arrays and the stripe array.
// NOTE(review): several statements are missing from this listing (e.g. whatever sets
// Status before each `goto end`); the comments describe only the lines that are visible.
1386  ULONG i;
1388  CHUNK_ITEM_STRIPE* cis;
1389  NTSTATUS Status;
1390  uint16_t startoffstripe, num_missing, allowed_missing;
1391 
1392  TRACE("(%p, %p, %I64x, %I64x, %p)\n", Vcb, c, offset, size, csum);
1393 
// per-stripe bookkeeping, zeroed so the cleanup code can test buf / Irp / bad_csums
1394  context.stripes = ExAllocatePoolWithTag(NonPagedPool, sizeof(scrub_context_stripe) * c->chunk_item->num_stripes, ALLOC_TAG);
1395  if (!context.stripes) {
1396  ERR("out of memory\n");
1398  goto end;
1399  }
1400 
1401  RtlZeroMemory(context.stripes, sizeof(scrub_context_stripe) * c->chunk_item->num_stripes);
1402 
1403  context.stripes_left = 0;
1404 
// the CHUNK_ITEM_STRIPE array is taken from the memory directly after the chunk item
1405  cis = (CHUNK_ITEM_STRIPE*)&c->chunk_item[1];
1406 
1407  if (type == BLOCK_FLAG_RAID0) {
1408  uint64_t startoff, endoff;
1409  uint16_t endoffstripe;
1410 
// locate the first and the last byte of the range (both chunk-relative)
1411  get_raid0_offset(offset - c->offset, c->chunk_item->stripe_length, c->chunk_item->num_stripes, &startoff, &startoffstripe);
1412  get_raid0_offset(offset + size - c->offset - 1, c->chunk_item->stripe_length, c->chunk_item->num_stripes, &endoff, &endoffstripe);
1413 
1414  for (i = 0; i < c->chunk_item->num_stripes; i++) {
// stripes before the starting stripe begin at the next stripe_length boundary,
// the starting stripe begins at startoff, later stripes at the current boundary
1415  if (startoffstripe > i)
1416  context.stripes[i].start = startoff - (startoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length;
1417  else if (startoffstripe == i)
1418  context.stripes[i].start = startoff;
1419  else
1420  context.stripes[i].start = startoff - (startoff % c->chunk_item->stripe_length);
1421 
// same three-way split for the end position, expressed as a length from start
1422  if (endoffstripe > i)
1423  context.stripes[i].length = (uint32_t)(endoff - (endoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length - context.stripes[i].start);
1424  else if (endoffstripe == i)
1425  context.stripes[i].length = (uint32_t)(endoff + 1 - context.stripes[i].start);
1426  else
1427  context.stripes[i].length = (uint32_t)(endoff - (endoff % c->chunk_item->stripe_length) - context.stripes[i].start);
1428  }
1429 
// no missing device is tolerated for RAID0
1430  allowed_missing = 0;
1431  } else if (type == BLOCK_FLAG_RAID10) {
1432  uint64_t startoff, endoff;
// a sub_stripes value of 0 is treated as 1
1433  uint16_t endoffstripe, j, sub_stripes = max(c->chunk_item->sub_stripes, 1);
1434 
// same lookup as the RAID0 case, but over num_stripes / sub_stripes groups
1435  get_raid0_offset(offset - c->offset, c->chunk_item->stripe_length, c->chunk_item->num_stripes / sub_stripes, &startoff, &startoffstripe);
1436  get_raid0_offset(offset + size - c->offset - 1, c->chunk_item->stripe_length, c->chunk_item->num_stripes / sub_stripes, &endoff, &endoffstripe);
1437 
1438  if ((c->chunk_item->num_stripes % sub_stripes) != 0) {
1439  ERR("chunk %I64x: num_stripes %x was not a multiple of sub_stripes %x!\n", c->offset, c->chunk_item->num_stripes, sub_stripes);
1441  goto end;
1442  }
1443 
// convert group numbers into indices into stripes[] for the loop below
1444  startoffstripe *= sub_stripes;
1445  endoffstripe *= sub_stripes;
1446 
1447  for (i = 0; i < c->chunk_item->num_stripes; i += sub_stripes) {
1448  if (startoffstripe > i)
1449  context.stripes[i].start = startoff - (startoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length;
1450  else if (startoffstripe == i)
1451  context.stripes[i].start = startoff;
1452  else
1453  context.stripes[i].start = startoff - (startoff % c->chunk_item->stripe_length);
1454 
1455  if (endoffstripe > i)
1456  context.stripes[i].length = (uint32_t)(endoff - (endoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length - context.stripes[i].start);
1457  else if (endoffstripe == i)
1458  context.stripes[i].length = (uint32_t)(endoff + 1 - context.stripes[i].start);
1459  else
1460  context.stripes[i].length = (uint32_t)(endoff - (endoff % c->chunk_item->stripe_length) - context.stripes[i].start);
1461 
// the remaining members of the group read exactly the same range as the first
1462  for (j = 1; j < sub_stripes; j++) {
1463  context.stripes[i+j].start = context.stripes[i].start;
1464  context.stripes[i+j].length = context.stripes[i].length;
1465  }
1466  }
1467 
// back to a group number, which is the form passed to scrub_extent_raid10 below
1468  startoffstripe /= sub_stripes;
1469  allowed_missing = 1;
1470  } else
// any other type: all but one device may be absent
1471  allowed_missing = c->chunk_item->num_stripes - 1;
1472 
1473  num_missing = 0;
1474 
// build one read request per stripe whose device is present
1475  for (i = 0; i < c->chunk_item->num_stripes; i++) {
1477 
1478  context.stripes[i].context = (struct _scrub_context*)&context;
1479 
1480  if (type == BLOCK_FLAG_DUPLICATE) {
// DUP: every stripe reads the same chunk-relative range
1481  context.stripes[i].start = offset - c->offset;
1482  context.stripes[i].length = size;
1483  } else if (type != BLOCK_FLAG_RAID0 && type != BLOCK_FLAG_RAID10) {
1484  ERR("unexpected chunk type %x\n", type);
1486  goto end;
1487  }
1488 
1489  if (!c->devices[i]->devobj) {
1490  num_missing++;
1491 
1492  if (num_missing > allowed_missing) {
1493  ERR("too many missing devices (at least %u, maximum allowed %u)\n", num_missing, allowed_missing);
1495  goto end;
1496  }
1497  } else if (context.stripes[i].length > 0) {
1498  context.stripes[i].buf = ExAllocatePoolWithTag(NonPagedPool, context.stripes[i].length, ALLOC_TAG);
1499 
1500  if (!context.stripes[i].buf) {
1501  ERR("out of memory\n");
1503  goto end;
1504  }
1505 
1506  context.stripes[i].Irp = IoAllocateIrp(c->devices[i]->devobj->StackSize, false);
1507 
1508  if (!context.stripes[i].Irp) {
1509  ERR("IoAllocateIrp failed\n");
1511  goto end;
1512  }
1513 
1514  IrpSp = IoGetNextIrpStackLocation(context.stripes[i].Irp);
1516  IrpSp->FileObject = c->devices[i]->fileobj;
1517 
// choose the buffering scheme according to the target device's Flags
1518  if (c->devices[i]->devobj->Flags & DO_BUFFERED_IO) {
1519  context.stripes[i].Irp->AssociatedIrp.SystemBuffer = ExAllocatePoolWithTag(NonPagedPool, context.stripes[i].length, ALLOC_TAG);
1520  if (!context.stripes[i].Irp->AssociatedIrp.SystemBuffer) {
1521  ERR("out of memory\n");
1523  goto end;
1524  }
1525 
1527 
1528  context.stripes[i].Irp->UserBuffer = context.stripes[i].buf;
1529  } else if (c->devices[i]->devobj->Flags & DO_DIRECT_IO) {
1530  context.stripes[i].Irp->MdlAddress = IoAllocateMdl(context.stripes[i].buf, context.stripes[i].length, false, false, NULL);
1531  if (!context.stripes[i].Irp->MdlAddress) {
1532  ERR("IoAllocateMdl failed\n");
1534  goto end;
1535  }
1536 
1538 
// NOTE(review): the exception-handler lines of this SEH block are not visible here
1539  _SEH2_TRY {
1540  MmProbeAndLockPages(context.stripes[i].Irp->MdlAddress, KernelMode, IoWriteAccess);
1543  } _SEH2_END;
1544 
1545  if (!NT_SUCCESS(Status)) {
1546  ERR("MmProbeAndLockPages threw exception %08x\n", Status);
// free and clear the MDL here so the cleanup at `end` does not unlock pages that were never locked
1547  IoFreeMdl(context.stripes[i].Irp->MdlAddress);
1548  context.stripes[i].Irp->MdlAddress = NULL;
1549  goto end;
1550  }
1551  } else
1552  context.stripes[i].Irp->UserBuffer = context.stripes[i].buf;
1553 
// byte offset of the read = stripe-relative start + this stripe's cis[i].offset
1554  IrpSp->Parameters.Read.Length = context.stripes[i].length;
1555  IrpSp->Parameters.Read.ByteOffset.QuadPart = context.stripes[i].start + cis[i].offset;
1556 
1557  context.stripes[i].Irp->UserIosb = &context.stripes[i].iosb;
1558 
1559  IoSetCompletionRoutine(context.stripes[i].Irp, scrub_read_completion, &context.stripes[i], true, true, true);
1560 
1561  context.stripes_left++;
1562 
// add this read's length to the running scrubbed-bytes counter
1563  Vcb->scrub.data_scrubbed += context.stripes[i].length;
1564  }
1565  }
1566 
1567  if (context.stripes_left == 0) {
1568  ERR("error - not reading any stripes\n");
1570  goto end;
1571  }
1572 
1574 
// dispatch every prepared IRP (same condition as the one used when preparing them)
1575  for (i = 0; i < c->chunk_item->num_stripes; i++) {
1576  if (c->devices[i]->devobj && context.stripes[i].length > 0)
1577  IoCallDriver(c->devices[i]->devobj, context.stripes[i].Irp);
1578  }
1579 
// NOTE(review): presumably the wait for the reads to complete sits here; that line is not visible
1581 
1582  // return an error if any of the stripes returned an error
1583  for (i = 0; i < c->chunk_item->num_stripes; i++) {
1584  if (!NT_SUCCESS(context.stripes[i].iosb.Status)) {
1585  Status = context.stripes[i].iosb.Status;
1587  goto end;
1588  }
1589  }
1590 
// verification proper is delegated per chunk type
1591  if (type == BLOCK_FLAG_DUPLICATE) {
// NOTE(review): the call that produces Status for this branch is not visible in this listing
1593  if (!NT_SUCCESS(Status)) {
1594  ERR("scrub_extent_dup returned %08x\n", Status);
1595  goto end;
1596  }
1597  } else if (type == BLOCK_FLAG_RAID0) {
1598  Status = scrub_extent_raid0(Vcb, c, offset, size, startoffstripe, csum, &context);
1599  if (!NT_SUCCESS(Status)) {
1600  ERR("scrub_extent_raid0 returned %08x\n", Status);
1601  goto end;
1602  }
1603  } else if (type == BLOCK_FLAG_RAID10) {
1604  Status = scrub_extent_raid10(Vcb, c, offset, size, startoffstripe, csum, &context);
1605  if (!NT_SUCCESS(Status)) {
1606  ERR("scrub_extent_raid10 returned %08x\n", Status);
1607  goto end;
1608  }
1609  }
1610 
// common cleanup; relies on the stripe array having been zeroed, so unset members are NULL
1611 end:
1612  if (context.stripes) {
1613  for (i = 0; i < c->chunk_item->num_stripes; i++) {
1614  if (context.stripes[i].Irp) {
// only direct-I/O requests own a locked MDL
1615  if (c->devices[i]->devobj->Flags & DO_DIRECT_IO && context.stripes[i].Irp->MdlAddress) {
1616  MmUnlockPages(context.stripes[i].Irp->MdlAddress);
1617  IoFreeMdl(context.stripes[i].Irp->MdlAddress);
1618  }
1619  IoFreeIrp(context.stripes[i].Irp);
1620  }
1621 
1622  if (context.stripes[i].buf)
1623  ExFreePool(context.stripes[i].buf);
1624 
1625  if (context.stripes[i].bad_csums)
1626  ExFreePool(context.stripes[i].bad_csums);
1627  }
1628 
1629  ExFreePool(context.stripes);
1630  }
1631 
1632  return Status;
1633 }
1634 
// Body of the bitmap-driven scrub loop (its signature line is absent from this listing).
// Walks every run of clear bits in bmp, limited to the first bmplen bits, and scrubs each
// run through scrub_extent in pieces of at most SCRUB_UNIT bytes. A bit index is turned
// into an address as offset + index * sector_size, and &csum[index] is passed alongside.
// Returns the first failing status from scrub_extent, otherwise STATUS_SUCCESS.
1636  NTSTATUS Status;
1637  ULONG runlength, index;
1638 
1639  runlength = RtlFindFirstRunClear(bmp, &index);
1640 
1641  while (runlength != 0) {
// runs starting at or beyond bmplen are ignored
1642  if (index >= bmplen)
1643  break;
1644 
// clip a run that extends past bmplen
1645  if (index + runlength >= bmplen) {
1646  runlength = bmplen - index;
1647 
1648  if (runlength == 0)
1649  break;
1650  }
1651 
// consume the run in chunks of at most SCRUB_UNIT bytes
1652  do {
1653  ULONG rl;
1654 
// NOTE(review): if both operands are 32-bit this product could wrap for very long runs - confirm the type of sector_size
1655  if (runlength * Vcb->superblock.sector_size > SCRUB_UNIT)
1656  rl = SCRUB_UNIT / Vcb->superblock.sector_size;
1657  else
1658  rl = runlength;
1659 
1660  Status = scrub_extent(Vcb, c, type, offset + UInt32x32To64(index, Vcb->superblock.sector_size), rl * Vcb->superblock.sector_size, &csum[index]);
1661  if (!NT_SUCCESS(Status)) {
// NOTE(review): the message names scrub_data_extent_dup although the call above is to scrub_extent
1662  ERR("scrub_data_extent_dup returned %08x\n", Status);
1663  return Status;
1664  }
1665 
1666  runlength -= rl;
1667  index += rl;
1668  } while (runlength > 0);
1669 
// continue the search from where the previous run ended
1670  runlength = RtlFindNextForwardRunClear(bmp, index, &index);
1671  }
1672 
1673  return STATUS_SUCCESS;
1674 }
1675 
1676 typedef struct {
1679  void* context;
1682  bool rewrite, missing;
1686 
1687 typedef struct {
1698 
// I/O completion routine: decrements context->stripes_left, copies the IRP's IoStatus
// into stripe->iosb, and sets context->Event once the counter reaches zero.
// NOTE(review): the declarations of `stripe` and `context` (presumably derived from conptr)
// and the return statement are not visible in this listing.
1699 _Function_class_(IO_COMPLETION_ROUTINE)
1700 static NTSTATUS __stdcall scrub_read_completion_raid56(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID conptr) {
// atomic decrement: completions for different stripes may run concurrently
1703  LONG left = InterlockedDecrement(&context->stripes_left);
1704 
1706 
1707  stripe->iosb = Irp->IoStatus;
1708 
// the completion that brings the counter to zero signals the event
1709  if (left == 0)
1710  KeSetEvent(&context->Event, 0, false);
1711 
1713 }
1714 
// Single-parity stripe check (the first line of the signature, including the function
// name, is absent from this listing).
// For full stripe number `num` it:
//   - verifies every allocated sector of each data stripe (tree-header check for metadata,
//     crc32c against context->csum[] for checksummed data) and records failures in the
//     stripe's `error` bitmap;
//   - when no device is missing, XORs the parity stripe with all data stripes into
//     context->parity_scratch and flags parity sectors whose result is non-zero;
//   - when no device is missing, tries to repair: a lone parity error is fixed in the parity
//     buffer, a single bad data sector is rebuilt from parity_scratch, and multiple bad
//     sectors in a row are only logged.
// Buffers that were modified are marked with `rewrite = true`; no write is issued here.
1716  uint64_t num, uint16_t missing_devices) {
1717  ULONG sectors_per_stripe = (ULONG)(c->chunk_item->stripe_length / Vcb->superblock.sector_size), i, off;
// the parity position rotates with bit_start + num
1718  uint16_t stripe, parity = (bit_start + num + c->chunk_item->num_stripes - 1) % c->chunk_item->num_stripes;
1719  uint64_t stripeoff;
1720 
// data stripes are visited starting just after the parity stripe and wrapping round to it
1721  stripe = (parity + 1) % c->chunk_item->num_stripes;
// off: sector index into context->alloc / is_tree / has_csum / csum
// stripeoff: sector index into a single stripe's buf
1722  off = (ULONG)(bit_start + num - stripe_start) * sectors_per_stripe * (c->chunk_item->num_stripes - 1);
1723  stripeoff = num * sectors_per_stripe;
1724 
// seed the scratch buffer with the parity data; the data stripes are XORed in below
1725  if (missing_devices == 0)
1726  RtlCopyMemory(context->parity_scratch, &context->stripes[parity].buf[num * c->chunk_item->stripe_length], (ULONG)c->chunk_item->stripe_length);
1727 
1728  while (stripe != parity) {
1729  RtlClearAllBits(&context->stripes[stripe].error);
1730 
1731  for (i = 0; i < sectors_per_stripe; i++) {
// only sectors on a present device that are marked allocated are checked
1732  if (c->devices[stripe]->devobj && RtlCheckBit(&context->alloc, off)) {
1733  if (RtlCheckBit(&context->is_tree, off)) {
// metadata: checksum runs from fs_uuid to the end of the node, and the stored address must match
1734  tree_header* th = (tree_header*)&context->stripes[stripe].buf[stripeoff * Vcb->superblock.sector_size];
1735  uint64_t addr = c->offset + (stripe_start * (c->chunk_item->num_stripes - 1) * c->chunk_item->stripe_length) + (off * Vcb->superblock.sector_size);
1736  uint32_t crc32 = ~calc_crc32c(0xffffffff, (uint8_t*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum));
1737 
1738  if (crc32 != *((uint32_t*)th->csum) || th->address != addr) {
// flag every sector that the node covers
1739  RtlSetBits(&context->stripes[stripe].error, i, Vcb->superblock.node_size / Vcb->superblock.sector_size);
1741 
// with a device missing the repair pass below is skipped, so the error is logged here
1742  if (missing_devices > 0)
1743  log_error(Vcb, addr, c->devices[stripe]->devitem.dev_id, true, false, false);
1744  }
1745 
// skip the rest of the node; the `- 1` compensates for the loop's own i++
1746  off += Vcb->superblock.node_size / Vcb->superblock.sector_size;
1747  stripeoff += Vcb->superblock.node_size / Vcb->superblock.sector_size;
1748  i += (Vcb->superblock.node_size / Vcb->superblock.sector_size) - 1;
1749 
1750  continue;
1751  } else if (RtlCheckBit(&context->has_csum, off)) {
// checksummed data: one crc32c per sector, compared against context->csum[off]
1752  uint32_t crc32 = ~calc_crc32c(0xffffffff, context->stripes[stripe].buf + (stripeoff * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
1753 
1754  if (crc32 != context->csum[off]) {
1755  RtlSetBit(&context->stripes[stripe].error, i);
1757 
1758  if (missing_devices > 0) {
1759  uint64_t addr = c->offset + (stripe_start * (c->chunk_item->num_stripes - 1) * c->chunk_item->stripe_length) + (off * Vcb->superblock.sector_size);
1760 
1761  log_error(Vcb, addr, c->devices[stripe]->devitem.dev_id, false, false, false);
1762  }
1763  }
1764  }
1765  }
1766 
1767  off++;
1768  stripeoff++;
1769  }
1770 
// fold this data stripe into the scratch buffer
1771  if (missing_devices == 0)
1772  do_xor(context->parity_scratch, &context->stripes[stripe].buf[num * c->chunk_item->stripe_length], (ULONG)c->chunk_item->stripe_length);
1773 
1774  stripe = (stripe + 1) % c->chunk_item->num_stripes;
1775  stripeoff = num * sectors_per_stripe;
1776  }
1777 
1778  // check parity
1779 
1780  if (missing_devices == 0) {
1781  RtlClearAllBits(&context->stripes[parity].error);
1782 
// a sector of parity_scratch containing any non-zero byte marks that parity sector as bad
1783  for (i = 0; i < sectors_per_stripe; i++) {
1784  ULONG o, j;
1785 
1786  o = i * Vcb->superblock.sector_size;
1787  for (j = 0; j < Vcb->superblock.sector_size; j++) { // FIXME - use SSE
1788  if (context->parity_scratch[o] != 0) {
1789  RtlSetBit(&context->stripes[parity].error, i);
1790  break;
1791  }
1792  o++;
1793  }
1794  }
1795  }
1796 
1797  // log and fix errors
1798 
// no repair is attempted when a device is missing
1799  if (missing_devices > 0)
1800  return;
1801 
// examine the same sector row i across all data stripes
1802  for (i = 0; i < sectors_per_stripe; i++) {
// bad_off / bad_stripe are only read on paths where they have been assigned
1803  ULONG num_errors = 0, bad_off;
1804  uint64_t bad_stripe;
1805  bool alloc = false;
1806 
1807  stripe = (parity + 1) % c->chunk_item->num_stripes;
1808  off = (ULONG)((bit_start + num - stripe_start) * sectors_per_stripe * (c->chunk_item->num_stripes - 1)) + i;
1809 
// count the bad data sectors in this row, remembering the last one seen
1810  while (stripe != parity) {
1811  if (RtlCheckBit(&context->alloc, off)) {
1812  alloc = true;
1813 
1814  if (RtlCheckBit(&context->stripes[stripe].error, i)) {
1815  bad_stripe = stripe;
1816  bad_off = off;
1817  num_errors++;
1818  }
1819  }
1820 
1821  off += sectors_per_stripe;
1822  stripe = (stripe + 1) % c->chunk_item->num_stripes;
1823  }
1824 
// rows with nothing allocated are skipped entirely
1825  if (!alloc)
1826  continue;
1827 
1828  if (num_errors == 0 && !RtlCheckBit(&context->stripes[parity].error, i)) // everything fine
1829  continue;
1830 
1831  if (num_errors == 0 && RtlCheckBit(&context->stripes[parity].error, i)) { // parity error
1832  uint64_t addr;
1833 
// XOR the scratch sector into the parity buffer to correct it
1834  do_xor(&context->stripes[parity].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
1835  &context->parity_scratch[i * Vcb->superblock.sector_size],
1836  Vcb->superblock.sector_size);
1837 
1838  bad_off = (ULONG)((bit_start + num - stripe_start) * sectors_per_stripe * (c->chunk_item->num_stripes - 1)) + i;
1839  addr = c->offset + (stripe_start * (c->chunk_item->num_stripes - 1) * c->chunk_item->stripe_length) + (bad_off * Vcb->superblock.sector_size);
1840 
1841  context->stripes[parity].rewrite = true;
1842 
1843  log_error(Vcb, addr, c->devices[parity]->devitem.dev_id, false, true, true);
1845  } else if (num_errors == 1) {
1846  uint32_t crc32;
1847  uint64_t addr = c->offset + (stripe_start * (c->chunk_item->num_stripes - 1) * c->chunk_item->stripe_length) + (bad_off * Vcb->superblock.sector_size);
1848 
1849  if (RtlCheckBit(&context->is_tree, bad_off)) {
1850  tree_header* th;
1851 
// XOR the bad node into the scratch buffer, leaving the candidate reconstruction there
1852  do_xor(&context->parity_scratch[i * Vcb->superblock.sector_size],
1853  &context->stripes[bad_stripe].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
1854  Vcb->superblock.node_size);
1855 
1856  th = (tree_header*)&context->parity_scratch[i * Vcb->superblock.sector_size];
1857  crc32 = ~calc_crc32c(0xffffffff, (uint8_t*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum));
1858 
// accept the reconstruction only if it passes the same header check as above
1859  if (crc32 == *((uint32_t*)th->csum) && th->address == addr) {
1860  RtlCopyMemory(&context->stripes[bad_stripe].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
1861  &context->parity_scratch[i * Vcb->superblock.sector_size], Vcb->superblock.node_size);
1862 
1863  context->stripes[bad_stripe].rewrite = true;
1864 
// clear the error bits of the node's remaining sectors so later rows do not handle them again
1865  RtlClearBits(&context->stripes[bad_stripe].error, i + 1, (Vcb->superblock.node_size / Vcb->superblock.sector_size) - 1);
1866 
1867  log_error(Vcb, addr, c->devices[bad_stripe]->devitem.dev_id, true, true, false);
1868  } else
1869  log_error(Vcb, addr, c->devices[bad_stripe]->devitem.dev_id, true, false, false);
1870  } else {
// data sector: same reconstruction, validated against the stored checksum
1871  do_xor(&context->parity_scratch[i * Vcb->superblock.sector_size],
1872  &context->stripes[bad_stripe].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
1873  Vcb->superblock.sector_size);
1874 
1875  crc32 = ~calc_crc32c(0xffffffff, &context->parity_scratch[i * Vcb->superblock.sector_size], Vcb->superblock.sector_size);
1876 
1877  if (crc32 == context->csum[bad_off]) {
1878  RtlCopyMemory(&context->stripes[bad_stripe].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
1879  &context->parity_scratch[i * Vcb->superblock.sector_size], Vcb->superblock.sector_size);
1880 
1881  context->stripes[bad_stripe].rewrite = true;
1882 
1883  log_error(Vcb, addr, c->devices[bad_stripe]->devitem.dev_id, false, true, false);
1884  } else
1885  log_error(Vcb, addr, c->devices[bad_stripe]->devitem.dev_id, false, false, false);
1886  }
1887  } else {
// more than one bad data sector in the row: nothing is rebuilt, each one is just logged
1888  stripe = (parity + 1) % c->chunk_item->num_stripes;
1889  off = (ULONG)((bit_start + num - stripe_start) * sectors_per_stripe * (c->chunk_item->num_stripes - 1)) + i;
1890 
1891  while (stripe != parity) {
1892  if (RtlCheckBit(&context->alloc, off)) {
1893  if (RtlCheckBit(&context->stripes[stripe].error, i)) {
1894  uint64_t addr = c->offset + (stripe_start * (c->chunk_item->num_stripes - 1) * c->chunk_item->stripe_length) + (off * Vcb->superblock.sector_size);
1895 
1896  log_error(Vcb, addr, c->devices[stripe]->devitem.dev_id, RtlCheckBit(&context->is_tree, off), false, false);
1897  }
1898  }
1899 
1900  off += sectors_per_stripe;
1901  stripe = (stripe + 1) % c->chunk_item->num_stripes;
1902  }
1903  }
1904  }
1905 }
1906 
1908  uint64_t num, uint16_t missing_devices) {
1909  ULONG sectors_per_stripe = (ULONG)(c->chunk_item->stripe_length / Vcb->superblock.sector_size), i, off;
1910  uint16_t stripe, parity1 = (bit_start + num + c->chunk_item->num_stripes - 2) % c->chunk_item->num_stripes;
1911  uint16_t parity2 = (parity1 + 1) % c->chunk_item->num_stripes;
1912  uint64_t stripeoff;
1913 
1914  stripe = (parity1 + 2) % c->chunk_item->num_stripes;
1915  off = (ULONG)(bit_start + num - stripe_start) * sectors_per_stripe * (c->chunk_item->num_stripes - 2);
1916  stripeoff = num * sectors_per_stripe;
1917 
1918  if (c->devices[parity1]->devobj)
1919  RtlCopyMemory(context->parity_scratch, &context->stripes[parity1].buf[num * c->chunk_item->stripe_length], (ULONG)c->chunk_item->stripe_length);
1920 
1921  if (c->devices[parity2]->devobj)
1922  RtlZeroMemory(context->parity_scratch2, (ULONG)c->chunk_item->stripe_length);
1923 
1924  while (stripe != parity1) {
1925  RtlClearAllBits(&context->stripes[stripe].error);
1926 
1927  for (i = 0; i < sectors_per_stripe; i++) {
1928  if (c->devices[stripe]->devobj && RtlCheckBit(&context->alloc, off)) {
1929  if (RtlCheckBit(&context->is_tree, off)) {
1930  tree_header* th = (tree_header*)&context->stripes[stripe].buf[stripeoff * Vcb->superblock.sector_size];
1931  uint64_t addr = c->offset + (stripe_start * (c->chunk_item->num_stripes - 2) * c->chunk_item->stripe_length) + (off * Vcb->superblock.sector_size);
1932  uint32_t crc32 = ~calc_crc32c(0xffffffff, (uint8_t*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum));
1933 
1934  if (crc32 != *((uint32_t*)th->csum) || th->address != addr) {
1935  RtlSetBits(&context->stripes[stripe].error, i, Vcb->superblock.node_size / Vcb->superblock.sector_size);
1937 
1938  if (missing_devices == 2)
1939  log_error(Vcb, addr, c->devices[stripe]->devitem.dev_id, true, false, false);
1940  }
1941 
1942  off += Vcb->superblock.node_size / Vcb->superblock.sector_size;
1943  stripeoff += Vcb->superblock.node_size / Vcb->superblock.sector_size;
1944  i += (Vcb->superblock.node_size / Vcb->superblock.sector_size) - 1;
1945 
1946  continue;
1947  } else if (RtlCheckBit(&context->has_csum, off)) {
1948  uint32_t crc32 = ~calc_crc32c(0xffffffff, context->stripes[stripe].buf + (stripeoff * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
1949 
1950  if (crc32 != context->csum[off]) {
1951  uint64_t addr = c->offset + (stripe_start * (c->chunk_item->num_stripes - 2) * c->chunk_item->stripe_length) + (off * Vcb->superblock.sector_size);
1952 
1953  RtlSetBit(&context->stripes[stripe].error, i);
1955 
1956  if (missing_devices == 2)
1957  log_error(Vcb, addr, c->devices[stripe]->devitem.dev_id, false, false, false);
1958  }
1959  }
1960  }
1961 
1962  off++;
1963  stripeoff++;
1964  }
1965 
1966  if (c->devices[parity1]->devobj)
1967  do_xor(context->parity_scratch, &context->stripes[stripe].buf[num * c->chunk_item->stripe_length], (uint32_t)c->chunk_item->stripe_length);
1968 
1969  stripe = (stripe + 1) % c->chunk_item->num_stripes;
1970  stripeoff = num * sectors_per_stripe;
1971  }
1972 
1973  RtlClearAllBits(&context->stripes[parity1].error);
1974 
1975  if (missing_devices == 0 || (missing_devices == 1 && !c->devices[parity2]->devobj)) {
1976  // check parity 1
1977 
1978  for (i = 0; i < sectors_per_stripe; i++) {
1979  ULONG o, j;
1980 
1981  o = i * Vcb->superblock.sector_size;
1982  for (j = 0; j < Vcb->superblock.sector_size; j++) { // FIXME - use SSE
1983  if (context->parity_scratch[o] != 0) {
1984  RtlSetBit(&context->stripes[parity1].error, i);
1985  break;
1986  }
1987  o++;
1988  }
1989  }
1990  }
1991 
1992  RtlClearAllBits(&context->stripes[parity2].error);
1993 
1994  if (missing_devices == 0 || (missing_devices == 1 && !c->devices[parity1]->devobj)) {
1995  // check parity 2
1996 
1997  stripe = parity1 == 0 ? (c->chunk_item->num_stripes - 1) : (parity1 - 1);
1998 
1999  while (stripe != parity2) {
2000  galois_double(context->parity_scratch2, (uint32_t)c->chunk_item->stripe_length);
2001  do_xor(context->parity_scratch2, &context->stripes[stripe].buf[num * c->chunk_item->stripe_length], (uint32_t)c->chunk_item->stripe_length);
2002 
2003  stripe = stripe == 0 ? (c->chunk_item->num_stripes - 1) : (stripe - 1);
2004  }
2005 
2006  for (i = 0; i < sectors_per_stripe; i++) {
2007  if (RtlCompareMemory(&context->stripes[parity2].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
2008  &context->parity_scratch2[i * Vcb->superblock.sector_size], Vcb->superblock.sector_size) != Vcb->superblock.sector_size)
2009  RtlSetBit(&context->stripes[parity2].error, i);
2010  }
2011  }
2012 
2013  if (missing_devices == 2)
2014  return;
2015 
2016  // log and fix errors
2017 
2018  for (i = 0; i < sectors_per_stripe; i++) {
2019  ULONG num_errors = 0;
2020  uint64_t bad_stripe1, bad_stripe2;
2021  ULONG bad_off1, bad_off2;
2022  bool alloc = false;
2023 
2024  stripe = (parity1 + 2) % c->chunk_item->num_stripes;
2025  off = (ULONG)((bit_start + num - stripe_start) * sectors_per_stripe * (c->chunk_item->num_stripes - 2)) + i;
2026 
2027  while (stripe != parity1) {
2028  if (RtlCheckBit(&context->alloc, off)) {
2029  alloc = true;
2030 
2031  if (!c->devices[stripe]->devobj || RtlCheckBit(&context->stripes[stripe].error, i)) {
2032  if (num_errors == 0) {
2033  bad_stripe1 = stripe;
2034  bad_off1 = off;
2035  } else if (num_errors == 1) {
2036  bad_stripe2 = stripe;
2037  bad_off2 = off;
2038  }
2039  num_errors++;
2040  }
2041  }
2042 
2043  off += sectors_per_stripe;
2044  stripe = (stripe + 1) % c->chunk_item->num_stripes;
2045  }
2046 
2047  if (!alloc)
2048  continue;
2049 
2050  if (num_errors == 0 && !RtlCheckBit(&context->stripes[parity1].error, i) && !RtlCheckBit(&context->stripes[parity2].error, i)) // everything fine
2051  continue;
2052 
2053  if (num_errors == 0) { // parity error
2054  uint64_t addr;
2055 
2056  if (RtlCheckBit(&context->stripes[parity1].error, i)) {
2057  do_xor(&context->stripes[parity1].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
2058  &context->parity_scratch[i * Vcb->superblock.sector_size],
2059  Vcb->superblock.sector_size);
2060 
2061  bad_off1 = (ULONG)((bit_start + num - stripe_start) * sectors_per_stripe * (c->chunk_item->num_stripes - 2)) + i;
2062  addr = c->offset + (stripe_start * (c->chunk_item->num_stripes - 2) * c->chunk_item->stripe_length) + (bad_off1 * Vcb->superblock.sector_size);
2063 
2064  context->stripes[parity1].rewrite = true;
2065 
2066  log_error(Vcb, addr, c->devices[parity1]->devitem.dev_id, false, true, true);
2068  }
2069 
2070  if (RtlCheckBit(&context->stripes[parity2].error, i)) {
2071  RtlCopyMemory(&context->stripes[parity2].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
2072  &context->parity_scratch2[i * Vcb->superblock.sector_size],
2073  Vcb->superblock.sector_size);
2074 
2075  bad_off1 = (ULONG)((bit_start + num - stripe_start) * sectors_per_stripe * (c->chunk_item->num_stripes - 2)) + i;
2076  addr = c->offset + (stripe_start * (c->chunk_item->num_stripes - 2) * c->chunk_item->stripe_length) + (bad_off1 * Vcb->superblock.sector_size);
2077 
2078  context->stripes[parity2].rewrite = true;
2079 
2080  log_error(Vcb, addr, c->devices[parity2]->devitem.dev_id, false, true, true);
2082  }
2083  } else if (num_errors == 1) {
2084  uint32_t crc32a, crc32b, len;
2085  uint16_t stripe_num, bad_stripe_num;
2086  uint64_t addr = c->offset + (stripe_start * (c->chunk_item->num_stripes - 2) * c->chunk_item->stripe_length) + (bad_off1 * Vcb->superblock.sector_size);
2087  uint8_t* scratch;
2088 
2089  len = RtlCheckBit(&context->is_tree, bad_off1)? Vcb->superblock.node_size : Vcb->superblock.sector_size;
2090 
2092  if (!scratch) {
2093  ERR("out of memory\n");
2094  return;
2095  }
2096 
2097  RtlZeroMemory(scratch, len);
2098 
2099  do_xor(&context->parity_scratch[i * Vcb->superblock.sector_size],
2100  &context->stripes[bad_stripe1].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)], len);
2101 
2102  stripe = parity1 == 0 ? (c->chunk_item->num_stripes - 1) : (parity1 - 1);
2103 
2104  if (c->devices[parity2]->devobj) {
2105  stripe_num = c->chunk_item->num_stripes - 3;
2106  while (stripe != parity2) {
2107  galois_double(scratch, len);
2108 
2109  if (stripe != bad_stripe1)
2110  do_xor(scratch, &context->stripes[stripe].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)], len);
2111  else
2112  bad_stripe_num = stripe_num;
2113 
2114  stripe = stripe == 0 ? (c->chunk_item->num_stripes - 1) : (stripe - 1);
2115  stripe_num--;
2116  }
2117 
2118  do_xor(scratch, &context->stripes[parity2].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)], len);
2119 
2120  if (bad_stripe_num != 0)
2121  galois_divpower(scratch, (uint8_t)bad_stripe_num, len);
2122  }
2123 
2124  if (RtlCheckBit(&context->is_tree, bad_off1)) {
2125  tree_header *th1 = NULL, *th2 = NULL;
2126 
2127  if (c->devices[parity1]->devobj) {
2128  th1 = (tree_header*)&context->parity_scratch[i * Vcb->superblock.sector_size];
2129  crc32a = ~calc_crc32c(0xffffffff, (uint8_t*)&th1->fs_uuid, Vcb->superblock.node_size - sizeof(th1->csum));
2130  }
2131 
2132  if (c->devices[parity2]->devobj) {
2133  th2 = (tree_header*)scratch;
2134  crc32b = ~calc_crc32c(0xffffffff, (uint8_t*)&th2->fs_uuid, Vcb->superblock.node_size - sizeof(th2->csum));
2135  }
2136 
2137  if ((c->devices[parity1]->devobj && crc32a == *((uint32_t*)th1->csum) && th1->address == addr) ||
2138  (c->devices[parity2]->devobj && crc32b == *((uint32_t*)th2->csum) && th2->address == addr)) {
2139  if (!c->devices[parity1]->devobj || crc32a != *((uint32_t*)th1->csum) || th1->address != addr) {
2140  RtlCopyMemory(&context->stripes[bad_stripe1].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
2141  scratch, Vcb->superblock.node_size);
2142 
2143  if (c->devices[parity1]->devobj) {
2144  // fix parity 1
2145 
2146  stripe = (parity1 + 2) % c->chunk_item->num_stripes;
2147 
2148  RtlCopyMemory(&context->stripes[parity1].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
2149  &context->stripes[stripe].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
2150  Vcb->superblock.node_size);
2151 
2152  stripe = (stripe + 1) % c->chunk_item->num_stripes;
2153 
2154  while (stripe != parity1) {
2155  do_xor(&context->stripes[parity1].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
2156  &context->stripes[stripe].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
2157  Vcb->superblock.node_size);
2158 
2159  stripe = (stripe + 1) % c->chunk_item->num_stripes;
2160  }
2161 
2162  context->stripes[parity1].rewrite = true;
2163 
2164  log_error(Vcb, addr, c->devices[parity1]->devitem.dev_id, false, true, true);
2166  }
2167  } else {
2168  RtlCopyMemory(&context->stripes[bad_stripe1].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
2169  &context->parity_scratch[i * Vcb->superblock.sector_size], Vcb->superblock.node_size);
2170 
2171  if (!c->devices[parity2]->devobj || crc32b != *((uint32_t*)th2->csum) || th2->address != addr) {
2172  // fix parity 2
2173  stripe = parity1 == 0 ? (c->chunk_item->num_stripes - 1) : (parity1 - 1);
2174 
2175  if (c->devices[parity2]->devobj) {
2176  RtlCopyMemory(&context->stripes[parity2].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
2177  &context->stripes[stripe].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
2178  Vcb->superblock.node_size);
2179 
2180  stripe = stripe == 0 ? (c->chunk_item->num_stripes - 1) : (stripe - 1);
2181 
2182  while (stripe != parity2) {
2183  galois_double(&context->stripes[parity2].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)], Vcb->superblock.node_size);
2184 
2185  do_xor(&context->stripes[parity2].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
2186  &context->stripes[stripe].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
2187  Vcb->superblock.node_size);
2188 
2189  stripe = stripe == 0 ? (c->chunk_item->num_stripes - 1) : (stripe - 1);
2190  }
2191 
2192  context->stripes[parity2].rewrite = true;
2193 
2194  log_error(Vcb, addr, c->devices[parity2]->devitem.dev_id, false, true, true);
2196  }
2197  }
2198  }
2199 
2200  context->stripes[bad_stripe1].rewrite = true;
2201 
2202  RtlClearBits(&context->stripes[bad_stripe1].error, i + 1, (Vcb->superblock.node_size / Vcb->superblock.sector_size) - 1);
2203 
2204  log_error(Vcb, addr, c->devices[bad_stripe1]->devitem.dev_id, true, true, false);
2205  } else
2206  log_error(Vcb, addr, c->devices[bad_stripe1]->devitem.dev_id, true, false, false);
2207  } else {
2208  if (c->devices[parity1]->devobj)
2209  crc32a = ~calc_crc32c(0xffffffff, &context->parity_scratch[i * Vcb->superblock.sector_size], Vcb->superblock.sector_size);
2210 
2211  if (c->devices[parity2]->devobj)
2212  crc32b = ~calc_crc32c(0xffffffff, scratch, Vcb->superblock.sector_size);
2213 
2214  if ((c->devices[parity1]->devobj && crc32a == context->csum[bad_off1]) || (c->devices[parity2]->devobj && crc32b == context->csum[bad_off1])) {
2215  if (c->devices[parity2]->devobj && crc32b == context->csum[bad_off1]) {
2216  RtlCopyMemory(&context->stripes[bad_stripe1].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
2217  scratch, Vcb->superblock.sector_size);
2218 
2219  if (c->devices[parity1]->devobj && crc32a != context->csum[bad_off1]) {
2220  // fix parity 1
2221 
2222  stripe = (parity1 + 2) % c->chunk_item->num_stripes;
2223 
2224  RtlCopyMemory(&context->stripes[parity1].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
2225  &context->stripes[stripe].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
2226  Vcb->superblock.sector_size);
2227 
2228  stripe = (stripe + 1) % c->chunk_item->num_stripes;
2229 
2230  while (stripe != parity1) {
2231  do_xor(&context->stripes[parity1].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
2232  &context->stripes[stripe].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
2233  Vcb->superblock.sector_size);
2234 
2235  stripe = (stripe + 1) % c->chunk_item->num_stripes;
2236  }
2237 
2238  context->stripes[parity1].rewrite = true;
2239 
2240  log_error(Vcb, addr, c->devices[parity1]->devitem.dev_id, false, true, true);
2242  }
2243  } else {
2244  RtlCopyMemory(&context->stripes[bad_stripe1].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
2245  &context->parity_scratch[i * Vcb->superblock.sector_size], Vcb->superblock.sector_size);
2246 
2247  if (c->devices[parity2]->devobj && crc32b != context->csum[bad_off1]) {
2248  // fix parity 2
2249  stripe = parity1 == 0 ? (c->chunk_item->num_stripes - 1) : (parity1 - 1);
2250 
2251  RtlCopyMemory(&context->stripes[parity2].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
2252  &context->stripes[stripe].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
2253  Vcb->superblock.sector_size);
2254 
2255  stripe = stripe == 0 ? (c->chunk_item->num_stripes - 1) : (stripe - 1);
2256 
2257  while (stripe != parity2) {
2258  galois_double(&context->stripes[parity2].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)], Vcb->superblock.sector_size);
2259 
2260  do_xor(&context->stripes[parity2].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
2261  &context->stripes[stripe].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
2262  Vcb->superblock.sector_size);
2263 
2264  stripe = stripe == 0 ? (c->chunk_item->num_stripes - 1) : (stripe - 1);
2265  }
2266 
2267  context->stripes[parity2].rewrite = true;
2268 
2269  log_error(Vcb, addr, c->devices[parity2]->devitem.dev_id, false, true, true);
2271  }
2272  }
2273 
2274  context->stripes[bad_stripe1].rewrite = true;
2275 
2276  log_error(Vcb, addr, c->devices[bad_stripe1]->devitem.dev_id, false, true, false);
2277  } else
2278  log_error(Vcb, addr, c->devices[bad_stripe1]->devitem.dev_id, false, false, false);
2279  }
2280 
2281  ExFreePool(scratch);
2282  } else if (num_errors == 2 && missing_devices == 0) {
2283  uint16_t x, y, k;
2284  uint64_t addr;
2285  uint32_t len = (RtlCheckBit(&context->is_tree, bad_off1) || RtlCheckBit(&context->is_tree, bad_off2)) ? Vcb->superblock.node_size : Vcb->superblock.sector_size;
2286  uint8_t gyx, gx, denom, a, b, *p, *q, *pxy, *qxy;
2287  uint32_t j;
2288 
2289  stripe = parity1 == 0 ? (c->chunk_item->num_stripes - 1) : (parity1 - 1);
2290 
2291  // put qxy in parity_scratch
2292  // put pxy in parity_scratch2
2293 
2294  k = c->chunk_item->num_stripes - 3;
2295  if (stripe == bad_stripe1 || stripe == bad_stripe2) {
2296  RtlZeroMemory(&context->parity_scratch[i * Vcb->superblock.sector_size], len);
2297  RtlZeroMemory(&context->parity_scratch2[i * Vcb->superblock.sector_size], len);
2298 
2299  if (stripe == bad_stripe1)
2300  x = k;
2301  else
2302  y = k;
2303  } else {
2304  RtlCopyMemory(&context->parity_scratch[i * Vcb->superblock.sector_size],
2305  &context->stripes[stripe].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)], len);
2306  RtlCopyMemory(&context->parity_scratch2[i * Vcb->superblock.sector_size],
2307  &context->stripes[stripe].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)], len);
2308  }
2309 
2310  stripe = stripe == 0 ? (c->chunk_item->num_stripes - 1) : (stripe - 1);
2311 
2312  k--;
2313  do {
2314  galois_double(&context->parity_scratch[i * Vcb->superblock.sector_size], len);
2315 
2316  if (stripe != bad_stripe1 && stripe != bad_stripe2) {
2317  do_xor(&context->parity_scratch[i * Vcb->superblock.sector_size],
2318  &context->stripes[stripe].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)], len);
2319  do_xor(&context->parity_scratch2[i * Vcb->superblock.sector_size],
2320  &context->stripes[stripe].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)], len);
2321  } else if (stripe == bad_stripe1)
2322  x = k;
2323  else if (stripe == bad_stripe2)
2324  y = k;
2325 
2326  stripe = stripe == 0 ? (c->chunk_item->num_stripes - 1) : (stripe - 1);
2327  k--;
2328  } while (stripe != parity2);
2329 
2330  gyx = gpow2(y > x ? (y-x) : (255-x+y));
2331  gx = gpow2(255-x);
2332 
2333  denom = gdiv(1, gyx ^ 1);
2334  a = gmul(gyx, denom);
2335  b = gmul(gx, denom);
2336 
2337  p = &context->stripes[parity1].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)];
2338  q = &context->stripes[parity2].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)];
2339  pxy = &context->parity_scratch2[i * Vcb->superblock.sector_size];
2340  qxy = &context->parity_scratch[i * Vcb->superblock.sector_size];
2341 
2342  for (j = 0; j < len; j++) {
2343  *qxy = gmul(a, *p ^ *pxy) ^ gmul(b, *q ^ *qxy);
2344 
2345  p++;
2346  q++;
2347  pxy++;
2348  qxy++;
2349  }
2350 
2351  do_xor(&context->parity_scratch2[i * Vcb->superblock.sector_size], &context->parity_scratch[i * Vcb->superblock.sector_size], len);
2352  do_xor(&context->parity_scratch2[i * Vcb->superblock.sector_size], &context->stripes[parity1].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)], len);
2353 
2354  addr = c->offset + (stripe_start * (c->chunk_item->num_stripes - 2) * c->chunk_item->stripe_length) + (bad_off1 * Vcb->superblock.sector_size);
2355 
2356  if (RtlCheckBit(&context->is_tree, bad_off1)) {
2357  tree_header* th = (tree_header*)&context->parity_scratch[i * Vcb->superblock.sector_size];
2358  uint32_t crc32 = ~calc_crc32c(0xffffffff, (uint8_t*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum));
2359 
2360  if (crc32 == *((uint32_t*)th->csum) && th->address == addr) {
2361  RtlCopyMemory(&context->stripes[bad_stripe1].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
2362  &context->parity_scratch[i * Vcb->superblock.sector_size], Vcb->superblock.node_size);
2363 
2364  context->stripes[bad_stripe1].rewrite = true;
2365 
2366  RtlClearBits(&context->stripes[bad_stripe1].error, i + 1, (Vcb->superblock.node_size / Vcb->superblock.sector_size) - 1);
2367 
2368  log_error(Vcb, addr, c->devices[bad_stripe1]->devitem.dev_id, true, true, false);
2369  } else
2370  log_error(Vcb, addr, c->devices[bad_stripe1]->devitem.dev_id, true, false, false);
2371  } else {
2372  uint32_t crc32 = ~calc_crc32c(0xffffffff, &context->parity_scratch[i * Vcb->superblock.sector_size], Vcb->superblock.sector_size);
2373 
2374  if (crc32 == context->csum[bad_off1]) {
2375  RtlCopyMemory(&context->stripes[bad_stripe1].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
2376  &context->parity_scratch[i * Vcb->superblock.sector_size], Vcb->superblock.sector_size);
2377 
2378  context->stripes[bad_stripe1].rewrite = true;
2379 
2380  log_error(Vcb, addr, c->devices[bad_stripe1]->devitem.dev_id, false, true, false);
2381  } else
2382  log_error(Vcb, addr, c->devices[bad_stripe1]->devitem.dev_id, false, false, false);
2383  }
2384 
2385  addr = c->offset + (stripe_start * (c->chunk_item->num_stripes - 2) * c->chunk_item->stripe_length) + (bad_off2 * Vcb->superblock.sector_size);
2386 
2387  if (RtlCheckBit(&context->is_tree, bad_off2)) {
2388  tree_header* th = (tree_header*)&context->parity_scratch2[i * Vcb->superblock.sector_size];
2389  uint32_t crc32 = ~calc_crc32c(0xffffffff, (uint8_t*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum));
2390 
2391  if (crc32 == *((uint32_t*)th->csum) && th->address == addr) {
2392  RtlCopyMemory(&context->stripes[bad_stripe2].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
2393  &context->parity_scratch2[i * Vcb->superblock.sector_size], Vcb->superblock.node_size);
2394 
2395  context->stripes[bad_stripe2].rewrite = true;
2396 
2397  RtlClearBits(&context->stripes[bad_stripe2].error, i + 1, (Vcb->superblock.node_size / Vcb->superblock.sector_size) - 1);
2398 
2399  log_error(Vcb, addr, c->devices[bad_stripe2]->devitem.dev_id, true, true, false);
2400  } else
2401  log_error(Vcb, addr, c->devices[bad_stripe2]->devitem.dev_id, true, false, false);
2402  } else {
2403  uint32_t crc32 = ~calc_crc32c(0xffffffff, &context->parity_scratch2[i * Vcb->superblock.sector_size], Vcb->superblock.sector_size);
2404 
2405  if (crc32 == context->csum[bad_off2]) {
2406  RtlCopyMemory(&context->stripes[bad_stripe2].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
2407  &context->parity_scratch2[i * Vcb->superblock.sector_size], Vcb->superblock.sector_size);
2408 
2409  context->stripes[bad_stripe2].rewrite = true;
2410 
2411  log_error(Vcb, addr, c->devices[bad_stripe2]->devitem.dev_id, false, true, false);
2412  } else
2413  log_error(Vcb, addr, c->devices[bad_stripe2]->devitem.dev_id, false, false, false);
2414  }
2415  } else {
2416  stripe = (parity2 + 1) % c->chunk_item->num_stripes;
2417  off = (ULONG)((bit_start + num - stripe_start) * sectors_per_stripe * (c->chunk_item->num_stripes - 2)) + i;
2418 
2419  while (stripe != parity1) {
2420  if (c->devices[stripe]->devobj && RtlCheckBit(&context->alloc, off)) {
2421  if (RtlCheckBit(&context->stripes[stripe].error, i)) {
2422  uint64_t addr = c->offset + (stripe_start * (c->chunk_item->num_stripes - 2) * c->chunk_item->stripe_length) + (off * Vcb->superblock.sector_size);
2423 
2424  log_error(Vcb, addr, c->devices[stripe]->devitem.dev_id, RtlCheckBit(&context->is_tree, off), false, false);
2425  }
2426  }
2427 
2428  off += sectors_per_stripe;
2429  stripe = (stripe + 1) % c->chunk_item->num_stripes;
2430  }
2431  }
2432  }
2433 }
2434 
// Body of the RAID5/6 stripe-run scrub routine. The function header line is not part of
// this listing; the TRACE below and the caller suggest it is
// scrub_chunk_raid56_stripe_run(Vcb, c, stripe_start, stripe_end) -- TODO confirm.
//
// Scrubs full stripes stripe_start..stripe_end (inclusive) of chunk c:
//  1. walks the extent tree to build bitmaps of allocated sectors and of tree-block
//     sectors and, for data chunks, copies the matching checksums;
//  2. reads every device stripe in batches of at most max_read stripes;
//  3. passes each stripe of the batch to scrub_raid5_stripe / scrub_raid6_stripe and
//     writes back any per-device buffer that was flagged for rewrite.
// Returns whatever NTSTATUS is held in Status when the cleanup labels are reached.
2436  NTSTATUS Status;
2437  KEY searchkey;
2438  traverse_ptr tp;
2439  bool b;
2440  uint64_t run_start, run_end, full_stripe_len, stripe;
2441  uint32_t max_read, num_sectors;
2442  ULONG arrlen, *allocarr, *csumarr = NULL, *treearr, num_parity_stripes = c->chunk_item->type & BLOCK_FLAG_RAID6 ? 2 : 1;
2444  uint16_t i;
// The per-device stripe records are read from the memory directly after the chunk item.
2445  CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&c->chunk_item[1];
2446 
2447  TRACE("(%p, %p, %I64x, %I64x)\n", Vcb, c, stripe_start, stripe_end);
2448 
// Bytes of data in one full stripe (parity stripes excluded), and the address range
// covered by this run. run_end is exclusive, hence stripe_end + 1.
2449  full_stripe_len = (c->chunk_item->num_stripes - num_parity_stripes) * c->chunk_item->stripe_length;
2450  run_start = c->offset + (stripe_start * full_stripe_len);
2451  run_end = c->offset + ((stripe_end + 1) * full_stripe_len);
2452 
// Look up run_start in the extent tree; the extent loop further down walks forward from tp.
2453  searchkey.obj_id = run_start;
2454  searchkey.obj_type = TYPE_METADATA_ITEM;
2455  searchkey.offset = 0xffffffffffffffff;
2456 
2457  Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, false, NULL);
2458  if (!NT_SUCCESS(Status)) {
2459  ERR("find_item returned %08x\n", Status);
2460  return Status;
2461  }
2462 
// One bitmap bit per sector of the run. arrlen is the bitmap buffer size in bytes
// (sector_align presumably rounds up to a multiple of sizeof(ULONG) -- confirm).
2463  num_sectors = (uint32_t)((stripe_end - stripe_start + 1) * full_stripe_len / Vcb->superblock.sector_size);
2464  arrlen = (ULONG)sector_align((num_sectors / 8) + 1, sizeof(ULONG));
2465 
2466  allocarr = ExAllocatePoolWithTag(PagedPool, arrlen, ALLOC_TAG);
2467  if (!allocarr) {
2468  ERR("out of memory\n");
2470  }
2471 
2472  treearr = ExAllocatePoolWithTag(PagedPool, arrlen, ALLOC_TAG);
2473  if (!treearr) {
2474  ERR("out of memory\n");
2475  ExFreePool(allocarr);
2477  }
2478 
// alloc: sectors covered by any extent. is_tree: sectors covered by tree-block extents.
2479  RtlInitializeBitMap(&context.alloc, allocarr, num_sectors);
2480  RtlClearAllBits(&context.alloc);
2481 
2482  RtlInitializeBitMap(&context.is_tree, treearr, num_sectors);
2483  RtlClearAllBits(&context.is_tree);
2484 
// Scratch buffer of one stripe_length, handed to the per-stripe scrub routines via context.
2485  context.parity_scratch = ExAllocatePoolWithTag(PagedPool, (ULONG)c->chunk_item->stripe_length, ALLOC_TAG);
2486  if (!context.parity_scratch) {
2487  ERR("out of memory\n");
2488  ExFreePool(allocarr);
2489  ExFreePool(treearr);
2491  }
2492 
// Data chunks additionally get a has_csum bitmap and one uint32_t checksum slot per sector.
2493  if (c->chunk_item->type & BLOCK_FLAG_DATA) {
2494  csumarr = ExAllocatePoolWithTag(PagedPool, arrlen, ALLOC_TAG);
2495  if (!csumarr) {
2496  ERR("out of memory\n");
2497  ExFreePool(allocarr);
2498  ExFreePool(treearr);
2499  ExFreePool(context.parity_scratch);
2501  }
2502 
2503  RtlInitializeBitMap(&context.has_csum, csumarr, num_sectors);
2504  RtlClearAllBits(&context.has_csum);
2505 
2506  context.csum = ExAllocatePoolWithTag(PagedPool, num_sectors * sizeof(uint32_t), ALLOC_TAG);
2507  if (!context.csum) {
2508  ERR("out of memory\n");
2509  ExFreePool(allocarr);
2510  ExFreePool(treearr);
2511  ExFreePool(context.parity_scratch);
2512  ExFreePool(csumarr);
2514  }
2515  }
2516 
// RAID6 chunks get a second scratch buffer of one stripe_length.
2517  if (c->chunk_item->type & BLOCK_FLAG_RAID6) {
2518  context.parity_scratch2 = ExAllocatePoolWithTag(PagedPool, (ULONG)c->chunk_item->stripe_length, ALLOC_TAG);
2519  if (!context.parity_scratch2) {
2520  ERR("out of memory\n");
2521  ExFreePool(allocarr);
2522  ExFreePool(treearr);
2523  ExFreePool(context.parity_scratch);
2524 
2525  if (c->chunk_item->type & BLOCK_FLAG_DATA) {
2526  ExFreePool(csumarr);
2527  ExFreePool(context.csum);
2528  }
2529 
2531  }
2532  }
2533 
// Walk the extent tree forward from tp until an item starts at or beyond run_end.
2534  do {
2535  traverse_ptr next_tp;
2536 
2537  if (tp.item->key.obj_id >= run_end)
2538  break;
2539 
// Metadata items take their length from node_size; other items carry it in key.offset.
2541  uint64_t size = tp.item->key.obj_type == TYPE_METADATA_ITEM ? Vcb->superblock.node_size : tp.item->key.offset;
2542 
2543  if (tp.item->key.obj_id + size > run_start) {
// Clamp the extent to the run before marking its sectors as allocated.
2544  uint64_t extent_start = max(run_start, tp.item->key.obj_id);
2545  uint64_t extent_end = min(tp.item->key.obj_id + size, run_end);
2546  bool extent_is_tree = false;
2547 
2548  RtlSetBits(&context.alloc, (ULONG)((extent_start - run_start) / Vcb->superblock.sector_size), (ULONG)((extent_end - extent_start) / Vcb->superblock.sector_size));
2549 
2551  extent_is_tree = true;
2552  else {
2553  EXTENT_ITEM* ei = (EXTENT_ITEM*)tp.item->data;
2554 
2555  if (tp.item->size < sizeof(EXTENT_ITEM)) {
2556  ERR("(%I64x,%x,%I64x) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_ITEM));
2558  goto end;
2559  }
2560 
2561  if (ei->flags & EXTENT_ITEM_TREE_BLOCK)
2562  extent_is_tree = true;
2563  }
2564 
2565  if (extent_is_tree)
2566  RtlSetBits(&context.is_tree, (ULONG)((extent_start - run_start) / Vcb->superblock.sector_size), (ULONG)((extent_end - extent_start) / Vcb->superblock.sector_size));
2567  else if (c->chunk_item->type & BLOCK_FLAG_DATA) {
// Non-tree extent in a data chunk: copy its checksums out of the checksum tree.
2568  traverse_ptr tp2;
2569  bool b2;
2570 
2571  searchkey.obj_id = EXTENT_CSUM_ID;
2572  searchkey.obj_type = TYPE_EXTENT_CSUM;
2573  searchkey.offset = extent_start;
2574 
// STATUS_NOT_FOUND is tolerated here; any other failure aborts the run.
2575  Status = find_item(Vcb, Vcb->checksum_root, &tp2, &searchkey, false, NULL);
2576  if (!NT_SUCCESS(Status) && Status != STATUS_NOT_FOUND) {
2577  ERR("find_item returned %08x\n", Status);
2578  goto end;
2579  }
2580 
2581  do {
2582  traverse_ptr next_tp2;
2583 
2584  if (tp2.item->key.offset >= extent_end)
2585  break;
2586 
// NOTE(review): items whose key.offset is below extent_start are skipped entirely, which
// makes the max() below a no-op. Confirm that checksum items overlapping the start of
// the extent are meant to be ignored.
2587  if (tp2.item->key.offset >= extent_start) {
// The item payload is treated as an array of uint32_t, one per sector, starting at key.offset.
2588  uint64_t csum_start = max(extent_start, tp2.item->key.offset);
2589  uint64_t csum_end = min(extent_end, tp2.item->key.offset + (tp2.item->size * Vcb->superblock.sector_size / sizeof(uint32_t)));
2590 
2591  RtlSetBits(&context.has_csum, (ULONG)((csum_start - run_start) / Vcb->superblock.sector_size), (ULONG)((csum_end - csum_start) / Vcb->superblock.sector_size));
2592 
2593  RtlCopyMemory(&context.csum[(csum_start - run_start) / Vcb->superblock.sector_size],
2594  tp2.item->data + ((csum_start - tp2.item->key.offset) * sizeof(uint32_t) / Vcb->superblock.sector_size),
2595  (ULONG)((csum_end - csum_start) * sizeof(uint32_t) / Vcb->superblock.sector_size));
2596  }
2597 
2598  b2 = find_next_item(Vcb, &tp2, &next_tp2, false, NULL);
2599 
2600  if (b2)
2601  tp2 = next_tp2;
2602  } while (b2);
2603  }
2604  }
2605  }
2606 
2607  b = find_next_item(Vcb, &tp, &next_tp, false, NULL);
2608 
2609  if (b)
2610  tp = next_tp;
2611  } while (b);
2612 
// Per-device-stripe state: one entry for every stripe of the chunk, parity included.
2613  context.stripes = ExAllocatePoolWithTag(PagedPool, sizeof(scrub_context_raid56_stripe) * c->chunk_item->num_stripes, ALLOC_TAG);
2614  if (!context.stripes) {
2615  ERR("out of memory\n");
2617  goto end;
2618  }
2619 
2620  max_read = (uint32_t)min(1048576 / c->chunk_item->stripe_length, stripe_end - stripe_start + 1); // only process 1 MB of data at a time
2621 
// Each stripe gets a read buffer large enough for max_read stripes, and an error bitmap
// with one bit per sector of a single stripe.
// NOTE(review): both failure paths below free the earlier entries' buf but not their
// errorarr, and then jump to end, which frees neither -- looks like a leak; confirm.
2622  for (i = 0; i < c->chunk_item->num_stripes; i++) {
2623  context.stripes[i].buf = ExAllocatePoolWithTag(PagedPool, (ULONG)(max_read * c->chunk_item->stripe_length), ALLOC_TAG);
2624  if (!context.stripes[i].buf) {
2625  uint64_t j;
2626 
2627  ERR("out of memory\n");
2628 
2629  for (j = 0; j < i; j++) {
2630  ExFreePool(context.stripes[j].buf);
2631  }
2632  ExFreePool(context.stripes);
2633 
2635  goto end;
2636  }
2637 
2638  context.stripes[i].errorarr = ExAllocatePoolWithTag(PagedPool, (ULONG)sector_align(((c->chunk_item->stripe_length / Vcb->superblock.sector_size) / 8) + 1, sizeof(ULONG)), ALLOC_TAG);
2639  if (!context.stripes[i].errorarr) {
2640  uint64_t j;
2641 
2642  ERR("out of memory\n");
2643 
2644  ExFreePool(context.stripes[i].buf);
2645 
2646  for (j = 0; j < i; j++) {
2647  ExFreePool(context.stripes[j].buf);
2648  }
2649  ExFreePool(context.stripes);
2650 
2652  goto end;
2653  }
2654 
2655  RtlInitializeBitMap(&context.stripes[i].error, context.stripes[i].errorarr, (ULONG)(c->chunk_item->stripe_length / Vcb->superblock.sector_size));
2656 
2657  context.stripes[i].context = &context;
2658  context.stripes[i].rewrite = false;
2659  }
2660 
2661  stripe = stripe_start;
2662 
2664 
// The logical range of the run stays locked until end2.
2665  chunk_lock_range(Vcb, c, run_start, run_end - run_start);
2666 
// Batch loop: each pass handles read_stripes consecutive stripes starting at 'stripe'.
2667  do {
2668  ULONG read_stripes;
2669  uint16_t missing_devices = 0;
2670  bool need_wait = false;
2671 
2672  if (max_read < stripe_end + 1 - stripe)
2673  read_stripes = max_read;
2674  else
2675  read_stripes = (ULONG)(stripe_end + 1 - stripe);
2676 
// Starts at the stripe count and is decremented below for every missing device.
2677  context.stripes_left = c->chunk_item->num_stripes;
2678 
2679  // read megabyte by megabyte
2680  for (i = 0; i < c->chunk_item->num_stripes; i++) {
2681  if (c->devices[i]->devobj) {
2683 
2684  context.stripes[i].Irp = IoAllocateIrp(c->devices[i]->devobj->StackSize, false);
2685 
2686  if (!context.stripes[i].Irp) {
2687  ERR("IoAllocateIrp failed\n");
2689  goto end3;
2690  }
2691 
2692  context.stripes[i].Irp->MdlAddress = NULL;
2693 
2694  IrpSp = IoGetNextIrpStackLocation(context.stripes[i].Irp);
2696  IrpSp->FileObject = c->devices[i]->fileobj;
2697 
// Buffered I/O: allocate a non-paged system buffer for the transfer.
2698  if (c->devices[i]->devobj->Flags & DO_BUFFERED_IO) {
2699  context.stripes[i].Irp->AssociatedIrp.SystemBuffer = ExAllocatePoolWithTag(NonPagedPool, (ULONG)(read_stripes * c->chunk_item->stripe_length), ALLOC_TAG);
2700  if (!context.stripes[i].Irp->AssociatedIrp.SystemBuffer) {
2701  ERR("out of memory\n");
2703  goto end3;
2704  }
2705 
2707 
2708  context.stripes[i].Irp->UserBuffer = context.stripes[i].buf;
// Direct I/O: describe the stripe buffer with an MDL and lock its pages.
2709  } else if (c->devices[i]->devobj->Flags & DO_DIRECT_IO) {
2710  context.stripes[i].Irp->MdlAddress = IoAllocateMdl(context.stripes[i].buf, (ULONG)(read_stripes * c->chunk_item->stripe_length), false, false, NULL);
2711  if (!context.stripes[i].Irp->MdlAddress) {
2712  ERR("IoAllocateMdl failed\n");
2714  goto end3;
2715  }
2716 
2718 
2719  _SEH2_TRY {
2720  MmProbeAndLockPages(context.stripes[i].Irp->MdlAddress, KernelMode, IoWriteAccess);
2723  } _SEH2_END;
2724 
// NOTE(review): the MDL is freed here but Irp->MdlAddress is left non-NULL, so the end3
// cleanup appears to call MmUnlockPages/IoFreeMdl on it again -- confirm.
2725  if (!NT_SUCCESS(Status)) {
2726  ERR("MmProbeAndLockPages threw exception %08x\n", Status);
2727  IoFreeMdl(context.stripes[i].Irp->MdlAddress);
2728  goto end3;
2729  }
// Neither flag set: the stripe buffer is passed through UserBuffer only.
2730  } else
2731  context.stripes[i].Irp->UserBuffer = context.stripes[i].buf;
2732 
// Offset of this batch within the device stripe; cis[i].offset is added for the read position.
2733  context.stripes[i].offset = stripe * c->chunk_item->stripe_length;
2734 
2735  IrpSp->Parameters.Read.Length = (ULONG)(read_stripes * c->chunk_item->stripe_length);
2736  IrpSp->Parameters.Read.ByteOffset.QuadPart = cis[i].offset + context.stripes[i].offset;
2737 
2738  context.stripes[i].Irp->UserIosb = &context.stripes[i].iosb;
2739  context.stripes[i].missing = false;
2740 
2741  IoSetCompletionRoutine(context.stripes[i].Irp, scrub_read_completion_raid56, &context.stripes[i], true, true, true);
2742 
2743  Vcb->scrub.data_scrubbed += read_stripes * c->chunk_item->stripe_length;
2744  need_wait = true;
2745  } else {
// No device object for this stripe: mark it missing and count it.
2746  context.stripes[i].Irp = NULL;
2747  context.stripes[i].missing = true;
2748  missing_devices++;
2749  InterlockedDecrement(&context.stripes_left);
2750  }
2751  }
2752 
// More than one missing device (RAID5) or more than two (RAID6) aborts the batch.
2753  if (c->chunk_item->type & BLOCK_FLAG_RAID5 && missing_devices > 1) {
2754  ERR("too many missing devices (%u, maximum 1)\n", missing_devices);
2756  goto end3;
2757  } else if (c->chunk_item->type & BLOCK_FLAG_RAID6 && missing_devices > 2) {
2758  ERR("too many missing devices (%u, maximum 2)\n", missing_devices);
2760  goto end3;
2761  }
2762 
// Send the prepared read IRP to every device that is present.
2763  if (need_wait) {
2765 
2766  for (i = 0; i < c->chunk_item->num_stripes; i++) {
2767  if (c->devices[i]->devobj)
2768  IoCallDriver(c->devices[i]->devobj, context.stripes[i].Irp);
2769  }
2770 
2772  }
2773 
2774  // return an error if any of the stripes returned an error
2775  for (i = 0; i < c->chunk_item->num_stripes; i++) {
2776  if (!context.stripes[i].missing && !NT_SUCCESS(context.stripes[i].iosb.Status)) {
2777  Status = context.stripes[i].iosb.Status;
2779  goto end3;
2780  }
2781  }
2782 
// Scrub each stripe of the batch; i is the stripe's index within the batch.
2783  if (c->chunk_item->type & BLOCK_FLAG_RAID6) {
2784  for (i = 0; i < read_stripes; i++) {
2785  scrub_raid6_stripe(Vcb, c, &context, stripe_start, stripe, i, missing_devices);
2786  }
2787  } else {
2788  for (i = 0; i < read_stripes; i++) {
2789  scrub_raid5_stripe(Vcb, c, &context, stripe_start, stripe, i, missing_devices);
2790  }
2791  }
2792  stripe += read_stripes;
2793 
// Per-batch cleanup: release MDLs and IRPs, then write back any buffer flagged for rewrite.
// NOTE(review): when end3 is reached from inside the setup loop, stripes with a higher
// index have not had Irp assigned in this batch, and no initialisation of Irp is visible
// before the first batch -- confirm it cannot be read uninitialised here.
2794 end3:
2795  for (i = 0; i < c->chunk_item->num_stripes; i++) {
2796  if (context.stripes[i].Irp) {
2797  if (c->devices[i]->devobj->Flags & DO_DIRECT_IO && context.stripes[i].Irp->MdlAddress) {
2798  MmUnlockPages(context.stripes[i].Irp->MdlAddress);
2799  IoFreeMdl(context.stripes[i].Irp->MdlAddress);
2800  }
2801  IoFreeIrp(context.stripes[i].Irp);
2802  context.stripes[i].Irp = NULL;
2803 
2804  if (context.stripes[i].rewrite) {
2805  Status = write_data_phys(c->devices[i]->devobj, c->devices[i]->fileobj, cis[i].offset + context.stripes[i].offset,
2806  context.stripes[i].buf, (uint32_t)(read_stripes * c->chunk_item->stripe_length));
2807 
2808  if (!NT_SUCCESS(Status)) {
2809  ERR("write_data_phys returned %08x\n", Status);
2811  goto end2;
2812  }
2813  }
2814  }
2815  }
2816 
2817  if (!NT_SUCCESS(Status))
2818  break;
// NOTE(review): stripe_end is inclusive (see run_end and read_stripes above), so this
// condition ends the loop with stripe == stripe_end still unprocessed when exactly one
// stripe remains after a batch -- confirm whether `<=` was intended.
2819  } while (stripe < stripe_end);
2820 
// Release the range lock and the per-stripe buffers and error bitmaps.
2821 end2:
2822  chunk_unlock_range(Vcb, c, run_start, run_end - run_start);
2823 
2824  for (i = 0; i < c->chunk_item->num_stripes; i++) {
2825  ExFreePool(context.stripes[i].buf);
2826  ExFreePool(context.stripes[i].errorarr);
2827  }
2828  ExFreePool(context.stripes);
2829 
// Free the bitmaps, the scratch buffers and (for data chunks) the checksum storage.
2830 end:
2831  ExFreePool(treearr);
2832  ExFreePool(allocarr);
2833  ExFreePool(context.parity_scratch);
2834 
2835  if (c->chunk_item->type & BLOCK_FLAG_RAID6)
2836  ExFreePool(context.parity_scratch2);
2837 
2838  if (c->chunk_item->type & BLOCK_FLAG_DATA) {
2839  ExFreePool(csumarr);
2840  ExFreePool(context.csum);
2841  }
2842 
2843  return Status;
2844 }
2845 
/* Body of the RAID5/RAID6 scrub pass for one chunk. It gathers the extents
 * found at or after *offset into runs of adjacent full stripes and hands each
 * run to scrub_chunk_raid56_stripe_run().
 *
 * NOTE(review): the signature line (listing line 2846) is absent from this
 * listing. The body uses Vcb, c, offset and changed, matching the call
 * scrub_chunk_raid56(Vcb, c, offset, changed) made elsewhere in this file.
 * The two "if" headers at listing lines 2877 and 2882 are absent as well;
 * their closing braces are at listing lines 2912 and 2885.
 *
 * In/out:
 *   *offset  - rounded down to the start of its full stripe on entry; when any
 *              extent was processed it is advanced to the start of the stripe
 *              following the last one scrubbed.
 *   *changed - set to false before the walk, true once an extent is accepted.
 *
 * Returns the failing status of find_item or scrub_chunk_raid56_stripe_run,
 * STATUS_INTERNAL_ERROR from the undersized-extent branch, and STATUS_SUCCESS
 * otherwise. */
2847  NTSTATUS Status;
2848  KEY searchkey;
2849  traverse_ptr tp;
2850  bool b;
2851  uint64_t full_stripe_len, stripe, stripe_start, stripe_end, total_data = 0;
      // two parity stripes when the RAID6 flag is set, otherwise one
2852  ULONG num_extents = 0, num_parity_stripes = c->chunk_item->type & BLOCK_FLAG_RAID6 ? 2 : 1;
2853 
      // length covered by the non-parity stripes of one full stripe
2854  full_stripe_len = (c->chunk_item->num_stripes - num_parity_stripes) * c->chunk_item->stripe_length;
2855  stripe = (*offset - c->offset) / full_stripe_len;
2856 
      // restart from the beginning of the full stripe that contains *offset
2857  *offset = c->offset + (stripe * full_stripe_len);
2858 
2859  searchkey.obj_id = *offset;
2860  searchkey.obj_type = TYPE_METADATA_ITEM;
2861  searchkey.offset = 0xffffffffffffffff;
2862 
2863  Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, false, NULL);
2864  if (!NT_SUCCESS(Status)) {
2865  ERR("find_item returned %08x\n", Status);
2866  return Status;
2867  }
2868 
2869  *changed = false;
2870 
2871  do {
2872  traverse_ptr next_tp;
2873 
      // stop once the walk has left this chunk
2874  if (tp.item->key.obj_id >= c->offset + c->chunk_item->size)
2875  break;
2876 
      // metadata items use node_size as their length; other items use key.offset
2878  uint64_t size = tp.item->key.obj_type == TYPE_METADATA_ITEM ? Vcb->superblock.node_size : tp.item->key.offset;
2879 
2880  TRACE("%I64x\n", tp.item->key.obj_id);
2881 
      // NOTE(review): the format string below has three conversions
      // (%I64x, %I64x, %x) but only two arguments follow it; the value for
      // the middle %I64x (presumably size) is not passed.
2883  ERR("extent %I64x has size less than sector_size (%I64x < %x)\n", tp.item->key.obj_id, Vcb->superblock.sector_size);
2884  return STATUS_INTERNAL_ERROR;
2885  }
2886 
      // index of the full stripe in which this extent starts
2887  stripe = (tp.item->key.obj_id - c->offset) / full_stripe_len;
2888 
2889  if (*changed) {
      // a gap of at least one whole stripe ends the current run: scrub it and
      // start a new run at this extent
2890  if (stripe > stripe_end + 1) {
2891  Status = scrub_chunk_raid56_stripe_run(Vcb, c, stripe_start, stripe_end);
2892  if (!NT_SUCCESS(Status)) {
2893  ERR("scrub_chunk_raid56_stripe_run returned %08x\n", Status);
2894  return Status;
2895  }
2896 
2897  stripe_start = stripe;
2898  }
2899  } else
2900  stripe_start = stripe;
2901 
      // index of the full stripe holding the last byte of this extent
2902  stripe_end = (tp.item->key.obj_id + size - 1 - c->offset) / full_stripe_len;
2903 
2904  *changed = true;
2905 
2906  total_data += size;
2907  num_extents++;
2908 
2909  // only do so much at a time
2910  if (num_extents >= 64 || total_data >= 0x8000000) // 128 MB
2911  break;
2912  }
2913 
2914  b = find_next_item(Vcb, &tp, &next_tp, false, NULL);
2915 
2916  if (b)
2917  tp = next_tp;
2918  } while (b);
2919 
      // scrub the run that was still open when the walk ended
2920  if (*changed) {
2921  Status = scrub_chunk_raid56_stripe_run(Vcb, c, stripe_start, stripe_end);
2922  if (!NT_SUCCESS(Status)) {
2923  ERR("scrub_chunk_raid56_stripe_run returned %08x\n", Status);
2924  return Status;
2925  }
2926 
      // the next call resumes at the stripe after the last one scrubbed
2927  *offset = c->offset + ((stripe_end + 1) * full_stripe_len);
2928  }
2929 
2930  return STATUS_SUCCESS;
2931 }
2932 
/* Body of the per-chunk scrub step: processes the next batch of extents of
 * chunk c starting at *offset while holding Vcb->tree_lock shared.
 *
 * RAID5 and RAID6 chunks are handed to scrub_chunk_raid56(). For every other
 * profile the extent tree is walked directly:
 *   - tree (metadata) extents are merged into a run
 *     [tree_run_start, tree_run_end) and scrubbed with scrub_extent() and a
 *     NULL checksum pointer;
 *   - data extents have their checksums loaded from Vcb->checksum_root and
 *     are scrubbed with scrub_data_extent().
 * After each accepted extent *offset is moved to its end and *changed is set.
 * A batch stops after 64 extents or 0x8000000 bytes.
 *
 * NOTE(review): the signature line (listing line 2933) is absent from this
 * listing, as are the statements that follow each profile test (listing lines
 * 2946-2960, presumably the assignments to "type"), the "if" headers at
 * listing lines 2978, 2989 and 3004, and the statements at listing lines
 * 2996, 3006, 3017, 3027 and 3136 (presumably assignments to Status).
 *
 * Returns Status after releasing the tree lock. */
2934  NTSTATUS Status;
2935  KEY searchkey;
2936  traverse_ptr tp;
2937  bool b = false, tree_run = false;
2938  ULONG type, num_extents = 0;
2939  uint64_t total_data = 0, tree_run_start, tree_run_end;
2940 
2941  TRACE("chunk %I64x\n", c->offset);
2942 
2943  ExAcquireResourceSharedLite(&Vcb->tree_lock, true);
2944 
      // profile dispatch; the statement belonging to each non-RAID5/6 branch
      // is not present in this listing
2945  if (c->chunk_item->type & BLOCK_FLAG_DUPLICATE)
2947  else if (c->chunk_item->type & BLOCK_FLAG_RAID0)
2949  else if (c->chunk_item->type & BLOCK_FLAG_RAID1)
2951  else if (c->chunk_item->type & BLOCK_FLAG_RAID10)
2953  else if (c->chunk_item->type & BLOCK_FLAG_RAID5) {
2954  Status = scrub_chunk_raid56(Vcb, c, offset, changed);
2955  goto end;
2956  } else if (c->chunk_item->type & BLOCK_FLAG_RAID6) {
2957  Status = scrub_chunk_raid56(Vcb, c, offset, changed);
2958  goto end;
2959  } else // SINGLE
2961 
2962  searchkey.obj_id = *offset;
2963  searchkey.obj_type = TYPE_METADATA_ITEM;
2964  searchkey.offset = 0xffffffffffffffff;
2965 
2966  Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, false, NULL);
2967  if (!NT_SUCCESS(Status)) {
2968  ERR("error - find_item returned %08x\n", Status);
2969  goto end;
2970  }
2971 
2972  do {
2973  traverse_ptr next_tp;
2974 
      // stop once the walk has left this chunk
2975  if (tp.item->key.obj_id >= c->offset + c->chunk_item->size)
2976  break;
2977 
      // metadata items use node_size as their length; other items use key.offset
2979  uint64_t size = tp.item->key.obj_type == TYPE_METADATA_ITEM ? Vcb->superblock.node_size : tp.item->key.offset;
2980  bool is_tree;
2981  uint32_t* csum = NULL;
2982  RTL_BITMAP bmp;
2983  ULONG* bmparr = NULL, bmplen;
2984 
2985  TRACE("%I64x\n", tp.item->key.obj_id);
2986 
2987  is_tree = false;
2988 
2990  is_tree = true;
2991  else {
2992  EXTENT_ITEM* ei = (EXTENT_ITEM*)tp.item->data;
2993 
      // ei is only dereferenced after the item has been checked to be large enough
2994  if (tp.item->size < sizeof(EXTENT_ITEM)) {
2995  ERR("(%I64x,%x,%I64x) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_ITEM));
2997  goto end;
2998  }
2999 
3000  if (ei->flags & EXTENT_ITEM_TREE_BLOCK)
3001  is_tree = true;
3002  }
3003 
      // NOTE(review): the format string below has three conversions
      // (%I64x, %I64x, %x) but only two arguments follow it; the value for
      // the middle %I64x (presumably size) is not passed.
3005  ERR("extent %I64x has size less than sector_size (%I64x < %x)\n", tp.item->key.obj_id, Vcb->superblock.sector_size);
3007  goto end;
3008  }
3009 
3010  // load csum
3011  if (!is_tree) {
3012  traverse_ptr tp2;
3013 
      // one uint32_t checksum slot per sector of the extent
3014  csum = ExAllocatePoolWithTag(PagedPool, (ULONG)(sizeof(uint32_t) * size / Vcb->superblock.sector_size), ALLOC_TAG);
3015  if (!csum) {
3016  ERR("out of memory\n");
3018  goto end;
3019  }
3020 
      // one bitmap bit per sector of the extent
3021  bmplen = (ULONG)(size / Vcb->superblock.sector_size);
3022 
3023  bmparr = ExAllocatePoolWithTag(PagedPool, (ULONG)(sector_align((bmplen >> 3) + 1, sizeof(ULONG))), ALLOC_TAG);
3024  if (!bmparr) {
3025  ERR("out of memory\n");
3026  ExFreePool(csum);
3028  goto end;
3029  }
3030 
3031  RtlInitializeBitMap(&bmp, bmparr, bmplen);
3032  RtlSetAllBits(&bmp); // 1 = no csum, 0 = csum
3033 
3034  searchkey.obj_id = EXTENT_CSUM_ID;
3035  searchkey.obj_type = TYPE_EXTENT_CSUM;
3036  searchkey.offset = tp.item->key.obj_id;
3037 
      // STATUS_NOT_FOUND is tolerated here: every bit then stays set (no csum)
3038  Status = find_item(Vcb, Vcb->checksum_root, &tp2, &searchkey, false, NULL);
3039  if (!NT_SUCCESS(Status) && Status != STATUS_NOT_FOUND) {
3040  ERR("find_item returned %08x\n", Status);
3041  ExFreePool(csum);
3042  ExFreePool(bmparr);
3043  goto end;
3044  }
3045 
3046  if (Status != STATUS_NOT_FOUND) {
3047  do {
3048  traverse_ptr next_tp2;
3049 
3050  if (tp2.item->key.obj_type == TYPE_EXTENT_CSUM) {
      // a csum item starting at or past the end of the extent ends the search
3051  if (tp2.item->key.offset >= tp.item->key.obj_id + size)
3052  break;
      // the item covers (item size / sizeof(uint32_t)) sectors from key.offset;
      // use it when that range reaches the start of the extent
3053  else if (tp2.item->size >= sizeof(uint32_t) && tp2.item->key.offset + (tp2.item->size * Vcb->superblock.sector_size / sizeof(uint32_t)) >= tp.item->key.obj_id) {
      // [cs, ce) is the overlap between the extent and this csum item
3054  uint64_t cs = max(tp.item->key.obj_id, tp2.item->key.offset);
3055  uint64_t ce = min(tp.item->key.obj_id + size, tp2.item->key.offset + (tp2.item->size * Vcb->superblock.sector_size / sizeof(uint32_t)));
3056 
3057  RtlCopyMemory(csum + ((cs - tp.item->key.obj_id) / Vcb->superblock.sector_size),
3058  tp2.item->data + ((cs - tp2.item->key.offset) * sizeof(uint32_t) / Vcb->superblock.sector_size),
3059  (ULONG)((ce - cs) * sizeof(uint32_t) / Vcb->superblock.sector_size));
3060 
      // mark the copied sectors as having a checksum
3061  RtlClearBits(&bmp, (ULONG)((cs - tp.item->key.obj_id) / Vcb->superblock.sector_size), (ULONG)((ce - cs) / Vcb->superblock.sector_size));
3062 
      // nothing more to load once the overlap reaches the end of the extent
3063  if (ce == tp.item->key.obj_id + size)
3064  break;
3065  }
3066  }
3067 
3068  if (find_next_item(Vcb, &tp2, &next_tp2, false, NULL))
3069  tp2 = next_tp2;
3070  else
3071  break;
3072  } while (true);
3073  }
3074  }
3075 
3076  if (tree_run) {
      // a data extent, or a tree extent starting beyond the current run,
      // closes the run: scrub what has been accumulated so far
3077  if (!is_tree || tp.item->key.obj_id > tree_run_end) {
3078  Status = scrub_extent(Vcb, c, type, tree_run_start, (uint32_t)(tree_run_end - tree_run_start), NULL);
3079  if (!NT_SUCCESS(Status)) {
3080  ERR("scrub_extent returned %08x\n", Status);
      // NOTE(review): when this extent is a data extent (!is_tree), csum and
      // bmparr were allocated above and are not freed on this path.
3081  goto end;
3082  }
3083 
3084  if (!is_tree)
3085  tree_run = false;
3086  else {
3087  tree_run_start = tp.item->key.obj_id;
3088  tree_run_end = tp.item->key.obj_id + Vcb->superblock.node_size;
3089  }
3090  } else
      // tree extent not beyond the current run end: extend the run
3091  tree_run_end = tp.item->key.obj_id + Vcb->superblock.node_size;
3092  } else if (is_tree) {
      // first tree extent after data (or at the start): open a new run
3093  tree_run = true;
3094  tree_run_start = tp.item->key.obj_id;
3095  tree_run_end = tp.item->key.obj_id + Vcb->superblock.node_size;
3096  }
3097 
3098  if (!is_tree) {
3099  Status = scrub_data_extent(Vcb, c, tp.item->key.obj_id, type, csum, &bmp, bmplen);
3100  if (!NT_SUCCESS(Status)) {
3101  ERR("scrub_data_extent returned %08x\n", Status);
3102  ExFreePool(csum);
3103  ExFreePool(bmparr);
3104  goto end;
3105  }
3106 
3107  ExFreePool(csum);
3108  ExFreePool(bmparr);
3109  }
3110 
      // report progress to the caller: resume after this extent next time
3111  *offset = tp.item->key.obj_id + size;
3112  *changed = true;
3113 
3114  total_data += size;
3115  num_extents++;
3116 
3117  // only do so much at a time
3118  if (num_extents >= 64 || total_data >= 0x8000000) // 128 MB
3119  break;
3120  }
3121 
3122  b = find_next_item(Vcb, &tp, &next_tp, false, NULL);
3123 
3124  if (b)
3125  tp = next_tp;
3126  } while (b);
3127 
      // scrub the tree run that was still open when the walk ended
3128  if (tree_run) {
3129  Status = scrub_extent(Vcb, c, type, tree_run_start, (uint32_t)(tree_run_end - tree_run_start), NULL);
3130  if (!NT_SUCCESS(Status)) {
3131  ERR("scrub_extent returned %08x\n", Status);
3132  goto end;
3133  }
3134  }
3135 
3137 
3138 end:
3139  ExReleaseResourceLite(&Vcb->tree_lock);
3140 
3141  return Status;
3142 }
3143 
/* Thread routine that performs a whole scrub; it is the start routine passed
 * to PsCreateSystemThread elsewhere in this file.
 *
 * Sequence, as visible below:
 *   1. initialise scrub.finished as a non-signalled notification event;
 *   2. under the exclusive tree lock, call do_write() when the volume has
 *      pending writes and is not read-only, then free_trees();
 *   3. under scrub.stats_lock, reset the statistics, discard the previous
 *      error list and queue every non-readonly chunk on a local list;
 *   4. scrub the queued chunks one at a time, waiting on scrub.event before
 *      each chunk and between batches;
 *   5. add the elapsed time to scrub.duration, close the thread handle and
 *      signal scrub.finished.
 *
 * NOTE(review): several lines are absent from this listing: the declaration
 * of Vcb (listing line 3146, presumably derived from "context"), the
 * declaration and query of "time" (3149, 3256), the else-branch statement at
 * 3160, the declaration of "err" (3185) and of the per-iteration chunk "c"
 * (3193), and the statements at 3195 and 3203. */
3144 _Function_class_(KSTART_ROUTINE)
3145 static void __stdcall scrub_thread(void* context) {
3147  LIST_ENTRY chunks, *le;
3148  NTSTATUS Status;
3150 
3151  KeInitializeEvent(&Vcb->scrub.finished, NotificationEvent, false);
3152 
3153  InitializeListHead(&chunks);
3154 
3155  ExAcquireResourceExclusiveLite(&Vcb->tree_lock, true);
3156 
3157  if (Vcb->need_write && !Vcb->readonly)
3158  Status = do_write(Vcb, NULL);
3159  else
3161 
3162  free_trees(Vcb);
3163 
      // failure here aborts the scrub before any statistics are reset
3164  if (!NT_SUCCESS(Status)) {
3165  ExReleaseResourceLite(&Vcb->tree_lock);
3166  ERR("do_write returned %08x\n", Status);
3167  Vcb->scrub.error = Status;
3168  goto end;
3169  }
3170 
3171  ExConvertExclusiveToSharedLite(&Vcb->tree_lock);
3172 
3173  ExAcquireResourceExclusiveLite(&Vcb->scrub.stats_lock, true);
3174 
3175  KeQuerySystemTime(&Vcb->scrub.start_time);
3176  Vcb->scrub.finish_time.QuadPart = 0;
3177  Vcb->scrub.resume_time.QuadPart = Vcb->scrub.start_time.QuadPart;
3178  Vcb->scrub.duration.QuadPart = 0;
3179  Vcb->scrub.total_chunks = 0;
3180  Vcb->scrub.chunks_left = 0;
3181  Vcb->scrub.data_scrubbed = 0;
3182  Vcb->scrub.num_errors = 0;
3183 
      // free the error records of the previous run; the line that takes "err"
      // off the list is not present in this listing
3184  while (!IsListEmpty(&Vcb->scrub.errors)) {
3186  ExFreePool(err);
3187  }
3188 
3189  ExAcquireResourceSharedLite(&Vcb->chunk_lock, true);
3190 
3191  le = Vcb->chunks.Flink;
3192  while (le != &Vcb->chunks) {
3194 
3196 
      // queue the chunk through its list_entry_balance link and count it
3197  if (!c->readonly) {
3198  InsertTailList(&chunks, &c->list_entry_balance);
3199  Vcb->scrub.total_chunks++;
3200  Vcb->scrub.chunks_left++;
3201  }
3202 
3204 
3205  le = le->Flink;
3206  }
3207 
3208  ExReleaseResourceLite(&Vcb->chunk_lock);
3209 
      // NOTE(review): ExReleaseResource is used here and once more further
      // down, while every other release in this routine uses
      // ExReleaseResourceLite.
3210  ExReleaseResource(&Vcb->scrub.stats_lock);
3211 
3212  ExReleaseResourceLite(&Vcb->tree_lock);
3213 
3214  while (!IsListEmpty(&chunks)) {
3215  chunk* c = CONTAINING_RECORD(RemoveHeadList(&chunks), chunk, list_entry_balance);
3216  uint64_t offset = c->offset;
3217  bool changed;
3218 
3219  c->reloc = true;
3220 
      // blocks while scrub.event is not signalled (the pause routine clears it)
3221  KeWaitForSingleObject(&Vcb->scrub.event, Executive, KernelMode, false, NULL);
3222 
3223  if (!Vcb->scrub.stopping) {
      // call scrub_chunk repeatedly until the chunk end is reached, a stop is
      // requested, an error occurs, or a call makes no progress
3224  do {
3225  changed = false;
3226 
3227  Status = scrub_chunk(Vcb, c, &offset, &changed);
3228  if (!NT_SUCCESS(Status)) {
3229  ERR("scrub_chunk returned %08x\n", Status);
3230  Vcb->scrub.stopping = true;
3231  Vcb->scrub.error = Status;
3232  break;
3233  }
3234 
3235  if (offset == c->offset + c->chunk_item->size || Vcb->scrub.stopping)
3236  break;
3237 
3238  KeWaitForSingleObject(&Vcb->scrub.event, Executive, KernelMode, false, NULL);
3239  } while (changed);
3240  }
3241 
3242  ExAcquireResourceExclusiveLite(&Vcb->scrub.stats_lock, true);
3243 
      // chunks_left is only decremented while no stop has been requested
3244  if (!Vcb->scrub.stopping)
3245  Vcb->scrub.chunks_left--;
3246 
      // the finish time is stamped when the last queued chunk has been taken
3247  if (IsListEmpty(&chunks))
3248  KeQuerySystemTime(&Vcb->scrub.finish_time);
3249 
3250  ExReleaseResource(&Vcb->scrub.stats_lock);
3251 
3252  c->reloc = false;
3253  c->list_entry_balance.Flink = NULL;
3254  }
3255 
      // add the time elapsed since the last resume to the running total
3257  Vcb->scrub.duration.QuadPart += time.QuadPart - Vcb->scrub.resume_time.QuadPart;
3258 
3259 end:
3260  ZwClose(Vcb->scrub.thread);
3261  Vcb->scrub.thread = NULL;
3262 
3263  KeSetEvent(&Vcb->scrub.finished, 0, false);
3264 }
3265 
/* Body of the routine that starts a scrub by creating the scrub_thread
 * system thread with Vcb as its context.
 *
 * The request is refused with STATUS_DEVICE_NOT_READY when the volume is
 * locked, when a balance thread exists, or when a scrub thread already
 * exists. Otherwise the stopping/paused/error state is reset, scrub.event is
 * initialised (signalled, because paused has just been cleared) and the
 * thread is created.
 *
 * NOTE(review): the signature line (listing line 3266) is absent from this
 * listing; the body uses Vcb and processor_mode. Also absent are the
 * statements executed when the privilege check fails (3271) and when the
 * volume is read-only (3289), and listing line 3296, which presumably
 * initialises "oa" before it is passed to PsCreateSystemThread.
 *
 * Returns the PsCreateSystemThread status on failure, STATUS_SUCCESS
 * otherwise. */
3267  NTSTATUS Status;
3268  OBJECT_ATTRIBUTES oa;
3269 
      // caller must hold SE_MANAGE_VOLUME_PRIVILEGE
3270  if (!SeSinglePrivilegeCheck(RtlConvertLongToLuid(SE_MANAGE_VOLUME_PRIVILEGE), processor_mode))
3272 
3273  if (Vcb->locked) {
3274  WARN("cannot start scrub while locked\n");
3275  return STATUS_DEVICE_NOT_READY;
3276  }
3277 
3278  if (Vcb->balance.thread) {
3279  WARN("cannot start scrub while balance running\n");
3280  return STATUS_DEVICE_NOT_READY;
3281  }
3282 
3283  if (Vcb->scrub.thread) {
3284  WARN("scrub already running\n");
3285  return STATUS_DEVICE_NOT_READY;
3286  }
3287 
3288  if (Vcb->readonly)
3290 
3291  Vcb->scrub.stopping = false;
3292  Vcb->scrub.paused = false;
3293  Vcb->scrub.error = STATUS_SUCCESS;
      // initial state is signalled, so the thread's waits pass until a pause
3294  KeInitializeEvent(&Vcb->scrub.event, NotificationEvent, !Vcb->scrub.paused);
3295 
3297 
3298  Status = PsCreateSystemThread(&Vcb->scrub.thread, 0, &oa, NULL, NULL, scrub_thread, Vcb);
3299  if (!NT_SUCCESS(Status)) {
3300  ERR("PsCreateSystemThread returned %08x\n", Status);
3301  return Status;
3302  }
3303 
3304  return STATUS_SUCCESS;
3305 }
3306 
/* Body of the routine that reports scrub progress: it fills the
 * btrfs_query_scrub structure "bqs" with the current statistics and appends
 * as many recorded errors as fit in the caller's buffer.
 *
 * Errors are written as a chain of variable-length btrfs_scrub_error
 * records. Each record's next_entry holds the byte length of that record
 * (the offset to the following one); the last record written has
 * next_entry == 0.
 *
 * NOTE(review): the signature lines (listing lines 3307-3308) are absent
 * from this listing; the body uses Vcb, processor_mode, length and bqs.
 * Also absent are the statement run when the privilege check fails (3315),
 * the declaration and query of "time" (3336, 3338), the declaration of the
 * per-iteration "err" (3350) and the Status assignments at 3359 and 3399.
 *
 * Returns STATUS_BUFFER_TOO_SMALL when length cannot hold the fixed part of
 * the structure; otherwise returns Status after releasing scrub.stats_lock. */
3309  ULONG len;
3310  NTSTATUS Status;
3311  LIST_ENTRY* le;
3312  btrfs_scrub_error* bse = NULL;
3313 
      // caller must hold SE_MANAGE_VOLUME_PRIVILEGE
3314  if (!SeSinglePrivilegeCheck(RtlConvertLongToLuid(SE_MANAGE_VOLUME_PRIVILEGE), processor_mode))
3316 
      // the buffer must at least hold everything that precedes the errors field
3317  if (length < offsetof(btrfs_query_scrub, errors))
3318  return STATUS_BUFFER_TOO_SMALL;
3319 
3320  ExAcquireResourceSharedLite(&Vcb->scrub.stats_lock, true);
3321 
      // running or paused only while a thread exists and chunks remain
3322  if (Vcb->scrub.thread && Vcb->scrub.chunks_left > 0)
3323  bqs->status = Vcb->scrub.paused ? BTRFS_SCRUB_PAUSED : BTRFS_SCRUB_RUNNING;
3324  else
3325  bqs->status = BTRFS_SCRUB_STOPPED;
3326 
3327  bqs->start_time.QuadPart = Vcb->scrub.start_time.QuadPart;
3328  bqs->finish_time.QuadPart = Vcb->scrub.finish_time.QuadPart;
3329  bqs->chunks_left = Vcb->scrub.chunks_left;
3330  bqs->total_chunks = Vcb->scrub.total_chunks;
3331  bqs->data_scrubbed = Vcb->scrub.data_scrubbed;
3332 
3333  bqs->duration = Vcb->scrub.duration.QuadPart;
3334 
      // while running, also count the time elapsed since the last resume
3335  if (bqs->status == BTRFS_SCRUB_RUNNING) {
3337 
3339  bqs->duration += time.QuadPart - Vcb->scrub.resume_time.QuadPart;
3340  }
3341 
3342  bqs->error = Vcb->scrub.error;
3343 
3344  bqs->num_errors = Vcb->scrub.num_errors;
3345 
      // bytes available for error records
3346  len = length - offsetof(btrfs_query_scrub, errors);
3347 
3348  le = Vcb->scrub.errors.Flink;
3349  while (le != &Vcb->scrub.errors) {
3351  ULONG errlen;
3352 
      // record length: metadata records end after the firstitem KEY, data
      // records end after filename_length bytes of filename
3353  if (err->is_metadata)
3354  errlen = offsetof(btrfs_scrub_error, metadata.firstitem) + sizeof(KEY);
3355  else
3356  errlen = offsetof(btrfs_scrub_error, data.filename) + err->data.filename_length;
3357 
      // stop copying when the next record does not fit in the space left
3358  if (len < errlen) {
3360  goto end;
3361  }
3362 
3363  if (!bse)
3364  bse = &bqs->errors;
3365  else {
3366  ULONG lastlen;
3367 
      // recompute the length of the record written last, link it to the new
      // one and step past it
3368  if (bse->is_metadata)
3369  lastlen = offsetof(btrfs_scrub_error, metadata.firstitem) + sizeof(KEY);
3370  else
3371  lastlen = offsetof(btrfs_scrub_error, data.filename) + bse->data.filename_length;
3372 
3373  bse->next_entry = lastlen;
3374  bse = (btrfs_scrub_error*)(((uint8_t*)bse) + lastlen);
3375  }
3376 
3377  bse->next_entry = 0;
3378  bse->address = err->address;
3379  bse->device = err->device;
3380  bse->recovered = err->recovered;
3381  bse->is_metadata = err->is_metadata;
3382  bse->parity = err->parity;
3383 
3384  if (err->is_metadata) {
3385  bse->metadata.root = err->metadata.root;
3386  bse->metadata.level = err->metadata.level;
3387  bse->metadata.firstitem = err->metadata.firstitem;
3388  } else {
3389  bse->data.subvol = err->data.subvol;
3390  bse->data.offset = err->data.offset;
3391  bse->data.filename_length = err->data.filename_length;
3392  RtlCopyMemory(bse->data.filename, err->data.filename, err->data.filename_length);
3393  }
3394 
3395  len -= errlen;
3396  le = le->Flink;
3397  }
3398 
3400 
3401 end:
3402  ExReleaseResourceLite(&Vcb->scrub.stats_lock);
3403 
3404  return Status;
3405 }
3406 
3409 
/* Body of the routine that pauses a running scrub: it sets scrub.paused,
 * clears scrub.event and adds the time elapsed since the last resume to
 * scrub.duration.
 *
 * NOTE(review): the signature line and the declaration of "time" (listing
 * lines 3407-3408) are absent from this listing, as are the statement run
 * when the privilege check fails (3411) and the line that fills "time"
 * (3422).
 *
 * Returns STATUS_DEVICE_NOT_READY when no scrub thread exists or the scrub
 * is already paused, STATUS_SUCCESS otherwise. */
3410  if (!SeSinglePrivilegeCheck(RtlConvertLongToLuid(SE_MANAGE_VOLUME_PRIVILEGE), processor_mode))
3412 
3413  if (!Vcb->scrub.thread)
3414  return STATUS_DEVICE_NOT_READY;
3415 
3416  if (Vcb->scrub.paused)
3417  return STATUS_DEVICE_NOT_READY;
3418 
3419  Vcb->scrub.paused = true;
      // with the event cleared, the scrub thread blocks at its next wait
3420  KeClearEvent(&Vcb->scrub.event);
3421 
3423  Vcb->scrub.duration.QuadPart += time.QuadPart - Vcb->scrub.resume_time.QuadPart;
3424 
3425  return STATUS_SUCCESS;
3426 }
3427 
3429  if (!SeSinglePrivilegeCheck(RtlConvertLongToLuid(