ReactOS  0.4.10-dev-19-g39281f0
scrub.c
Go to the documentation of this file.
1 /* Copyright (c) Mark Harmstone 2017
2  *
3  * This file is part of WinBtrfs.
4  *
5  * WinBtrfs is free software: you can redistribute it and/or modify
6  * it under the terms of the GNU Lesser General Public Licence as published by
7  * the Free Software Foundation, either version 3 of the Licence, or
8  * (at your option) any later version.
9  *
10  * WinBtrfs is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  * GNU Lesser General Public Licence for more details.
14  *
15  * You should have received a copy of the GNU Lesser General Public Licence
16  * along with WinBtrfs. If not, see <http://www.gnu.org/licenses/>. */
17 
18 #include "btrfs_drv.h"
19 
20 #define SCRUB_UNIT 0x100000 // 1 MB
21 
22 struct _scrub_context;
23 
24 typedef struct {
34 
35 typedef struct _scrub_context {
40 
41 typedef struct {
45 } path_part;
46 
/*
 * Body of the routine that records an unrecovered data checksum error together with
 * the path of the file it belongs to.
 *
 * NOTE(review): the signature line and several declarations (Status, tp, fn, err and
 * the per-loop pp / r2 assignments) are absent from this listing. Call sites in this
 * file pass (Vcb, addr, devid, subvol, inode, offset), which matches the names used
 * below - confirm against the real source.
 *
 * Visible steps:
 *   1. find the root whose id equals subvol;
 *   2. walk upwards from inode, collecting one path_part per name component;
 *   3. join the parts into fn, separated by backslashes;
 *   4. convert fn to UTF-16 and store it in a scrub_error that is appended to
 *      Vcb->scrub.errors.
 */
48  LIST_ENTRY *le, parts;
49  root* r = NULL;
50  KEY searchkey;
52  UINT64 dir;
53  BOOL orig_subvol = TRUE, not_in_tree = FALSE;
57  ULONG utf16len;
58 
// Look up the root whose id matches subvol.
59  le = Vcb->roots.Flink;
60  while (le != &Vcb->roots) {
62 
63  if (r2->id == subvol) {
64  r = r2;
65  break;
66  }
67 
68  le = le->Flink;
69  }
70 
71  if (!r) {
72  ERR("could not find subvol %llx\n", subvol);
73  return;
74  }
75 
// parts receives one entry per path component; the first entry appended is the
// name found for the inode itself, later entries are its ancestors.
76  InitializeListHead(&parts);
77 
78  dir = inode;
79 
80  while (TRUE) {
81  if (dir == r->root_item.objid) {
// dir is the top-level object of root r. Stop if r is the subvolume of
// Vcb->root_fileref; otherwise look for a ROOT_BACKREF in the root tree.
82  if (r == Vcb->root_fileref->fcb->subvol)
83  break;
84 
85  searchkey.obj_id = r->id;
86  searchkey.obj_type = TYPE_ROOT_BACKREF;
87  searchkey.offset = 0xffffffffffffffff;
88 
89  Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE, NULL);
90  if (!NT_SUCCESS(Status)) {
91  ERR("find_item returned %08x\n", Status);
92  goto end;
93  }
94 
95  if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == searchkey.obj_type) {
96  ROOT_REF* rr = (ROOT_REF*)tp.item->data;
97  path_part* pp;
98 
// Validate the fixed part of the item first, then the fixed part plus the name.
99  if (tp.item->size < sizeof(ROOT_REF)) {
100  ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(ROOT_REF));
101  goto end;
102  }
103 
104  if (tp.item->size < offsetof(ROOT_REF, name[0]) + rr->n) {
105  ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset,
106  tp.item->size, offsetof(ROOT_REF, name[0]) + rr->n);
107  goto end;
108  }
109 
// NOTE(review): the allocation of pp (original line 110) is absent from this listing.
111  if (!pp) {
112  ERR("out of memory\n");
113  goto end;
114  }
115 
// The name is referenced in place inside the tree item, not copied.
116  pp->name.Buffer = rr->name;
117  pp->name.Length = pp->name.MaximumLength = rr->n;
118  pp->orig_subvol = FALSE;
119 
120  InsertTailList(&parts, &pp->list_entry);
121 
// Switch r to the root whose id equals the backref key's offset.
122  r = NULL;
123 
124  le = Vcb->roots.Flink;
125  while (le != &Vcb->roots) {
127 
128  if (r2->id == tp.item->key.offset) {
129  r = r2;
130  break;
131  }
132 
133  le = le->Flink;
134  }
135 
136  if (!r) {
137  ERR("could not find subvol %llx\n", tp.item->key.offset);
138  goto end;
139  }
140 
141  dir = rr->dir;
142  orig_subvol = FALSE;
143  } else {
// No ROOT_BACKREF was found for this root: remember that and stop walking.
144  not_in_tree = TRUE;
145  break;
146  }
147  } else {
// presumably find_item returns the closest item at or before the search key,
// which is why both INODE_REF and INODE_EXTREF are tested below - confirm.
148  searchkey.obj_id = dir;
149  searchkey.obj_type = TYPE_INODE_EXTREF;
150  searchkey.offset = 0xffffffffffffffff;
151 
152  Status = find_item(Vcb, r, &tp, &searchkey, FALSE, NULL);
153  if (!NT_SUCCESS(Status)) {
154  ERR("find_item returned %08x\n", Status);
155  goto end;
156  }
157 
158  if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == TYPE_INODE_REF) {
159  INODE_REF* ir = (INODE_REF*)tp.item->data;
160  path_part* pp;
161 
162  if (tp.item->size < sizeof(INODE_REF)) {
163  ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(INODE_REF));
164  goto end;
165  }
166 
167  if (tp.item->size < offsetof(INODE_REF, name[0]) + ir->n) {
168  ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset,
169  tp.item->size, offsetof(INODE_REF, name[0]) + ir->n);
170  goto end;
171  }
172 
// NOTE(review): the allocation of pp (original line 173) is absent from this listing.
174  if (!pp) {
175  ERR("out of memory\n");
176  goto end;
177  }
178 
179  pp->name.Buffer = ir->name;
180  pp->name.Length = pp->name.MaximumLength = ir->n;
181  pp->orig_subvol = orig_subvol;
182 
183  InsertTailList(&parts, &pp->list_entry);
184 
// For an INODE_REF the key's offset is used as the next directory to visit;
// an object that refers to itself ends the walk.
185  if (dir == tp.item->key.offset)
186  break;
187 
188  dir = tp.item->key.offset;
189  } else if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == TYPE_INODE_EXTREF) {
190  INODE_EXTREF* ier = (INODE_EXTREF*)tp.item->data;
191  path_part* pp;
192 
193  if (tp.item->size < sizeof(INODE_EXTREF)) {
194  ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset,
195  tp.item->size, sizeof(INODE_EXTREF));
196  goto end;
197  }
198 
199  if (tp.item->size < offsetof(INODE_EXTREF, name[0]) + ier->n) {
200  ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset,
201  tp.item->size, offsetof(INODE_EXTREF, name[0]) + ier->n);
202  goto end;
203  }
204 
// NOTE(review): the allocation of pp (original line 205) is absent from this listing.
206  if (!pp) {
207  ERR("out of memory\n");
208  goto end;
209  }
210 
211  pp->name.Buffer = ier->name;
212  pp->name.Length = pp->name.MaximumLength = ier->n;
213  pp->orig_subvol = orig_subvol;
214 
215  InsertTailList(&parts, &pp->list_entry);
216 
// For an INODE_EXTREF the next directory comes from the item data (ier->dir).
217  if (dir == ier->dir)
218  break;
219 
220  dir = ier->dir;
221  } else {
222  ERR("could not find INODE_REF for inode %llx in subvol %llx\n", dir, r->id);
223  goto end;
224  }
225  }
226  }
227 
228  fn.MaximumLength = 0;
229 
// If the walk stopped without finding a ROOT_BACKREF, discard parts from the tail
// of the list until one that belongs to the original subvolume is reached.
// NOTE(review): the assignment of pp (original line 233) is absent from this listing.
230  if (not_in_tree) {
231  le = parts.Blink;
232  while (le != &parts) {
234  LIST_ENTRY* le2 = le->Blink;
235 
236  if (pp->orig_subvol)
237  break;
238 
239  RemoveTailList(&parts);
240  ExFreePool(pp);
241 
242  le = le2;
243  }
244  }
245 
// Size the output: every part contributes its name length plus one separator byte.
246  le = parts.Flink;
247  while (le != &parts) {
249 
250  fn.MaximumLength += pp->name.Length + 1;
251 
252  le = le->Flink;
253  }
254 
// NOTE(review): the allocation of fn.Buffer (original line 255) is absent from this listing.
256  if (!fn.Buffer) {
257  ERR("out of memory\n");
258  goto end;
259  }
260 
261  fn.Length = 0;
262 
// Emit the parts from the tail of the list backwards, so the component that was
// collected last comes first; each component is preceded by a separator.
263  le = parts.Blink;
264  while (le != &parts) {
266 
267  fn.Buffer[fn.Length] = '\\';
268  fn.Length++;
269 
270  RtlCopyMemory(&fn.Buffer[fn.Length], pp->name.Buffer, pp->name.Length);
271  fn.Length += pp->name.Length;
272 
273  le = le->Blink;
274  }
275 
276  if (not_in_tree)
277  ERR("subvol %llx, %.*s, offset %llx\n", subvol, fn.Length, fn.Buffer, offset);
278  else
279  ERR("%.*s, offset %llx\n", fn.Length, fn.Buffer, offset);
280 
// First conversion call passes a NULL destination and size 0; utf16len is then used
// below as the filename length - presumably the required byte count, confirm
// against the RtlUTF8ToUnicodeN documentation.
281  Status = RtlUTF8ToUnicodeN(NULL, 0, &utf16len, fn.Buffer, fn.Length);
282  if (!NT_SUCCESS(Status)) {
283  ERR("RtlUTF8ToUnicodeN 1 returned %08x\n", Status);
284  ExFreePool(fn.Buffer);
285  goto end;
286  }
287 
// NOTE(review): the allocation of err (original line 288) is absent from this listing.
289  if (!err) {
290  ERR("out of memory\n");
291  ExFreePool(fn.Buffer);
292  goto end;
293  }
294 
295  err->address = addr;
296  err->device = devid;
297  err->recovered = FALSE;
298  err->is_metadata = FALSE;
299  err->parity = FALSE;
300 
// The subvolume id is only recorded when the path could not be traced to the root.
301  err->data.subvol = not_in_tree ? subvol : 0;
302  err->data.offset = offset;
303  err->data.filename_length = (UINT16)utf16len;
304 
305  Status = RtlUTF8ToUnicodeN(err->data.filename, utf16len, &utf16len, fn.Buffer, fn.Length);
306  if (!NT_SUCCESS(Status)) {
307  ERR("RtlUTF8ToUnicodeN 2 returned %08x\n", Status);
308  ExFreePool(fn.Buffer);
309  ExFreePool(err);
310  goto end;
311  }
312 
// NOTE(review): original lines 313 and 318, which bracket the update of the shared
// error list, are absent from this listing.
314 
315  Vcb->scrub.num_errors++;
316  InsertTailList(&Vcb->scrub.errors, &err->list_entry);
317 
319 
320  ExFreePool(fn.Buffer);
321 
// Common exit: free whatever path parts are still queued.
// NOTE(review): original line 324 is absent; as shown, nothing inside this loop
// removes an entry from parts or assigns pp.
322 end:
323  while (!IsListEmpty(&parts)) {
325 
326  ExFreePool(pp);
327  }
328 }
329 
/*
 * Body of a helper that reads the tree node at treeaddr and, for matching
 * EXTENT_DATA items in that leaf, reports a file checksum error via
 * log_file_checksum_error().
 *
 * NOTE(review): the signature line and the Status declaration are absent from this
 * listing. The only five-argument caller in this file is
 * log_file_checksum_error_shared(Vcb, sdr->offset, address, devid, key.obj_id), so the
 * parameters are presumably (Vcb, treeaddr, addr, devid, extent) - confirm.
 */
331  tree_header* tree;
333  leaf_node* ln;
334  ULONG i;
335 
// NOTE(review): the allocation of tree (original line 336) is absent from this listing.
337  if (!tree) {
338  ERR("out of memory\n");
339  return;
340  }
341 
// Read one node (node_size bytes) at treeaddr into the buffer.
342  Status = read_data(Vcb, treeaddr, Vcb->superblock.node_size, NULL, TRUE, (UINT8*)tree, NULL, NULL, NULL, 0, FALSE, NormalPagePriority);
343  if (!NT_SUCCESS(Status)) {
344  ERR("read_data returned %08x\n", Status);
345  goto end;
346  }
347 
// Only level-0 nodes are accepted here.
348  if (tree->level != 0) {
349  ERR("tree level was %x, expected 0\n", tree->level);
350  goto end;
351  }
352 
// The leaf_node array starts immediately after the tree header.
353  ln = (leaf_node*)&tree[1];
354 
355  for (i = 0; i < tree->num_items; i++) {
// Item data is located at ln[i].offset bytes past the tree header. The size
// test requires room for an EXTENT_DATA2 after the EXTENT_DATA header.
356  if (ln[i].key.obj_type == TYPE_EXTENT_DATA && ln[i].size >= sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2)) {
357  EXTENT_DATA* ed = (EXTENT_DATA*)((UINT8*)tree + sizeof(tree_header) + ln[i].offset);
// NOTE(review): the declaration of ed2 (original line 358) is absent from this listing.
359 
// The reported file offset is the item's key offset plus (addr - extent).
// NOTE(review): the match is made on ed2->address == addr rather than == extent;
// if addr can lie inside an extent instead of at its start, confirm this is intended.
360  if (ed->type == EXTENT_TYPE_REGULAR && ed2->size != 0 && ed2->address == addr)
361  log_file_checksum_error(Vcb, addr, devid, tree->tree_id, ln[i].key.obj_id, ln[i].key.offset + addr - extent);
362  }
363  }
364 
365 end:
366  ExFreePool(tree);
367 }
368 
/*
 * Body of the routine that records an unrecovered metadata checksum error.
 * It fills a scrub_error with the address, device, owning root, tree level and
 * (when supplied) the first key, then appends it to Vcb->scrub.errors.
 *
 * NOTE(review): the signature line is absent from this listing; callers in this file
 * pass (Vcb, address, devid, root, level, firstitem), matching the names used below.
 */
370  scrub_error* err;
371 
// NOTE(review): the allocation of err (original line 372) is absent from this listing.
373  if (!err) {
374  ERR("out of memory\n");
375  return;
376  }
377 
378  err->address = addr;
379  err->device = devid;
380  err->recovered = FALSE;
381  err->is_metadata = TRUE;
382  err->parity = FALSE;
383 
384  err->metadata.root = root;
385  err->metadata.level = level;
386 
// firstitem is optional: when it is NULL the stored key is zeroed instead.
387  if (firstitem) {
388  ERR("root %llx, level %u, first item (%llx,%x,%llx)\n", root, level, firstitem->obj_id,
389  firstitem->obj_type, firstitem->offset);
390 
391  err->metadata.firstitem = *firstitem;
392  } else {
393  ERR("root %llx, level %u\n", root, level);
394 
395  RtlZeroMemory(&err->metadata.firstitem, sizeof(KEY));
396  }
397 
// NOTE(review): original lines 398 and 403, which bracket the update of the shared
// error list, are absent from this listing.
399 
400  Vcb->scrub.num_errors++;
401  InsertTailList(&Vcb->scrub.errors, &err->list_entry);
402 
404 }
405 
/*
 * Body of a helper that reads the internal tree node at offset, looks for the child
 * pointer equal to address, and reports a metadata checksum error for that child.
 *
 * NOTE(review): the signature line and the Status declaration are absent from this
 * listing; the caller in this file passes (Vcb, sbr->offset, address, devid), which
 * matches the names used below.
 */
407  tree_header* tree;
409  internal_node* in;
410  ULONG i;
411 
// NOTE(review): the allocation of tree (original line 412) is absent from this listing.
413  if (!tree) {
414  ERR("out of memory\n");
415  return;
416  }
417 
418  Status = read_data(Vcb, offset, Vcb->superblock.node_size, NULL, TRUE, (UINT8*)tree, NULL, NULL, NULL, 0, FALSE, NormalPagePriority);
419  if (!NT_SUCCESS(Status)) {
420  ERR("read_data returned %08x\n", Status);
421  goto end;
422  }
423 
// A level-0 node is rejected: the code below treats the node as an array of
// internal_node entries.
424  if (tree->level == 0) {
425  ERR("tree level was 0\n");
426  goto end;
427  }
428 
// The internal_node array starts immediately after the tree header.
429  in = (internal_node*)&tree[1];
430 
431  for (i = 0; i < tree->num_items; i++) {
432  if (in[i].address == address) {
// The damaged block is reported one level below this node, with the key
// stored alongside the matching pointer as its first item.
433  log_tree_checksum_error(Vcb, address, devid, tree->tree_id, tree->level - 1, &in[i].key);
434  break;
435  }
436  }
437 
438 end:
439  ExFreePool(tree);
440 }
441 
/*
 * Body of the routine that works out what an unrecoverable bad block belongs to.
 * It looks up the extent item covering address in the extent tree, then walks that
 * item's references (inline ones first, then separate items with the same object id)
 * and calls the matching log_*_checksum_error* helper for each.
 *
 * NOTE(review): the signature line and the declarations of tp and Status are absent
 * from this listing; log_error() in this file calls log_unrecoverable_error(Vcb, addr,
 * devid), and the body below uses Vcb, address and devid.
 */
443  KEY searchkey;
446  EXTENT_ITEM* ei;
447  EXTENT_ITEM2* ei2 = NULL;
448  UINT8* ptr;
449  ULONG len;
450  UINT64 rc;
451 
452  // FIXME - still log even if rest of this function fails
453 
454  searchkey.obj_id = address;
455  searchkey.obj_type = TYPE_METADATA_ITEM;
456  searchkey.offset = 0xffffffffffffffff;
457 
458  Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, NULL);
459  if (!NT_SUCCESS(Status)) {
460  ERR("find_item returned %08x\n", Status);
461  return;
462  }
463 
// NOTE(review): the opening of this condition (original line 464) is absent from this
// listing. The visible clauses give up when the found item starts a sector or more
// past address, or when it ends at or before address.
465  tp.item->key.obj_id >= address + Vcb->superblock.sector_size ||
466  (tp.item->key.obj_type == TYPE_EXTENT_ITEM && tp.item->key.obj_id + tp.item->key.offset <= address) ||
467  (tp.item->key.obj_type == TYPE_METADATA_ITEM && tp.item->key.obj_id + Vcb->superblock.node_size <= address)
468  )
469  return;
470 
471  if (tp.item->size < sizeof(EXTENT_ITEM)) {
472  ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_ITEM));
473  return;
474  }
475 
// ptr / len track the unread remainder of the item after the EXTENT_ITEM header.
476  ei = (EXTENT_ITEM*)tp.item->data;
477  ptr = (UINT8*)&ei[1];
478  len = tp.item->size - sizeof(EXTENT_ITEM);
479 
// NOTE(review): the condition opening this block (original line 480) is absent from
// this listing; inside it an EXTENT_ITEM2 is expected directly after the header.
481  if (tp.item->size < sizeof(EXTENT_ITEM) + sizeof(EXTENT_ITEM2)) {
482  ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset,
483  tp.item->size, sizeof(EXTENT_ITEM) + sizeof(EXTENT_ITEM2));
484  return;
485  }
486 
487  ei2 = (EXTENT_ITEM2*)ptr;
488 
489  ptr += sizeof(EXTENT_ITEM2);
490  len -= sizeof(EXTENT_ITEM2);
491  }
492 
// rc counts the references accounted for by the inline entries.
493  rc = 0;
494 
// Inline references: each entry is a one-byte type followed by a type-specific struct.
495  while (len > 0) {
496  UINT8 type = *ptr;
497 
498  ptr++;
499  len--;
500 
501  if (type == TYPE_TREE_BLOCK_REF) {
502  TREE_BLOCK_REF* tbr;
503 
504  if (len < sizeof(TREE_BLOCK_REF)) {
505  ERR("TREE_BLOCK_REF takes up %u bytes, but only %u remaining\n", sizeof(TREE_BLOCK_REF), len);
506  break;
507  }
508 
509  tbr = (TREE_BLOCK_REF*)ptr;
510 
// Without an EXTENT_ITEM2 the level is taken from the item key's offset and no
// first key is available.
511  log_tree_checksum_error(Vcb, address, devid, tbr->offset, ei2 ? ei2->level : (UINT8)tp.item->key.offset, ei2 ? &ei2->firstitem : NULL);
512 
513  rc++;
514 
515  ptr += sizeof(TREE_BLOCK_REF);
516  len -= sizeof(TREE_BLOCK_REF);
517  } else if (type == TYPE_EXTENT_DATA_REF) {
518  EXTENT_DATA_REF* edr;
519 
520  if (len < sizeof(EXTENT_DATA_REF)) {
521  ERR("EXTENT_DATA_REF takes up %u bytes, but only %u remaining\n", sizeof(EXTENT_DATA_REF), len);
522  break;
523  }
524 
525  edr = (EXTENT_DATA_REF*)ptr;
526 
// File offset = reference's offset plus the distance of address from the
// extent item's object id.
527  log_file_checksum_error(Vcb, address, devid, edr->root, edr->objid, edr->offset + address - tp.item->key.obj_id);
528 
529  rc += edr->count;
530 
531  ptr += sizeof(EXTENT_DATA_REF);
532  len -= sizeof(EXTENT_DATA_REF);
533  } else if (type == TYPE_SHARED_BLOCK_REF) {
534  SHARED_BLOCK_REF* sbr;
535 
536  if (len < sizeof(SHARED_BLOCK_REF)) {
537  ERR("SHARED_BLOCK_REF takes up %u bytes, but only %u remaining\n", sizeof(SHARED_BLOCK_REF), len);
538  break;
539  }
540 
541  sbr = (SHARED_BLOCK_REF*)ptr;
542 
543  log_tree_checksum_error_shared(Vcb, sbr->offset, address, devid);
544 
545  rc++;
546 
547  ptr += sizeof(SHARED_BLOCK_REF);
548  len -= sizeof(SHARED_BLOCK_REF);
549  } else if (type == TYPE_SHARED_DATA_REF) {
550  SHARED_DATA_REF* sdr;
551 
552  if (len < sizeof(SHARED_DATA_REF)) {
553  ERR("SHARED_DATA_REF takes up %u bytes, but only %u remaining\n", sizeof(SHARED_DATA_REF), len);
554  break;
555  }
556 
557  sdr = (SHARED_DATA_REF*)ptr;
558 
559  log_file_checksum_error_shared(Vcb, sdr->offset, address, devid, tp.item->key.obj_id);
560 
561  rc += sdr->count;
562 
563  ptr += sizeof(SHARED_DATA_REF);
564  len -= sizeof(SHARED_DATA_REF);
565  } else {
566  ERR("unknown extent type %x\n", type);
567  break;
568  }
569  }
570 
// Fewer inline references than the extent's refcount: keep stepping through the
// following items for as long as their object id equals address.
571  if (rc < ei->refcount) {
572  do {
573  traverse_ptr next_tp;
574 
575  if (find_next_item(Vcb, &tp, &next_tp, FALSE, NULL))
576  tp = next_tp;
577  else
578  break;
579 
580  if (tp.item->key.obj_id == address) {
// NOTE(review): the first branch's condition (original line 581) is absent from
// this listing.
582  log_tree_checksum_error(Vcb, address, devid, tp.item->key.offset, ei2 ? ei2->level : (UINT8)tp.item->key.offset, ei2 ? &ei2->firstitem : NULL);
583  else if (tp.item->key.obj_type == TYPE_EXTENT_DATA_REF) {
584  EXTENT_DATA_REF* edr;
585 
586  if (tp.item->size < sizeof(EXTENT_DATA_REF)) {
587  ERR("(%llx,%x,%llx) was %u bytes, expected %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset,
588  tp.item->size, sizeof(EXTENT_DATA_REF));
589  break;
590  }
591 
592  edr = (EXTENT_DATA_REF*)tp.item->data;
593 
594  log_file_checksum_error(Vcb, address, devid, edr->root, edr->objid, edr->offset + address - tp.item->key.obj_id);
595  } else if (tp.item->key.obj_type == TYPE_SHARED_BLOCK_REF)
596  log_tree_checksum_error_shared(Vcb, tp.item->key.offset, address, devid);
597  else if (tp.item->key.obj_type == TYPE_SHARED_DATA_REF)
598  log_file_checksum_error_shared(Vcb, tp.item->key.offset, address, devid, tp.item->key.obj_id);
599  } else
600  break;
601  } while (TRUE);
602  }
603 }
604 
/*
 * Log a scrub error at logical address addr on device devid.
 *
 * metadata    - TRUE for a metadata checksum error, FALSE for a data one.
 * recoverable - TRUE: a scrub_error with recovered = TRUE is queued here directly.
 *               FALSE: the work is handed to log_unrecoverable_error().
 * parity      - copied into the queued record and selects the log message
 *               (only looked at when recoverable is TRUE).
 */
605 static void log_error(device_extension* Vcb, UINT64 addr, UINT64 devid, BOOL metadata, BOOL recoverable, BOOL parity) {
606  if (recoverable) {
607  scrub_error* err;
608 
609  if (parity) {
610  ERR("recovering from parity error at %llx on device %llx\n", addr, devid);
611  } else {
612  if (metadata)
613  ERR("recovering from metadata checksum error at %llx on device %llx\n", addr, devid);
614  else
615  ERR("recovering from data checksum error at %llx on device %llx\n", addr, devid);
616  }
617 
// NOTE(review): the allocation of err (original line 618) is absent from this listing;
// in the visible text err is never assigned before the check below.
619  if (!err) {
620  ERR("out of memory\n");
621  return;
622  }
623 
624  err->address = addr;
625  err->device = devid;
626  err->recovered = TRUE;
627  err->is_metadata = metadata;
628  err->parity = parity;
629 
// A recovered error carries no location details: the metadata / data member that
// corresponds to the error kind is simply zeroed.
630  if (metadata)
631  RtlZeroMemory(&err->metadata, sizeof(err->metadata));
632  else
633  RtlZeroMemory(&err->data, sizeof(err->data));
634 
// NOTE(review): original lines 635 and 640, which bracket the update of the shared
// error list, are absent from this listing.
636 
637  Vcb->scrub.num_errors++;
638  InsertTailList(&Vcb->scrub.errors, &err->list_entry);
639 
641  } else {
642  if (metadata)
643  ERR("unrecoverable metadata checksum error at %llx\n", addr);
644  else
645  ERR("unrecoverable data checksum error at %llx\n", addr);
646 
647  log_unrecoverable_error(Vcb, addr, devid);
648  }
649 }
650 
/*
 * I/O completion routine for a scrub stripe read.
 * conptr is the scrub_context_stripe the IRP was issued for; the IRP's final status
 * block is copied into it, and context->Event is signalled when left reaches zero.
 *
 * NOTE(review): the declarations of context and left (original lines 658-659) and the
 * return statement (original line 668) are absent from this listing; left is
 * presumably the number of stripe reads still outstanding - confirm.
 */
651 _Function_class_(IO_COMPLETION_ROUTINE)
652 #ifdef __REACTOS__
653 static NTSTATUS NTAPI scrub_read_completion(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID conptr) {
654 #else
655 static NTSTATUS scrub_read_completion(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID conptr) {
656 #endif
657  scrub_context_stripe* stripe = conptr;
660 
661  UNUSED(DeviceObject);
662 
// Keep the completion status so the issuer can inspect it later.
663  stripe->iosb = Irp->IoStatus;
664 
665  if (left == 0)
666  KeSetEvent(&context->Event, 0, FALSE);
667 
669 }
670 
/*
 * Body of a scrub routine for chunks in which every stripe holds a full copy of the
 * same range: each present stripe is verified, bad copies are logged, and where a
 * good copy (or a good sector / node from another copy) exists it is written back
 * over the bad one.
 *
 * NOTE(review): the signature line and the declarations of Status and cis are absent
 * from this listing, so the function name and parameter list cannot be confirmed
 * here. The body uses Vcb, c, offset, csum and context. Many single lines are also
 * missing (visible as jumps in the embedded numbering, often right after
 * "csum_error = TRUE;" or before a return).
 *
 * csum != NULL : data extent, verified per sector against csum[].
 * csum == NULL : metadata, verified per node via the CRC and address stored in each
 *                tree header.
 */
673  BOOL csum_error = FALSE;
674  ULONG i;
676  UINT16 present_devices = 0;
677 
// Pass 1: verify every stripe whose device is present.
678  if (csum) {
679  ULONG good_stripe = 0xffffffff;
680 
681  for (i = 0; i < c->chunk_item->num_stripes; i++) {
682  if (c->devices[i]->devobj) {
683  present_devices++;
684 
685  // if first stripe is okay, we only need to check that the others are identical to it
686  if (good_stripe != 0xffffffff) {
687  if (RtlCompareMemory(context->stripes[i].buf, context->stripes[good_stripe].buf,
688  context->stripes[good_stripe].length) != context->stripes[i].length) {
689  context->stripes[i].csum_error = TRUE;
690  csum_error = TRUE;
692  }
693  } else {
694  Status = check_csum(Vcb, context->stripes[i].buf, context->stripes[i].length / Vcb->superblock.sector_size, csum);
695  if (Status == STATUS_CRC_ERROR) {
696  context->stripes[i].csum_error = TRUE;
697  csum_error = TRUE;
699  } else if (!NT_SUCCESS(Status)) {
700  ERR("check_csum returned %08x\n", Status);
701  return Status;
702  } else
703  good_stripe = i;
704  }
705  }
706  }
707  } else {
708  ULONG good_stripe = 0xffffffff;
709 
// NOTE(review): unlike the data branch above, present_devices is not incremented in
// the visible lines of this branch - confirm against the real source.
710  for (i = 0; i < c->chunk_item->num_stripes; i++) {
711  ULONG j;
712 
713  if (c->devices[i]->devobj) {
714  // if first stripe is okay, we only need to check that the others are identical to it
715  if (good_stripe != 0xffffffff) {
716  if (RtlCompareMemory(context->stripes[i].buf, context->stripes[good_stripe].buf,
717  context->stripes[good_stripe].length) != context->stripes[i].length) {
718  context->stripes[i].csum_error = TRUE;
719  csum_error = TRUE;
721  }
722  } else {
// A node is good when the CRC of everything after the csum field matches the
// stored checksum and the header's address equals the node's expected address.
723  for (j = 0; j < context->stripes[i].length / Vcb->superblock.node_size; j++) {
724  tree_header* th = (tree_header*)&context->stripes[i].buf[j * Vcb->superblock.node_size];
725  UINT32 crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum));
726 
727  if (crc32 != *((UINT32*)th->csum) || th->address != offset + UInt32x32To64(j, Vcb->superblock.node_size)) {
728  context->stripes[i].csum_error = TRUE;
729  csum_error = TRUE;
731  }
732  }
733 
734  if (!context->stripes[i].csum_error)
735  good_stripe = i;
736  }
737  }
738  }
739  }
740 
741  if (!csum_error)
742  return STATUS_SUCCESS;
743 
744  // handle checksum error
745 
// Pass 2: for every stripe flagged bad, compute the checksums actually found on disk
// into bad_csums so individual sectors / nodes can be compared later.
746  for (i = 0; i < c->chunk_item->num_stripes; i++) {
747  if (context->stripes[i].csum_error) {
748  if (csum) {
// NOTE(review): the allocation of bad_csums (original line 749) and the statement
// following the error message (original line 752) are absent from this listing.
750  if (!context->stripes[i].bad_csums) {
751  ERR("out of memory\n");
753  }
754 
755  Status = calc_csum(Vcb, context->stripes[i].buf, context->stripes[i].length / Vcb->superblock.sector_size, context->stripes[i].bad_csums);
756  if (!NT_SUCCESS(Status)) {
757  ERR("calc_csum returned %08x\n", Status);
758  return Status;
759  }
760  } else {
761  ULONG j;
762 
// NOTE(review): the allocation of bad_csums (original line 763) and the statement
// following the error message (original line 766) are absent from this listing.
764  if (!context->stripes[i].bad_csums) {
765  ERR("out of memory\n");
767  }
768 
769  for (j = 0; j < context->stripes[i].length / Vcb->superblock.node_size; j++) {
770  tree_header* th = (tree_header*)&context->stripes[i].buf[j * Vcb->superblock.node_size];
771  UINT32 crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum));
772 
773  context->stripes[i].bad_csums[j] = crc32;
774  }
775  }
776  }
777  }
778 
// Pass 3: with more than one copy present, try to repair.
779  if (present_devices > 1) {
780  ULONG good_stripe = 0xffffffff;
781 
782  for (i = 0; i < c->chunk_item->num_stripes; i++) {
783  if (c->devices[i]->devobj && !context->stripes[i].csum_error) {
784  good_stripe = i;
785  break;
786  }
787  }
788 
// Case A: at least one copy is entirely good - log each bad sector / node as
// recoverable and overwrite the bad copies with the good one.
789  if (good_stripe != 0xffffffff) {
790  // log
791 
792  for (i = 0; i < c->chunk_item->num_stripes; i++) {
793  if (context->stripes[i].csum_error) {
794  ULONG j;
795 
796  if (csum) {
797  for (j = 0; j < context->stripes[i].length / Vcb->superblock.sector_size; j++) {
798  if (context->stripes[i].bad_csums[j] != csum[j]) {
799  UINT64 addr = offset + UInt32x32To64(j, Vcb->superblock.sector_size);
800 
801  log_error(Vcb, addr, c->devices[i]->devitem.dev_id, FALSE, TRUE, FALSE);
803  }
804  }
805  } else {
806  for (j = 0; j < context->stripes[i].length / Vcb->superblock.node_size; j++) {
807  tree_header* th = (tree_header*)&context->stripes[i].buf[j * Vcb->superblock.node_size];
808  UINT64 addr = offset + UInt32x32To64(j, Vcb->superblock.node_size);
809 
810  if (context->stripes[i].bad_csums[j] != *((UINT32*)th->csum) || th->address != addr) {
811  log_error(Vcb, addr, c->devices[i]->devitem.dev_id, TRUE, TRUE, FALSE);
813  }
814  }
815  }
816  }
817  }
818 
819  // write good data over bad
820 
// Read-only devices are skipped. The physical position is the stripe's own
// offset plus the distance of this range from the start of the chunk.
821  for (i = 0; i < c->chunk_item->num_stripes; i++) {
822  if (context->stripes[i].csum_error && !c->devices[i]->readonly) {
823  Status = write_data_phys(c->devices[i]->devobj, cis[i].offset + offset - c->offset,
824  context->stripes[good_stripe].buf, context->stripes[i].length);
825 
826  if (!NT_SUCCESS(Status)) {
827  ERR("write_data_phys returned %08x\n", Status);
829  return Status;
830  }
831  }
832  }
833 
834  return STATUS_SUCCESS;
835  }
836 
837  // if csum errors on all stripes, check sector by sector
838 
// Case B: no copy is entirely good - for each bad sector / node, look for another
// present copy in which that same sector / node verifies, and patch the in-memory
// buffer from it.
839  for (i = 0; i < c->chunk_item->num_stripes; i++) {
840  ULONG j;
841 
842  if (c->devices[i]->devobj) {
843  if (csum) {
844  for (j = 0; j < context->stripes[i].length / Vcb->superblock.sector_size; j++) {
845  if (context->stripes[i].bad_csums[j] != csum[j]) {
846  ULONG k;
847  UINT64 addr = offset + UInt32x32To64(j, Vcb->superblock.sector_size);
848  BOOL recovered = FALSE;
849 
850  for (k = 0; k < c->chunk_item->num_stripes; k++) {
851  if (i != k && c->devices[k]->devobj && context->stripes[k].bad_csums[j] == csum[j]) {
852  log_error(Vcb, addr, c->devices[i]->devitem.dev_id, FALSE, TRUE, FALSE);
854 
855  RtlCopyMemory(context->stripes[i].buf + (j * Vcb->superblock.sector_size),
856  context->stripes[k].buf + (j * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
857 
858  recovered = TRUE;
859  break;
860  }
861  }
862 
863  if (!recovered) {
864  log_error(Vcb, addr, c->devices[i]->devitem.dev_id, FALSE, FALSE, FALSE);
866  }
867  }
868  }
869  } else {
870  for (j = 0; j < context->stripes[i].length / Vcb->superblock.node_size; j++) {
871  tree_header* th = (tree_header*)&context->stripes[i].buf[j * Vcb->superblock.node_size];
872  UINT64 addr = offset + UInt32x32To64(j, Vcb->superblock.node_size);
873 
874  if (context->stripes[i].bad_csums[j] != *((UINT32*)th->csum) || th->address != addr) {
875  ULONG k;
876  BOOL recovered = FALSE;
877 
878  for (k = 0; k < c->chunk_item->num_stripes; k++) {
879  if (i != k && c->devices[k]->devobj) {
880  tree_header* th2 = (tree_header*)&context->stripes[k].buf[j * Vcb->superblock.node_size];
881 
882  if (context->stripes[k].bad_csums[j] == *((UINT32*)th2->csum) && th2->address == addr) {
883  log_error(Vcb, addr, c->devices[i]->devitem.dev_id, TRUE, TRUE, FALSE);
885 
886  RtlCopyMemory(th, th2, Vcb->superblock.node_size);
887 
888  recovered = TRUE;
889  break;
890  }
891  }
892  }
893 
894  if (!recovered) {
895  log_error(Vcb, addr, c->devices[i]->devitem.dev_id, TRUE, FALSE, FALSE);
897  }
898  }
899  }
900  }
901  }
902  }
903 
904  // write good data over bad
905 
// Every present, writable stripe is rewritten from its (possibly patched) buffer.
906  for (i = 0; i < c->chunk_item->num_stripes; i++) {
907  if (c->devices[i]->devobj && !c->devices[i]->readonly) {
908  Status = write_data_phys(c->devices[i]->devobj, cis[i].offset + offset - c->offset,
909  context->stripes[i].buf, context->stripes[i].length);
910  if (!NT_SUCCESS(Status)) {
911  ERR("write_data_phys returned %08x\n", Status);
913  return Status;
914  }
915  }
916  }
917 
918  return STATUS_SUCCESS;
919  }
920 
// Reached when present_devices is not greater than 1: nothing to repair from, so
// every bad sector / node is logged as unrecoverable.
921  for (i = 0; i < c->chunk_item->num_stripes; i++) {
922  if (c->devices[i]->devobj) {
923  ULONG j;
924 
925  if (csum) {
926  for (j = 0; j < context->stripes[i].length / Vcb->superblock.sector_size; j++) {
927  if (context->stripes[i].bad_csums[j] != csum[j]) {
928  UINT64 addr = offset + UInt32x32To64(j, Vcb->superblock.sector_size);
929 
930  log_error(Vcb, addr, c->devices[i]->devitem.dev_id, FALSE, FALSE, FALSE);
931  }
932  }
933  } else {
934  for (j = 0; j < context->stripes[i].length / Vcb->superblock.node_size; j++) {
935  tree_header* th = (tree_header*)&context->stripes[i].buf[j * Vcb->superblock.node_size];
936  UINT64 addr = offset + UInt32x32To64(j, Vcb->superblock.node_size);
937 
938  if (context->stripes[i].bad_csums[j] != *((UINT32*)th->csum) || th->address != addr)
939  log_error(Vcb, addr, c->devices[i]->devitem.dev_id, TRUE, FALSE, FALSE);
940  }
941  }
942  }
943  }
944 
945  return STATUS_SUCCESS;
946 }
947 
/*
 * Body of a scrub routine for a striped layout without redundancy: the logical range
 * [offset, offset + length) is walked stripe by stripe in round-robin order, each
 * sector (data) or node (metadata) is verified, and every failure is logged as
 * unrecoverable.
 *
 * NOTE(review): the signature line is absent from this listing, so the function name
 * and parameter list cannot be confirmed here. The body uses Vcb, c, offset, length,
 * startoffstripe, csum and context.
 */
949  ULONG j;
950  UINT16 stripe;
951  UINT32 pos, *stripeoff;
952 
// pos is the progress through the logical range; stripeoff[s] is how far stripe s's
// buffer has been consumed so far.
953  pos = 0;
// NOTE(review): the allocation of stripeoff (original line 954) and the statement
// following the error message (original line 957) are absent from this listing.
955  if (!stripeoff) {
956  ERR("out of memory\n");
958  }
959 
960  RtlZeroMemory(stripeoff, sizeof(UINT32) * c->chunk_item->num_stripes);
961 
962  stripe = startoffstripe;
963  while (pos < length) {
964  UINT32 readlen;
965 
// The first piece may start part-way into a stripe unit, so it is capped by what is
// left of that unit; later pieces are at most one full stripe_length.
966  if (pos == 0)
967  readlen = (UINT32)min(context->stripes[stripe].length, c->chunk_item->stripe_length - (context->stripes[stripe].start % c->chunk_item->stripe_length));
968  else
969  readlen = min(length - pos, (UINT32)c->chunk_item->stripe_length);
970 
971  if (csum) {
// Data: one CRC per sector, compared with the expected checksum for that sector.
972  for (j = 0; j < readlen; j += Vcb->superblock.sector_size) {
973  UINT32 crc32 = ~calc_crc32c(0xffffffff, context->stripes[stripe].buf + stripeoff[stripe], Vcb->superblock.sector_size);
974 
975  if (crc32 != csum[pos / Vcb->superblock.sector_size]) {
976  UINT64 addr = offset + pos;
977 
978  log_error(Vcb, addr, c->devices[stripe]->devitem.dev_id, FALSE, FALSE, FALSE);
980  }
981 
982  pos += Vcb->superblock.sector_size;
983  stripeoff[stripe] += Vcb->superblock.sector_size;
984  }
985  } else {
// Metadata: the CRC of everything after the csum field must match the stored
// checksum, and the header's address must equal the node's logical address.
986  for (j = 0; j < readlen; j += Vcb->superblock.node_size) {
987  tree_header* th = (tree_header*)(context->stripes[stripe].buf + stripeoff[stripe]);
988  UINT32 crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum));
989  UINT64 addr = offset + pos;
990 
991  if (crc32 != *((UINT32*)th->csum) || th->address != addr) {
992  log_error(Vcb, addr, c->devices[stripe]->devitem.dev_id, TRUE, FALSE, FALSE);
994  }
995 
996  pos += Vcb->superblock.node_size;
997  stripeoff[stripe] += Vcb->superblock.node_size;
998  }
999  }
1000 
// Advance to the next stripe, wrapping around after the last one.
1001  stripe = (stripe + 1) % c->chunk_item->num_stripes;
1002  }
1003 
1004  ExFreePool(stripeoff);
1005 
1006  return STATUS_SUCCESS;
1007 }
1008 
1010  ULONG j;
1011  UINT16 stripe, sub_stripes = max(c->chunk_item->sub_stripes, 1);
1012  UINT32 pos, *stripeoff;
1013  BOOL csum_error = FALSE;
1014  NTSTATUS Status;
1015 
1016  pos = 0;
1017  stripeoff = ExAllocatePoolWithTag(NonPagedPool, sizeof(UINT32) * c->chunk_item->num_stripes / sub_stripes, ALLOC_TAG);
1018  if (!stripeoff) {
1019  ERR("out of memory\n");
1021  }
1022 
1023  RtlZeroMemory(stripeoff, sizeof(UINT32) * c->chunk_item->num_stripes / sub_stripes);
1024 
1025  stripe = startoffstripe;
1026  while (pos < length) {
1027  UINT32 readlen;
1028 
1029  if (pos == 0)
1030  readlen = (UINT32)min(context->stripes[stripe * sub_stripes].length,
1031  c->chunk_item->stripe_length - (context->stripes[stripe * sub_stripes].start % c->chunk_item->stripe_length));
1032  else
1033  readlen = min(length - pos, (UINT32)c->chunk_item->stripe_length);
1034 
1035  if (csum) {
1036  ULONG good_stripe = 0xffffffff;
1037  UINT16 k;
1038 
1039  for (k = 0; k < sub_stripes; k++) {
1040  if (c->devices[(stripe * sub_stripes) + k]->devobj) {
1041  // if first stripe is okay, we only need to check that the others are identical to it
1042  if (good_stripe != 0xffffffff) {
1043  if (RtlCompareMemory(context->stripes[(stripe * sub_stripes) + k].buf + stripeoff[stripe],
1044  context->stripes[(stripe * sub_stripes) + good_stripe].buf + stripeoff[stripe],
1045  readlen) != readlen) {
1046  context->stripes[(stripe * sub_stripes) + k].csum_error = TRUE;
1047  csum_error = TRUE;
1048  log_device_error(Vcb, c->devices[(stripe * sub_stripes) + k], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
1049  }
1050  } else {
1051  for (j = 0; j < readlen; j += Vcb->superblock.sector_size) {
1052  UINT32 crc32 = ~calc_crc32c(0xffffffff, context->stripes[(stripe * sub_stripes) + k].buf + stripeoff[stripe] + j, Vcb->superblock.sector_size);
1053 
1054  if (crc32 != csum[(pos + j) / Vcb->superblock.sector_size]) {
1055  csum_error = TRUE;
1056  context->stripes[(stripe * sub_stripes) + k].csum_error = TRUE;
1057  log_device_error(Vcb, c->devices[(stripe * sub_stripes) + k], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
1058  break;
1059  }
1060  }
1061 
1062  if (!context->stripes[(stripe * sub_stripes) + k].csum_error)
1063  good_stripe = k;
1064  }
1065  }
1066  }
1067 
1068  pos += readlen;
1069  stripeoff[stripe] += readlen;
1070  } else {
1071  ULONG good_stripe = 0xffffffff;
1072  UINT16 k;
1073 
1074  for (k = 0; k < sub_stripes; k++) {
1075  if (c->devices[(stripe * sub_stripes) + k]->devobj) {
1076  // if first stripe is okay, we only need to check that the others are identical to it
1077  if (good_stripe != 0xffffffff) {
1078  if (RtlCompareMemory(context->stripes[(stripe * sub_stripes) + k].buf + stripeoff[stripe],
1079  context->stripes[(stripe * sub_stripes) + good_stripe].buf + stripeoff[stripe],
1080  readlen) != readlen) {
1081  context->stripes[(stripe * sub_stripes) + k].csum_error = TRUE;
1082  csum_error = TRUE;
1083  log_device_error(Vcb, c->devices[(stripe * sub_stripes) + k], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
1084  }
1085  } else {
1086  for (j = 0; j < readlen; j += Vcb->superblock.node_size) {
1087  tree_header* th = (tree_header*)(context->stripes[(stripe * sub_stripes) + k].buf + stripeoff[stripe] + j);
1088  UINT32 crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum));
1089  UINT64 addr = offset + pos + j;
1090 
1091  if (crc32 != *((UINT32*)th->csum) || th->address != addr) {
1092  csum_error = TRUE;
1093  context->stripes[(stripe * sub_stripes) + k].csum_error = TRUE;
1094  log_device_error(Vcb, c->devices[(stripe * sub_stripes) + k], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
1095  break;
1096  }
1097  }
1098 
1099  if (!context->stripes[(stripe * sub_stripes) + k].csum_error)
1100  good_stripe = k;
1101  }
1102  }
1103  }
1104 
1105  pos += readlen;
1106  stripeoff[stripe] += readlen;
1107  }
1108 
1109  stripe = (stripe + 1) % (c->chunk_item->num_stripes / sub_stripes);
1110  }
1111 
1112  if (!csum_error) {
1113  Status = STATUS_SUCCESS;
1114  goto end;
1115  }
1116 
1117  for (j = 0; j < c->chunk_item->num_stripes; j += sub_stripes) {
1118  ULONG goodstripe = 0xffffffff;
1119  UINT16 k;
1120  BOOL hasbadstripe = FALSE;
1121 
1122  if (context->stripes[j].length == 0)
1123  continue;
1124 
1125  for (k = 0; k < sub_stripes; k++) {
1126  if (c->devices[j + k]->devobj) {
1127  if (!context->stripes[j + k].csum_error)
1128  goodstripe = k;
1129  else
1130  hasbadstripe = TRUE;
1131  }
1132  }
1133 
1134  if (hasbadstripe) {
1135  if (goodstripe != 0xffffffff) {
1136  for (k = 0; k < sub_stripes; k++) {
1137  if (c->devices[j + k]->devobj && context->stripes[j + k].csum_error) {
1138  UINT32 so = 0;
1139  BOOL recovered = FALSE;
1140 
1141  pos = 0;
1142 
1143  stripe = startoffstripe;
1144  while (pos < length) {
1145  UINT32 readlen;
1146 
1147  if (pos == 0)
1148  readlen = (UINT32)min(context->stripes[stripe * sub_stripes].length,
1149  c->chunk_item->stripe_length - (context->stripes[stripe * sub_stripes].start % c->chunk_item->stripe_length));
1150  else
1151  readlen = min(length - pos, (UINT32)c->chunk_item->stripe_length);
1152 
1153  if (stripe == j / sub_stripes) {
1154  if (csum) {
1155  ULONG l;
1156 
1157  for (l = 0; l < readlen; l += Vcb->superblock.sector_size) {
1158  if (RtlCompareMemory(context->stripes[j + k].buf + so,
1159  context->stripes[j + goodstripe].buf + so,
1160  Vcb->superblock.sector_size) != Vcb->superblock.sector_size) {
1161  UINT64 addr = offset + pos;
1162 
1163  log_error(Vcb, addr, c->devices[j + k]->devitem.dev_id, FALSE, TRUE, FALSE);
1164 
1165  recovered = TRUE;
1166  }
1167 
1168  pos += Vcb->superblock.sector_size;
1169  so += Vcb->superblock.sector_size;
1170  }
1171  } else {
1172  ULONG l;
1173 
1174  for (l = 0; l < readlen; l += Vcb->superblock.node_size) {
1175  if (RtlCompareMemory(context->stripes[j + k].buf + so,
1176  context->stripes[j + goodstripe].buf + so,
1177  Vcb->superblock.node_size) != Vcb->superblock.node_size) {
1178  UINT64 addr = offset + pos;
1179 
1180  log_error(Vcb, addr, c->devices[j + k]->devitem.dev_id, TRUE, TRUE, FALSE);
1181 
1182  recovered = TRUE;
1183  }
1184 
1185  pos += Vcb->superblock.node_size;
1186  so += Vcb->superblock.node_size;
1187  }
1188  }
1189  } else
1190  pos += readlen;
1191 
1192  stripe = (stripe + 1) % (c->chunk_item->num_stripes / sub_stripes);
1193  }
1194 
1195  if (recovered) {
1196  // write good data over bad
1197 
1198  if (!c->devices[j + k]->readonly) {
1200 
1201  Status = write_data_phys(c->devices[j + k]->devobj, cis[j + k].offset + offset - c->offset,
1202  context->stripes[j + goodstripe].buf, context->stripes[j + goodstripe].length);
1203 
1204  if (!NT_SUCCESS(Status)) {
1205  ERR("write_data_phys returned %08x\n", Status);
1207  goto end;
1208  }
1209  }
1210  }
1211  }
1212  }
1213  } else {
1214  UINT32 so = 0;
1215  BOOL recovered = FALSE;
1216 
1217  if (csum) {
1218  for (k = 0; k < sub_stripes; k++) {
1219  if (c->devices[j + k]->devobj) {
1220  context->stripes[j + k].bad_csums = ExAllocatePoolWithTag(PagedPool, context->stripes[j + k].length * sizeof(UINT32) / Vcb->superblock.sector_size, ALLOC_TAG);
1221  if (!context->stripes[j + k].bad_csums) {
1222  ERR("out of memory\n");
1224  goto end;
1225  }
1226 
1227  Status = calc_csum(Vcb, context->stripes[j + k].buf, context->stripes[j + k].length / Vcb->superblock.sector_size, context->stripes[j + k].bad_csums);
1228  if (!NT_SUCCESS(Status)) {
1229  ERR("calc_csum returned %08x\n", Status);
1230  goto end;
1231  }
1232  }
1233  }
1234  } else {
1235  for (k = 0; k < sub_stripes; k++) {
1236  if (c->devices[j + k]->devobj) {
1237  ULONG l;
1238 
1239  context->stripes[j + k].bad_csums = ExAllocatePoolWithTag(PagedPool, context->stripes[j + k].length * sizeof(UINT32) / Vcb->superblock.node_size, ALLOC_TAG);
1240  if (!context->stripes[j + k].bad_csums) {
1241  ERR("out of memory\n");
1243  goto end;
1244  }
1245 
1246  for (l = 0; l < context->stripes[j + k].length / Vcb->superblock.node_size; l++) {
1247  tree_header* th = (tree_header*)&context->stripes[j + k].buf[l * Vcb->superblock.node_size];
1248  UINT32 crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum));
1249 
1250  context->stripes[j + k].bad_csums[l] = crc32;
1251  }
1252  }
1253  }
1254  }
1255 
1256  pos = 0;
1257 
1258  stripe = startoffstripe;
1259  while (pos < length) {
1260  UINT32 readlen;
1261 
1262  if (pos == 0)
1263  readlen = (UINT32)min(context->stripes[stripe * sub_stripes].length,
1264  c->chunk_item->stripe_length - (context->stripes[stripe * sub_stripes].start % c->chunk_item->stripe_length));
1265  else
1266  readlen = min(length - pos, (UINT32)c->chunk_item->stripe_length);
1267 
1268  if (stripe == j / sub_stripes) {
1269  ULONG l;
1270 
1271  if (csum) {
1272  for (l = 0; l < readlen; l += Vcb->superblock.sector_size) {
1273  UINT32 crc32 = csum[pos / Vcb->superblock.sector_size];
1274  BOOL has_error = FALSE;
1275 
1276  goodstripe = 0xffffffff;
1277  for (k = 0; k < sub_stripes; k++) {
1278  if (c->devices[j + k]->devobj) {
1279  if (context->stripes[j + k].bad_csums[so / Vcb->superblock.sector_size] != crc32)
1280  has_error = TRUE;
1281  else
1282  goodstripe = k;
1283  }
1284  }
1285 
1286  if (has_error) {
1287  if (goodstripe != 0xffffffff) {
1288  for (k = 0; k < sub_stripes; k++) {
1289  if (c->devices[j + k]->devobj && context->stripes[j + k].bad_csums[so / Vcb->superblock.sector_size] != crc32) {
1290  UINT64 addr = offset + pos;
1291 
1292  log_error(Vcb, addr, c->devices[j + k]->devitem.dev_id, FALSE, TRUE, FALSE);
1293 
1294  recovered = TRUE;
1295 
1296  RtlCopyMemory(context->stripes[j + k].buf + so, context->stripes[j + goodstripe].buf + so,
1297  Vcb->superblock.sector_size);
1298  }
1299  }
1300  } else {
1301  UINT64 addr = offset + pos;
1302 
1303  for (k = 0; k < sub_stripes; k++) {
1304  if (c->devices[j + j]->devobj) {
1305  log_error(Vcb, addr, c->devices[j + k]->devitem.dev_id, FALSE, FALSE, FALSE);
1307  }
1308  }
1309  }
1310  }
1311 
1312  pos += Vcb->superblock.sector_size;
1313  so += Vcb->superblock.sector_size;
1314  }
1315  } else {
1316  for (l = 0; l < readlen; l += Vcb->superblock.node_size) {
1317  for (k = 0; k < sub_stripes; k++) {
1318  if (c->devices[j + k]->devobj) {
1319  tree_header* th = (tree_header*)&context->stripes[j + k].buf[so];
1320  UINT64 addr = offset + pos;
1321 
1322  if (context->stripes[j + k].bad_csums[so / Vcb->superblock.node_size] != *((UINT32*)th->csum) || th->address != addr) {
1323  ULONG m;
1324 
1325  recovered = FALSE;
1326 
1327  for (m = 0; m < sub_stripes; m++) {
1328  if (m != k) {
1329  tree_header* th2 = (tree_header*)&context->stripes[j + m].buf[so];
1330 
1331  if (context->stripes[j + m].bad_csums[so / Vcb->superblock.node_size] == *((UINT32*)th2->csum) && th2->address == addr) {
1332  log_error(Vcb, addr, c->devices[j + k]->devitem.dev_id, TRUE, TRUE, FALSE);
1333 
1334  RtlCopyMemory(th, th2, Vcb->superblock.node_size);
1335 
1336  recovered = TRUE;
1337  break;
1338  } else
1340  }
1341  }
1342 
1343  if (!recovered)
1344  log_error(Vcb, addr, c->devices[j + k]->devitem.dev_id, TRUE, FALSE, FALSE);
1345  }
1346  }
1347  }
1348 
1349  pos += Vcb->superblock.node_size;
1350  so += Vcb->superblock.node_size;
1351  }
1352  }
1353  } else
1354  pos += readlen;
1355 
1356  stripe = (stripe + 1) % (c->chunk_item->num_stripes / sub_stripes);
1357  }
1358 
1359  if (recovered) {
1360  // write good data over bad
1361 
1362  for (k = 0; k < sub_stripes; k++) {
1363  if (c->devices[j + k]->devobj && !c->devices[j + k]->readonly) {
1365 
1366  Status = write_data_phys(c->devices[j + k]->devobj, cis[j + k].offset + offset - c->offset,
1367  context->stripes[j + k].buf, context->stripes[j + k].length);
1368 
1369  if (!NT_SUCCESS(Status)) {
1370  ERR("write_data_phys returned %08x\n", Status);
1372  goto end;
1373  }
1374  }
1375  }
1376  }
1377  }
1378  }
1379  }
1380 
1381  Status = STATUS_SUCCESS;
1382 
1383 end:
1384  ExFreePool(stripeoff);
1385 
1386  return Status;
1387 }
1388 
// Body of the per-extent scrub routine; presumably scrub_extent, which a later block in this
// file calls with (Vcb, c, type, offset, size, csum).
// NOTE(review): this listing is missing the signature line and several statements (listing
// numbers are skipped below, e.g. 1389, 1391, 1398, 1401, 1405, 1502, 1576, 1583), so the
// comments here describe only what the visible lines do.
//
// Visible flow:
//  1. work out, per stripe, which byte range has to be read for this chunk type;
//  2. build one read IRP for every present device that has a non-empty range;
//  3. send the IRPs, then fail if any stripe's iosb carries an error status;
//  4. hand the buffers to the type-specific checker (DUP / RAID0 / RAID10);
//  5. at `end`, release the IRPs, MDLs, stripe buffers and bad_csums arrays.
1390  ULONG i;
1392  CHUNK_ITEM_STRIPE* cis;
1393  NTSTATUS Status;
1394  UINT16 startoffstripe, num_missing, allowed_missing;
1395 
1396  TRACE("(%p, %p, %llx, %llx, %p)\n", Vcb, c, offset, size, csum);
1397 
1399  if (!context.stripes) {
1400  ERR("out of memory\n");
1402  goto end;
1403  }
1404 
1406 
1407  context.stripes_left = 0;
1408 
// The CHUNK_ITEM_STRIPE array is taken from the address just past the CHUNK_ITEM header.
1409  cis = (CHUNK_ITEM_STRIPE*)&c->chunk_item[1];
1410 
// RAID0: translate the first and the last byte of the request into (offset, stripe index)
// pairs, then derive each stripe's start and length from them.
1411  if (type == BLOCK_FLAG_RAID0) {
1412  UINT64 startoff, endoff;
1413  UINT16 endoffstripe;
1414 
1415  get_raid0_offset(offset - c->offset, c->chunk_item->stripe_length, c->chunk_item->num_stripes, &startoff, &startoffstripe);
1416  get_raid0_offset(offset + size - c->offset - 1, c->chunk_item->stripe_length, c->chunk_item->num_stripes, &endoff, &endoffstripe);
1417 
1418  for (i = 0; i < c->chunk_item->num_stripes; i++) {
// Stripes before the first one touched start at the next stripe_length boundary, the first
// one starts at startoff itself, and later ones start at the current boundary.
1419  if (startoffstripe > i)
1420  context.stripes[i].start = startoff - (startoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length;
1421  else if (startoffstripe == i)
1422  context.stripes[i].start = startoff;
1423  else
1424  context.stripes[i].start = startoff - (startoff % c->chunk_item->stripe_length);
1425 
// Same idea for the end: earlier stripes run to the end of endoff's stripe_length unit, the
// last stripe ends at endoff inclusive, and later ones stop at the start of that unit.
1426  if (endoffstripe > i)
1427  context.stripes[i].length = (UINT32)(endoff - (endoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length - context.stripes[i].start);
1428  else if (endoffstripe == i)
1429  context.stripes[i].length = (UINT32)(endoff + 1 - context.stripes[i].start);
1430  else
1431  context.stripes[i].length = (UINT32)(endoff - (endoff % c->chunk_item->stripe_length) - context.stripes[i].start);
1432  }
1433 
// RAID0: no device may be missing.
1434  allowed_missing = 0;
1435  } else if (type == BLOCK_FLAG_RAID10) {
// RAID10: the same computation over num_stripes / sub_stripes striping groups; sub_stripes is
// clamped to at least 1 before it is used as a divisor.
1436  UINT64 startoff, endoff;
1437  UINT16 endoffstripe, j, sub_stripes = max(c->chunk_item->sub_stripes, 1);
1438 
1439  get_raid0_offset(offset - c->offset, c->chunk_item->stripe_length, c->chunk_item->num_stripes / sub_stripes, &startoff, &startoffstripe);
1440  get_raid0_offset(offset + size - c->offset - 1, c->chunk_item->stripe_length, c->chunk_item->num_stripes / sub_stripes, &endoff, &endoffstripe);
1441 
1442  if ((c->chunk_item->num_stripes % sub_stripes) != 0) {
1443  ERR("chunk %llx: num_stripes %x was not a multiple of sub_stripes %x!\n", c->offset, c->chunk_item->num_stripes, sub_stripes);
1444  Status = STATUS_INTERNAL_ERROR;
1445  goto end;
1446  }
1447 
// Scale the group indices so they index the first member of each group in the stripes array.
1448  startoffstripe *= sub_stripes;
1449  endoffstripe *= sub_stripes;
1450 
1451  for (i = 0; i < c->chunk_item->num_stripes; i += sub_stripes) {
1452  if (startoffstripe > i)
1453  context.stripes[i].start = startoff - (startoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length;
1454  else if (startoffstripe == i)
1455  context.stripes[i].start = startoff;
1456  else
1457  context.stripes[i].start = startoff - (startoff % c->chunk_item->stripe_length);
1458 
1459  if (endoffstripe > i)
1460  context.stripes[i].length = (UINT32)(endoff - (endoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length - context.stripes[i].start);
1461  else if (endoffstripe == i)
1462  context.stripes[i].length = (UINT32)(endoff + 1 - context.stripes[i].start);
1463  else
1464  context.stripes[i].length = (UINT32)(endoff - (endoff % c->chunk_item->stripe_length) - context.stripes[i].start);
1465 
// The remaining members of the group read exactly the same range as the first one.
1466  for (j = 1; j < sub_stripes; j++) {
1467  context.stripes[i+j].start = context.stripes[i].start;
1468  context.stripes[i+j].length = context.stripes[i].length;
1469  }
1470  }
1471 
// Back to a group index, which is the form passed to scrub_extent_raid10 further down.
1472  startoffstripe /= sub_stripes;
1473  allowed_missing = 1;
1474  } else
// Any other type: all but one device may be missing (only BLOCK_FLAG_DUPLICATE gets past the
// type check inside the loop below).
1475  allowed_missing = c->chunk_item->num_stripes - 1;
1476 
1477  num_missing = 0;
1478 
// Prepare one read per stripe.
1479  for (i = 0; i < c->chunk_item->num_stripes; i++) {
1481 
// Back-pointer from the stripe to this context; the completion routine registered below is
// handed &context.stripes[i].
1482  context.stripes[i].context = (struct _scrub_context*)&context;
1483 
// DUP: every stripe reads the whole requested range.
1484  if (type == BLOCK_FLAG_DUPLICATE) {
1485  context.stripes[i].start = offset - c->offset;
1486  context.stripes[i].length = size;
1487  } else if (type != BLOCK_FLAG_RAID0 && type != BLOCK_FLAG_RAID10) {
1488  ERR("unexpected chunk type %x\n", type);
1489  Status = STATUS_INTERNAL_ERROR;
1490  goto end;
1491  }
1492 
// A device without a devobj counts as missing; give up once more are missing than allowed.
1493  if (!c->devices[i]->devobj) {
1494  num_missing++;
1495 
1496  if (num_missing > allowed_missing) {
1497  ERR("too many missing devices (at least %u, maximum allowed %u)\n", num_missing, allowed_missing);
1498  Status = STATUS_INTERNAL_ERROR;
1499  goto end;
1500  }
1501  } else if (context.stripes[i].length > 0) {
1503 
1504  if (!context.stripes[i].buf) {
1505  ERR("out of memory\n");
1507  goto end;
1508  }
1509 
1510  context.stripes[i].Irp = IoAllocateIrp(c->devices[i]->devobj->StackSize, FALSE);
1511 
1512  if (!context.stripes[i].Irp) {
1513  ERR("IoAllocateIrp failed\n");
1515  goto end;
1516  }
1517 
1518  IrpSp = IoGetNextIrpStackLocation(context.stripes[i].Irp);
1519  IrpSp->MajorFunction = IRP_MJ_READ;
1520 
// How the buffer is attached to the IRP depends on the target device's I/O flags.
// Buffered I/O: the IRP gets its own NonPagedPool SystemBuffer and UserBuffer points at the
// stripe buffer.
1521  if (c->devices[i]->devobj->Flags & DO_BUFFERED_IO) {
1522  context.stripes[i].Irp->AssociatedIrp.SystemBuffer = ExAllocatePoolWithTag(NonPagedPool, context.stripes[i].length, ALLOC_TAG);
1523  if (!context.stripes[i].Irp->AssociatedIrp.SystemBuffer) {
1524  ERR("out of memory\n");
1526  goto end;
1527  }
1528 
1530 
1531  context.stripes[i].Irp->UserBuffer = context.stripes[i].buf;
1532  } else if (c->devices[i]->devobj->Flags & DO_DIRECT_IO) {
// Direct I/O: describe the stripe buffer with an MDL and lock its pages (IoWriteAccess); an
// exception raised while locking is turned into Status.
1533  context.stripes[i].Irp->MdlAddress = IoAllocateMdl(context.stripes[i].buf, context.stripes[i].length, FALSE, FALSE, NULL);
1534  if (!context.stripes[i].Irp->MdlAddress) {
1535  ERR("IoAllocateMdl failed\n");
1537  goto end;
1538  }
1539 
1540  Status = STATUS_SUCCESS;
1541 
1542  _SEH2_TRY {
1543  MmProbeAndLockPages(context.stripes[i].Irp->MdlAddress, KernelMode, IoWriteAccess);
1545  Status = _SEH2_GetExceptionCode();
1546  } _SEH2_END;
1547 
// On failure the MDL is freed and cleared here, so the cleanup at `end` (which unlocks any
// non-NULL MdlAddress) does not unlock pages that were never locked.
1548  if (!NT_SUCCESS(Status)) {
1549  ERR("MmProbeAndLockPages threw exception %08x\n", Status);
1550  IoFreeMdl(context.stripes[i].Irp->MdlAddress);
1551  context.stripes[i].Irp->MdlAddress = NULL;
1552  goto end;
1553  }
1554  } else
// Neither flag set: the stripe buffer is passed through UserBuffer.
1555  context.stripes[i].Irp->UserBuffer = context.stripes[i].buf;
1556 
// Read position on the device is the stripe-relative start plus cis[i].offset.
1557  IrpSp->Parameters.Read.Length = context.stripes[i].length;
1558  IrpSp->Parameters.Read.ByteOffset.QuadPart = context.stripes[i].start + cis[i].offset;
1559 
1560  context.stripes[i].Irp->UserIosb = &context.stripes[i].iosb;
1561 
1562  IoSetCompletionRoutine(context.stripes[i].Irp, scrub_read_completion, &context.stripes[i], TRUE, TRUE, TRUE);
1563 
// One more outstanding read; its bytes are added to the running data_scrubbed total.
1564  context.stripes_left++;
1565 
1566  Vcb->scrub.data_scrubbed += context.stripes[i].length;
1567  }
1568  }
1569 
1570  if (context.stripes_left == 0) {
1571  ERR("error - not reading any stripes\n");
1572  Status = STATUS_INTERNAL_ERROR;
1573  goto end;
1574  }
1575 
1577 
// Issue every IRP prepared above (same condition as the one under which it was built).
1578  for (i = 0; i < c->chunk_item->num_stripes; i++) {
1579  if (c->devices[i]->devobj && context.stripes[i].length > 0)
1580  IoCallDriver(c->devices[i]->devobj, context.stripes[i].Irp);
1581  }
1582 
// NOTE(review): listing line 1583 is missing here; presumably the wait for the reads to
// finish — confirm against the full source.
1584 
1585  // return an error if any of the stripes returned an error
1586  for (i = 0; i < c->chunk_item->num_stripes; i++) {
1587  if (!NT_SUCCESS(context.stripes[i].iosb.Status)) {
1588  Status = context.stripes[i].iosb.Status;
1590  goto end;
1591  }
1592  }
1593 
// Verification (and any repair) is delegated to the checker for this chunk type.
1594  if (type == BLOCK_FLAG_DUPLICATE) {
1595  Status = scrub_extent_dup(Vcb, c, offset, csum, &context);
1596  if (!NT_SUCCESS(Status)) {
1597  ERR("scrub_extent_dup returned %08x\n", Status);
1598  goto end;
1599  }
1600  } else if (type == BLOCK_FLAG_RAID0) {
1601  Status = scrub_extent_raid0(Vcb, c, offset, size, startoffstripe, csum, &context);
1602  if (!NT_SUCCESS(Status)) {
1603  ERR("scrub_extent_raid0 returned %08x\n", Status);
1604  goto end;
1605  }
1606  } else if (type == BLOCK_FLAG_RAID10) {
1607  Status = scrub_extent_raid10(Vcb, c, offset, size, startoffstripe, csum, &context);
1608  if (!NT_SUCCESS(Status)) {
1609  ERR("scrub_extent_raid10 returned %08x\n", Status);
1610  goto end;
1611  }
1612  }
1613 
// Common exit: release whatever was set up for each stripe, then the stripe array itself.
1614 end:
1615  if (context.stripes) {
1616  for (i = 0; i < c->chunk_item->num_stripes; i++) {
1617  if (context.stripes[i].Irp) {
// Only the direct-I/O path attaches an MDL; unlock and free it before freeing the IRP.
1618  if (c->devices[i]->devobj->Flags & DO_DIRECT_IO && context.stripes[i].Irp->MdlAddress) {
1619  MmUnlockPages(context.stripes[i].Irp->MdlAddress);
1620  IoFreeMdl(context.stripes[i].Irp->MdlAddress);
1621  }
1622  IoFreeIrp(context.stripes[i].Irp);
1623  }
1624 
1625  if (context.stripes[i].buf)
1626  ExFreePool(context.stripes[i].buf);
1627 
1628  if (context.stripes[i].bad_csums)
1629  ExFreePool(context.stripes[i].bad_csums);
1630  }
1631 
1632  ExFreePool(context.stripes);
1633  }
1634 
1635  return Status;
1636 }
1637 
// Body of the data-extent scrub helper. NOTE(review): the signature line (listing line 1638)
// is missing from this extract; the body uses Vcb, c, type, offset, csum and bmp.
//
// Walks every run of clear bits in bmp and scrubs it via scrub_extent, in pieces of at most
// SCRUB_UNIT bytes. `index` is the sector index of the piece: it is scaled by sector_size to
// form the byte offset added to `offset`, and it selects the piece's first entry in csum.
// Returns the first failing status from scrub_extent, otherwise STATUS_SUCCESS.
1639  NTSTATUS Status;
1640  ULONG runlength, index;
1641 
1642  runlength = RtlFindFirstRunClear(bmp, &index);
1643 
1644  while (runlength != 0) {
1645  do {
1646  ULONG rl;
1647 
// Clamp the piece to SCRUB_UNIT. The comparison is done in sectors rather than bytes so that a
// very long run cannot wrap the 32-bit product runlength * sector_size; for runs that do not
// wrap, the outcome is the same as comparing the byte count against SCRUB_UNIT.
1648  if (runlength > SCRUB_UNIT / Vcb->superblock.sector_size)
1649  rl = SCRUB_UNIT / Vcb->superblock.sector_size;
1650  else
1651  rl = runlength;
1652 
1653  Status = scrub_extent(Vcb, c, type, offset + UInt32x32To64(index, Vcb->superblock.sector_size), rl * Vcb->superblock.sector_size, &csum[index]);
1654  if (!NT_SUCCESS(Status)) {
// Log the name of the function that actually failed.
1655  ERR("scrub_extent returned %08x\n", Status);
1656  return Status;
1657  }
1658 
1659  runlength -= rl;
1660  index += rl;
1661  } while (runlength > 0);
1662 
// Continue the search from the sector just past the run that was finished.
1663  runlength = RtlFindNextForwardRunClear(bmp, index, &index);
1664  }
1665 
1666  return STATUS_SUCCESS;
1667 }
1668 
// NOTE(review): this listing dropped most members and the closing line (which carries the
// type name) of this struct; only the two declarations below survive.
1669 typedef struct {
1672  void* context;
// `rewrite` is presumably the per-stripe flag that the parity-repair code later in this file
// sets to TRUE after correcting a stripe buffer in memory — confirm against the full source.
1675  BOOL rewrite, missing;
1679 
1680 typedef struct {
1691 
// I/O completion routine for the RAID5/6 scrub reads: copies the finished IRP's IoStatus into
// the stripe's iosb and signals context->Event when `left` is zero.
// NOTE(review): the declarations of `stripe`, `context` and `left` (listing lines 1698-1700)
// and the return statement (listing line 1709) are missing from this extract; `left` is
// presumably the number of reads still outstanding — confirm against the full source.
// The two signatures differ only in the NTAPI calling-convention marker used under __REACTOS__.
1692 _Function_class_(IO_COMPLETION_ROUTINE)
1693 #ifdef __REACTOS__
1694 static NTSTATUS NTAPI scrub_read_completion_raid56(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID conptr) {
1695 #else
1696 static NTSTATUS scrub_read_completion_raid56(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID conptr) {
1697 #endif
1701 
1702  UNUSED(DeviceObject);
1703 
// Keep the completion status where the code that issued the read can inspect it.
1704  stripe->iosb = Irp->IoStatus;
1705 
1706  if (left == 0)
1707  KeSetEvent(&context->Event, 0, FALSE);
1708 
1710 }
// Second line of a function signature whose first line (listing line 1712) is missing from
// this extract. From the single parity stripe and the (num_stripes - 1) data-stripe arithmetic
// this looks like the RAID5 per-row scrub check — TODO confirm the name and parameter list.
// A few statements inside the body are also missing (listing lines 1737, 1753, 1841).
//
// What the visible code does for stripe row `num`:
//  1. checks every allocated sector of each data stripe against its checksum (tree blocks via
//     the header checksum and address, data via context->csum) and records failures in that
//     stripe's `error` bitmap;
//  2. when no device is missing, XORs the parity stripe and all data stripes together in
//     parity_scratch and flags parity sectors whose result is not all zero;
//  3. when no device is missing, walks the row sector by sector and either regenerates the
//     parity, rebuilds a single bad data sector/node from parity, or just logs the errors.
// With missing devices, checksum failures are logged in step 1 and the function returns
// before step 3.
1713  UINT64 num, UINT16 missing_devices) {
1714  ULONG sectors_per_stripe = (ULONG)(c->chunk_item->stripe_length / Vcb->superblock.sector_size), i, off;
// The parity stripe index is derived from bit_start + num, so it differs from row to row.
1715  UINT16 stripe, parity = (bit_start + num + c->chunk_item->num_stripes - 1) % c->chunk_item->num_stripes;
1716  UINT64 stripeoff;
1717 
// Data stripes are visited starting right after the parity stripe and wrapping around.
// `off` indexes the alloc / is_tree / has_csum bitmaps and context->csum, one entry per sector;
// `stripeoff` is the sector index of this row inside a stripe buffer.
1718  stripe = (parity + 1) % c->chunk_item->num_stripes;
1719  off = (ULONG)(bit_start + num - stripe_start) * sectors_per_stripe * (c->chunk_item->num_stripes - 1);
1720  stripeoff = num * sectors_per_stripe;
1721 
// Seed the scratch buffer with this row's parity data.
1722  if (missing_devices == 0)
1723  RtlCopyMemory(context->parity_scratch, &context->stripes[parity].buf[num * c->chunk_item->stripe_length], (ULONG)c->chunk_item->stripe_length);
1724 
1725  while (stripe != parity) {
1726  RtlClearAllBits(&context->stripes[stripe].error);
1727 
1728  for (i = 0; i < sectors_per_stripe; i++) {
// Only sectors on a present device that are marked in the alloc bitmap are verified.
1729  if (c->devices[stripe]->devobj && RtlCheckBit(&context->alloc, off)) {
1730  if (RtlCheckBit(&context->is_tree, off)) {
// Tree block: the CRC is computed from fs_uuid to the end of the node and compared with the
// value stored in the header; the header must also carry the expected logical address.
1731  tree_header* th = (tree_header*)&context->stripes[stripe].buf[stripeoff * Vcb->superblock.sector_size];
1732  UINT64 addr = c->offset + (stripe_start * (c->chunk_item->num_stripes - 1) * c->chunk_item->stripe_length) + (off * Vcb->superblock.sector_size);
1733  UINT32 crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum));
1734 
// A bad node marks every sector it covers.
1735  if (crc32 != *((UINT32*)th->csum) || th->address != addr) {
1736  RtlSetBits(&context->stripes[stripe].error, i, Vcb->superblock.node_size / Vcb->superblock.sector_size);
1738 
1739  if (missing_devices > 0)
1740  log_error(Vcb, addr, c->devices[stripe]->devitem.dev_id, TRUE, FALSE, FALSE);
1741  }
1742 
// Step over the whole node; `i` is advanced by one sector less because the for loop adds the
// last one itself.
1743  off += Vcb->superblock.node_size / Vcb->superblock.sector_size;
1744  stripeoff += Vcb->superblock.node_size / Vcb->superblock.sector_size;
1745  i += (Vcb->superblock.node_size / Vcb->superblock.sector_size) - 1;
1746 
1747  continue;
1748  } else if (RtlCheckBit(&context->has_csum, off)) {
// Data sector with a checksum: compare the CRC of the sector with context->csum[off].
1749  UINT32 crc32 = ~calc_crc32c(0xffffffff, context->stripes[stripe].buf + (stripeoff * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
1750 
1751  if (crc32 != context->csum[off]) {
1752  RtlSetBit(&context->stripes[stripe].error, i);
1754 
1755  if (missing_devices > 0) {
1756  UINT64 addr = c->offset + (stripe_start * (c->chunk_item->num_stripes - 1) * c->chunk_item->stripe_length) + (off * Vcb->superblock.sector_size);
1757 
1758  log_error(Vcb, addr, c->devices[stripe]->devitem.dev_id, FALSE, FALSE, FALSE);
1759  }
1760  }
1761  }
1762  }
1763 
1764  off++;
1765  stripeoff++;
1766  }
1767 
// Fold this data stripe into the scratch buffer (assumes do_xor XORs its second buffer into
// its first — TODO confirm).
1768  if (missing_devices == 0)
1769  do_xor(context->parity_scratch, &context->stripes[stripe].buf[num * c->chunk_item->stripe_length], (ULONG)c->chunk_item->stripe_length);
1770 
1771  stripe = (stripe + 1) % c->chunk_item->num_stripes;
1772  stripeoff = num * sectors_per_stripe;
1773  }
1774 
1775  // check parity
1776 
// Any non-zero byte left in a sector of parity_scratch flags that sector of the parity stripe.
1777  if (missing_devices == 0) {
1778  RtlClearAllBits(&context->stripes[parity].error);
1779 
1780  for (i = 0; i < sectors_per_stripe; i++) {
1781  ULONG o, j;
1782 
1783  o = i * Vcb->superblock.sector_size;
1784  for (j = 0; j < Vcb->superblock.sector_size; j++) { // FIXME - use SSE
1785  if (context->parity_scratch[o] != 0) {
1786  RtlSetBit(&context->stripes[parity].error, i);
1787  break;
1788  }
1789  o++;
1790  }
1791  }
1792  }
1793 
1794  // log and fix errors
1795 
// No repair is attempted when a device is missing.
1796  if (missing_devices > 0)
1797  return;
1798 
// Examine the row one sector column at a time.
1799  for (i = 0; i < sectors_per_stripe; i++) {
1800  ULONG num_errors = 0, bad_off;
1801  UINT64 bad_stripe;
1802  BOOL alloc = FALSE;
1803 
1804  stripe = (parity + 1) % c->chunk_item->num_stripes;
1805  off = (ULONG)((bit_start + num - stripe_start) * sectors_per_stripe * (c->chunk_item->num_stripes - 1)) + i;
1806 
// Count the allocated data sectors in this column that failed verification, remembering the
// last one found; `off` moves by sectors_per_stripe from one data stripe to the next.
1807  while (stripe != parity) {
1808  if (RtlCheckBit(&context->alloc, off)) {
1809  alloc = TRUE;
1810 
1811  if (RtlCheckBit(&context->stripes[stripe].error, i)) {
1812  bad_stripe = stripe;
1813  bad_off = off;
1814  num_errors++;
1815  }
1816  }
1817 
1818  off += sectors_per_stripe;
1819  stripe = (stripe + 1) % c->chunk_item->num_stripes;
1820  }
1821 
// Columns with nothing allocated are ignored, whatever the parity looks like.
1822  if (!alloc)
1823  continue;
1824 
1825  if (num_errors == 0 && !RtlCheckBit(&context->stripes[parity].error, i)) // everything fine
1826  continue;
1827 
1828  if (num_errors == 0 && RtlCheckBit(&context->stripes[parity].error, i)) { // parity error
1829  UINT64 addr;
1830 
// parity_scratch holds parity XOR all data, so XORing it into the parity buffer leaves the XOR
// of the data stripes there (under the do_xor assumption stated above).
1831  do_xor(&context->stripes[parity].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
1832  &context->parity_scratch[i * Vcb->superblock.sector_size],
1833  Vcb->superblock.sector_size);
1834 
// The logged address is the one belonging to the first data stripe's sector in this column.
1835  bad_off = (ULONG)((bit_start + num - stripe_start) * sectors_per_stripe * (c->chunk_item->num_stripes - 1)) + i;
1836  addr = c->offset + (stripe_start * (c->chunk_item->num_stripes - 1) * c->chunk_item->stripe_length) + (bad_off * Vcb->superblock.sector_size);
1837 
1838  context->stripes[parity].rewrite = TRUE;
1839 
1840  log_error(Vcb, addr, c->devices[parity]->devitem.dev_id, FALSE, TRUE, TRUE);
1842  } else if (num_errors == 1) {
// Exactly one bad data sector: XORing the bad contents into parity_scratch leaves the XOR of
// the parity and the other data stripes, i.e. the candidate replacement, which is accepted
// only if it passes the same verification as above.
1843  UINT32 crc32;
1844  UINT64 addr = c->offset + (stripe_start * (c->chunk_item->num_stripes - 1) * c->chunk_item->stripe_length) + (bad_off * Vcb->superblock.sector_size);
1845 
1846  if (RtlCheckBit(&context->is_tree, bad_off)) {
1847  tree_header* th;
1848 
// Tree blocks are rebuilt a whole node (node_size bytes) at a time.
1849  do_xor(&context->parity_scratch[i * Vcb->superblock.sector_size],
1850  &context->stripes[bad_stripe].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
1851  Vcb->superblock.node_size);
1852 
1853  th = (tree_header*)&context->parity_scratch[i * Vcb->superblock.sector_size];
1854  crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum));
1855 
1856  if (crc32 == *((UINT32*)th->csum) && th->address == addr) {
1857  RtlCopyMemory(&context->stripes[bad_stripe].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
1858  &context->parity_scratch[i * Vcb->superblock.sector_size], Vcb->superblock.node_size);
1859 
1860  context->stripes[bad_stripe].rewrite = TRUE;
1861 
// Clear the error bits of the node's remaining sectors so that later iterations of this loop
// do not treat them as separate errors.
1862  RtlClearBits(&context->stripes[bad_stripe].error, i + 1, (Vcb->superblock.node_size / Vcb->superblock.sector_size) - 1);
1863 
1864  log_error(Vcb, addr, c->devices[bad_stripe]->devitem.dev_id, TRUE, TRUE, FALSE);
1865  } else
1866  log_error(Vcb, addr, c->devices[bad_stripe]->devitem.dev_id, TRUE, FALSE, FALSE);
1867  } else {
// Data sector: the same reconstruction over a single sector, checked against context->csum.
1868  do_xor(&context->parity_scratch[i * Vcb->superblock.sector_size],
1869  &context->stripes[bad_stripe].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
1870  Vcb->superblock.sector_size);
1871 
1872  crc32 = ~calc_crc32c(0xffffffff, &context->parity_scratch[i * Vcb->superblock.sector_size], Vcb->superblock.sector_size);
1873 
1874  if (crc32 == context->csum[bad_off]) {
1875  RtlCopyMemory(&context->stripes[bad_stripe].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
1876  &context->parity_scratch[i * Vcb->superblock.sector_size], Vcb->superblock.sector_size);
1877 
1878  context->stripes[bad_stripe].rewrite = TRUE;
1879 
1880  log_error(Vcb, addr, c->devices[bad_stripe]->devitem.dev_id, FALSE, TRUE, FALSE);
1881  } else
1882  log_error(Vcb, addr, c->devices[bad_stripe]->devitem.dev_id, FALSE, FALSE, FALSE);
1883  }
1884  } else {
// More than one bad data sector in the column: nothing is rebuilt; each bad allocated sector
// is logged.
1885  stripe = (parity + 1) % c->chunk_item->num_stripes;
1886  off = (ULONG)((bit_start + num - stripe_start) * sectors_per_stripe * (c->chunk_item->num_stripes - 1)) + i;
1887 
1888  while (stripe != parity) {
1889  if (RtlCheckBit(&context->alloc, off)) {
1890  if (RtlCheckBit(&context->stripes[stripe].error, i)) {
1891  UINT64 addr = c->offset + (stripe_start * (c->chunk_item->num_stripes - 1) * c->chunk_item->stripe_length) + (off * Vcb->superblock.sector_size);
1892 
1893  log_error(Vcb, addr, c->devices[stripe]->devitem.dev_id, RtlCheckBit(&context->is_tree, off), FALSE, FALSE);
1894  }
1895  }
1896 
1897  off += sectors_per_stripe;
1898  stripe = (stripe + 1) % c->chunk_item->num_stripes;
1899  }
1900  }
1901  }
1902 }
1903 
1905  UINT64 num, UINT16 missing_devices) {
1906  ULONG sectors_per_stripe = (ULONG)(c->chunk_item->stripe_length / Vcb->superblock.sector_size), i, off;
1907  UINT16 stripe, parity1 = (bit_start + num + c->chunk_item->num_stripes - 2) % c->chunk_item->num_stripes;
1908  UINT16 parity2 = (parity1 + 1) % c->chunk_item->num_stripes;
1909  UINT64 stripeoff;
1910 
1911  stripe = (parity1 + 2) % c->chunk_item->num_stripes;
1912  off = (ULONG)(bit_start + num - stripe_start) * sectors_per_stripe * (c->chunk_item->num_stripes - 2);
1913  stripeoff = num * sectors_per_stripe;
1914 
1915  if (c->devices[parity1]->devobj)
1916  RtlCopyMemory(context->parity_scratch, &context->stripes[parity1].buf[num * c->chunk_item->stripe_length], (ULONG)c->chunk_item->stripe_length);
1917 
1918  if (c->devices[parity2]->devobj)
1920 
1921  while (stripe != parity1) {
1922  RtlClearAllBits(&context->stripes[stripe].error);
1923 
1924  for (i = 0; i < sectors_per_stripe; i++) {
1925  if (c->devices[stripe]->devobj && RtlCheckBit(&context->alloc, off)) {
1926  if (RtlCheckBit(&context->is_tree, off)) {
1927  tree_header* th = (tree_header*)&context->stripes[stripe].buf[stripeoff * Vcb->superblock.sector_size];
1928  UINT64 addr = c->offset + (stripe_start * (c->chunk_item->num_stripes - 2) * c->chunk_item->stripe_length) + (off * Vcb->superblock.sector_size);
1929  UINT32 crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum));
1930 
1931  if (crc32 != *((UINT32*)th->csum) || th->address != addr) {
1932  RtlSetBits(&context->stripes[stripe].error, i, Vcb->superblock.node_size / Vcb->superblock.sector_size);
1934 
1935  if (missing_devices == 2)
1936  log_error(Vcb, addr, c->devices[stripe]->devitem.dev_id, TRUE, FALSE, FALSE);
1937  }
1938 
1939  off += Vcb->superblock.node_size / Vcb->superblock.sector_size;
1940  stripeoff += Vcb->superblock.node_size / Vcb->superblock.sector_size;
1941  i += (Vcb->superblock.node_size / Vcb->superblock.sector_size) - 1;
1942 
1943  continue;
1944  } else if (RtlCheckBit(&context->has_csum, off)) {
1945  UINT32 crc32 = ~calc_crc32c(0xffffffff, context->stripes[stripe].buf + (stripeoff * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
1946 
1947  if (crc32 != context->csum[off]) {
1948  UINT64 addr = c->offset + (stripe_start * (c->chunk_item->num_stripes - 2) * c->chunk_item->stripe_length) + (off * Vcb->superblock.sector_size);
1949 
1950  RtlSetBit(&context->stripes[stripe].error, i);
1952 
1953  if (missing_devices == 2)
1954  log_error(Vcb, addr, c->devices[stripe]->devitem.dev_id, FALSE, FALSE, FALSE);
1955  }
1956  }
1957  }
1958 
1959  off++;
1960  stripeoff++;
1961  }
1962 
1963  if (c->devices[parity1]->devobj)
1964  do_xor(context->parity_scratch, &context->stripes[stripe].buf[num * c->chunk_item->stripe_length], (UINT32)c->chunk_item->stripe_length);
1965 
1966  stripe = (stripe + 1) % c->chunk_item->num_stripes;
1967  stripeoff = num * sectors_per_stripe;
1968  }
1969 
1970  RtlClearAllBits(&context->stripes[parity1].error);
1971 
1972  if (missing_devices == 0 || (missing_devices == 1 && !c->devices[parity2]->devobj)) {
1973  // check parity 1
1974 
1975  for (i = 0; i < sectors_per_stripe; i++) {
1976  ULONG o, j;
1977 
1978  o = i * Vcb->superblock.sector_size;
1979  for (j = 0; j < Vcb->superblock.sector_size; j++) { // FIXME - use SSE
1980  if (context->parity_scratch[o] != 0) {
1981  RtlSetBit(&context->stripes[parity1].error, i);
1982  break;
1983  }
1984  o++;
1985  }
1986  }
1987  }
1988 
1989  RtlClearAllBits(&context->stripes[parity2].error);
1990 
1991  if (missing_devices == 0 || (missing_devices == 1 && !c->devices[parity1]->devobj)) {
1992  // check parity 2
1993 
1994  stripe = parity1 == 0 ? (c->chunk_item->num_stripes - 1) : (parity1 - 1);
1995 
1996  while (stripe != parity2) {
1998  do_xor(context->parity_scratch2, &context->stripes[stripe].buf[num * c->chunk_item->stripe_length], (UINT32)c->chunk_item->stripe_length);
1999 
2000  stripe = stripe == 0 ? (c->chunk_item->num_stripes - 1) : (stripe - 1);
2001  }
2002 
2003  for (i = 0; i < sectors_per_stripe; i++) {
2004  if (RtlCompareMemory(&context->stripes[parity2].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
2006  RtlSetBit(&context->stripes[parity2].error, i);
2007  }
2008  }
2009 
2010  if (missing_devices == 2)
2011  return;
2012 
2013  // log and fix errors
2014 
2015  for (i = 0; i < sectors_per_stripe; i++) {
2016  ULONG num_errors = 0;
2017  UINT64 bad_stripe1, bad_stripe2;
2018  ULONG bad_off1, bad_off2;
2019  BOOL alloc = FALSE;
2020 
2021  stripe = (parity1 + 2) % c->chunk_item->num_stripes;
2022  off = (ULONG)((bit_start + num - stripe_start) * sectors_per_stripe * (c->chunk_item->num_stripes - 2)) + i;
2023 
2024  while (stripe != parity1) {
2025  if (RtlCheckBit(&context->alloc, off)) {
2026  alloc = TRUE;
2027 
2028  if (!c->devices[stripe]->devobj || RtlCheckBit(&context->stripes[stripe].error, i)) {
2029  if (num_errors == 0) {
2030  bad_stripe1 = stripe;
2031  bad_off1 = off;
2032  } else if (num_errors == 1) {
2033  bad_stripe2 = stripe;
2034  bad_off2 = off;
2035  }
2036  num_errors++;
2037  }
2038  }
2039 
2040  off += sectors_per_stripe;
2041  stripe = (stripe + 1) % c->chunk_item->num_stripes;
2042  }
2043 
2044  if (!alloc)
2045  continue;
2046 
2047  if (num_errors == 0 && !RtlCheckBit(&context->stripes[parity1].error, i) && !RtlCheckBit(&context->stripes[parity2].error, i)) // everything fine
2048  continue;
2049 
2050  if (num_errors == 0) { // parity error
2051  UINT64 addr;
2052 
2053  if (RtlCheckBit(&context->stripes[parity1].error, i)) {
2054  do_xor(&context->stripes[parity1].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
2055  &context->parity_scratch[i * Vcb->superblock.sector_size],
2056  Vcb->superblock.sector_size);
2057 
2058  bad_off1 = (ULONG)((bit_start + num - stripe_start) * sectors_per_stripe * (c->chunk_item->num_stripes - 2)) + i;
2059  addr = c->offset + (stripe_start * (c->chunk_item->num_stripes - 2) * c->chunk_item->stripe_length) + (bad_off1 * Vcb->superblock.sector_size);
2060 
2061  context->stripes[parity1].rewrite = TRUE;
2062 
2063  log_error(Vcb, addr, c->devices[parity1]->devitem.dev_id, FALSE, TRUE, TRUE);
2065  }
2066 
2067  if (RtlCheckBit(&context->stripes[parity2].error, i)) {
2068  RtlCopyMemory(&context->stripes[parity2].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
2069  &context->parity_scratch2[i * Vcb->superblock.sector_size],
2070  Vcb->superblock.sector_size);
2071 
2072  bad_off1 = (ULONG)((bit_start + num - stripe_start) * sectors_per_stripe * (c->chunk_item->num_stripes - 2)) + i;
2073  addr = c->offset + (stripe_start * (c->chunk_item->num_stripes - 2) * c->chunk_item->stripe_length) + (bad_off1 * Vcb->superblock.sector_size);
2074 
2075  context->stripes[parity2].rewrite = TRUE;
2076 
2077  log_error(Vcb, addr, c->devices[parity2]->devitem.dev_id, FALSE, TRUE, TRUE);
2079  }
2080  } else if (num_errors == 1) {
2081  UINT32 crc32a, crc32b, len;
2082  UINT16 stripe_num, bad_stripe_num;
2083  UINT64 addr = c->offset + (stripe_start * (c->chunk_item->num_stripes - 2) * c->chunk_item->stripe_length) + (bad_off1 * Vcb->superblock.sector_size);
2084  UINT8* scratch;
2085 
2086  len = RtlCheckBit(&context->is_tree, bad_off1)? Vcb->superblock.node_size : Vcb->superblock.sector_size;
2087 
2088  scratch = ExAllocatePoolWithTag(PagedPool, len, ALLOC_TAG);
2089  if (!scratch) {
2090  ERR("out of memory\n");
2091  return;
2092  }
2093 
2094  RtlZeroMemory(scratch, len);
2095 
2096  do_xor(&context->parity_scratch[i * Vcb->superblock.sector_size],
2097  &context->stripes[bad_stripe1].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)], len);
2098 
2099  stripe = parity1 == 0 ? (c->chunk_item->num_stripes - 1) : (parity1 - 1);
2100 
2101  if (c->devices[parity2]->devobj) {
2102  stripe_num = c->chunk_item->num_stripes - 3;
2103  while (stripe != parity2) {
2104  galois_double(scratch, len);
2105 
2106  if (stripe != bad_stripe1)
2107  do_xor(scratch, &context->stripes[stripe].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)], len);
2108  else
2109  bad_stripe_num = stripe_num;
2110 
2111  stripe = stripe == 0 ? (c->chunk_item->num_stripes - 1) : (stripe - 1);
2112  stripe_num--;
2113  }
2114 
2115  do_xor(scratch, &context->stripes[parity2].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)], len);
2116  }
2117 
2118  if (bad_stripe_num != 0)
2119  galois_divpower(scratch, (UINT8)bad_stripe_num, len);
2120 
2121  if (RtlCheckBit(&context->is_tree, bad_off1)) {
2122  tree_header *th1 = NULL, *th2 = NULL;
2123 
2124  if (c->devices[parity1]->devobj) {
2125  th1 = (tree_header*)&context->parity_scratch[i * Vcb->superblock.sector_size];
2126  crc32a = ~calc_crc32c(0xffffffff, (UINT8*)&th1->fs_uuid, Vcb->superblock.node_size - sizeof(th1->csum));
2127  }
2128 
2129  if (c->devices[parity2]->devobj) {
2130  th2 = (tree_header*)scratch;
2131  crc32b = ~calc_crc32c(0xffffffff, (UINT8*)&th2->fs_uuid, Vcb->superblock.node_size - sizeof(th2->csum));
2132  }
2133 
2134  if ((c->devices[parity1]->devobj && crc32a == *((UINT32*)th1->csum) && th1->address == addr) ||
2135  (c->devices[parity2]->devobj && crc32b == *((UINT32*)th2->csum) && th2->address == addr)) {
2136  if (!c->devices[parity1]->devobj || crc32a != *((UINT32*)th1->csum) || th1->address != addr) {
2137  RtlCopyMemory(&context->stripes[bad_stripe1].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
2138  scratch, Vcb->superblock.node_size);
2139 
2140  if (c->devices[parity1]->devobj) {
2141  // fix parity 1
2142 
2143  stripe = (parity1 + 2) % c->chunk_item->num_stripes;
2144 
2145  RtlCopyMemory(&context->stripes[parity1].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
2146  &context->stripes[stripe].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
2147  Vcb->superblock.node_size);
2148 
2149  stripe = (stripe + 1) % c->chunk_item->num_stripes;
2150 
2151  while (stripe != parity1) {
2152  do_xor(&context->stripes[parity1].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
2153  &context->stripes[stripe].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
2154  Vcb->superblock.node_size);
2155 
2156  stripe = (stripe + 1) % c->chunk_item->num_stripes;
2157  }
2158 
2159  context->stripes[parity1].rewrite = TRUE;
2160 
2161  log_error(Vcb, addr, c->devices[parity1]->devitem.dev_id, FALSE, TRUE, TRUE);
2163  }
2164  } else {
2165  RtlCopyMemory(&context->stripes[bad_stripe1].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
2166  &context->parity_scratch[i * Vcb->superblock.sector_size], Vcb->superblock.node_size);
2167 
2168  if (!c->devices[parity2]->devobj || crc32b != *((UINT32*)th2->csum) || th2->address != addr) {
2169  // fix parity 2
2170  stripe = parity1 == 0 ? (c->chunk_item->num_stripes - 1) : (parity1 - 1);
2171 
2172  if (c->devices[parity2]->devobj) {
2173  RtlCopyMemory(&context->stripes[parity2].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
2174  &context->stripes[stripe].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
2175  Vcb->superblock.node_size);
2176 
2177  stripe = stripe == 0 ? (c->chunk_item->num_stripes - 1) : (stripe - 1);
2178 
2179  while (stripe != parity2) {
2180  galois_double(&context->stripes[parity2].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)], Vcb->superblock.node_size);
2181 
2182  do_xor(&context->stripes[parity2].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
2183  &context->stripes[stripe].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
2184  Vcb->superblock.node_size);
2185 
2186  stripe = stripe == 0 ? (c->chunk_item->num_stripes - 1) : (stripe - 1);
2187  }
2188 
2189  context->stripes[parity2].rewrite = TRUE;
2190 
2191  log_error(Vcb, addr, c->devices[parity2]->devitem.dev_id, FALSE, TRUE, TRUE);
2193  }
2194  }
2195  }
2196 
2197  context->stripes[bad_stripe1].rewrite = TRUE;
2198 
2199  RtlClearBits(&context->stripes[bad_stripe1].error, i + 1, (Vcb->superblock.node_size / Vcb->superblock.sector_size) - 1);
2200 
2201  log_error(Vcb, addr, c->devices[bad_stripe1]->devitem.dev_id, TRUE, TRUE, FALSE);
2202  } else
2203  log_error(Vcb, addr, c->devices[bad_stripe1]->devitem.dev_id, TRUE, FALSE, FALSE);
2204  } else {
2205  if (c->devices[parity1]->devobj)
2206  crc32a = ~calc_crc32c(0xffffffff, &context->parity_scratch[i * Vcb->superblock.sector_size], Vcb->superblock.sector_size);
2207 
2208  if (c->devices[parity2]->devobj)
2209  crc32b = ~calc_crc32c(0xffffffff, scratch, Vcb->superblock.sector_size);
2210 
2211  if ((c->devices[parity1]->devobj && crc32a == context->csum[bad_off1]) || (c->devices[parity2]->devobj && crc32b == context->csum[bad_off1])) {
2212  if (c->devices[parity2]->devobj && crc32b == context->csum[bad_off1]) {
2213  RtlCopyMemory(&context->stripes[bad_stripe1].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
2214  scratch, Vcb->superblock.sector_size);
2215 
2216  if (c->devices[parity1]->devobj && crc32a != context->csum[bad_off1]) {
2217  // fix parity 1
2218 
2219  stripe = (parity1 + 2) % c->chunk_item->num_stripes;
2220 
2221  RtlCopyMemory(&context->stripes[parity1].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
2222  &context->stripes[stripe].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
2223  Vcb->superblock.sector_size);
2224 
2225  stripe = (stripe + 1) % c->chunk_item->num_stripes;
2226 
2227  while (stripe != parity1) {
2228  do_xor(&context->stripes[parity1].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
2229  &context->stripes[stripe].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
2230  Vcb->superblock.sector_size);
2231 
2232  stripe = (stripe + 1) % c->chunk_item->num_stripes;
2233  }
2234 
2235  context->stripes[parity1].rewrite = TRUE;
2236 
2237  log_error(Vcb, addr, c->devices[parity1]->devitem.dev_id, FALSE, TRUE, TRUE);
2239  }
2240  } else {
2241  RtlCopyMemory(&context->stripes[bad_stripe1].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
2242  &context->parity_scratch[i * Vcb->superblock.sector_size], Vcb->superblock.sector_size);
2243 
2244  if (c->devices[parity2]->devobj && crc32b != context->csum[bad_off1]) {
2245  // fix parity 2
2246  stripe = parity1 == 0 ? (c->chunk_item->num_stripes - 1) : (parity1 - 1);
2247 
2248  RtlCopyMemory(&context->stripes[parity2].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
2249  &context->stripes[stripe].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
2250  Vcb->superblock.sector_size);
2251 
2252  stripe = stripe == 0 ? (c->chunk_item->num_stripes - 1) : (stripe - 1);
2253 
2254  while (stripe != parity2) {
2255  galois_double(&context->stripes[parity2].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)], Vcb->superblock.sector_size);
2256 
2257  do_xor(&context->stripes[parity2].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
2258  &context->stripes[stripe].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
2259  Vcb->superblock.sector_size);
2260 
2261  stripe = stripe == 0 ? (c->chunk_item->num_stripes - 1) : (stripe - 1);
2262  }
2263 
2264  context->stripes[parity2].rewrite = TRUE;
2265 
2266  log_error(Vcb, addr, c->devices[parity2]->devitem.dev_id, FALSE, TRUE, TRUE);
2268  }
2269  }
2270 
2271  context->stripes[bad_stripe1].rewrite = TRUE;
2272 
2273  log_error(Vcb, addr, c->devices[bad_stripe1]->devitem.dev_id, FALSE, TRUE, FALSE);
2274  } else
2275  log_error(Vcb, addr, c->devices[bad_stripe1]->devitem.dev_id, FALSE, FALSE, FALSE);
2276  }
2277 
2278  ExFreePool(scratch);
2279  } else if (num_errors == 2 && missing_devices == 0) {
2280  UINT16 x, y, k;
2281  UINT64 addr;
2282  UINT32 len = (RtlCheckBit(&context->is_tree, bad_off1) || RtlCheckBit(&context->is_tree, bad_off2)) ? Vcb->superblock.node_size : Vcb->superblock.sector_size;
2283  UINT8 gyx, gx, denom, a, b, *p, *q, *pxy, *qxy;
2284  UINT32 j;
2285 
2286  stripe = parity1 == 0 ? (c->chunk_item->num_stripes - 1) : (parity1 - 1);
2287 
2288  // put qxy in parity_scratch
2289  // put pxy in parity_scratch2
2290 
2291  k = c->chunk_item->num_stripes - 3;
2292  if (stripe == bad_stripe1 || stripe == bad_stripe2) {
2293  RtlZeroMemory(&context->parity_scratch[i * Vcb->superblock.sector_size], len);
2294  RtlZeroMemory(&context->parity_scratch2[i * Vcb->superblock.sector_size], len);
2295 
2296  if (stripe == bad_stripe1)
2297  x = k;
2298  else
2299  y = k;
2300  } else {
2302  &context->stripes[stripe].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)], len);
2304  &context->stripes[stripe].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)], len);
2305  }
2306 
2307  stripe = stripe == 0 ? (c->chunk_item->num_stripes - 1) : (stripe - 1);
2308 
2309  k--;
2310  do {
2311  galois_double(&context->parity_scratch[i * Vcb->superblock.sector_size], len);
2312 
2313  if (stripe != bad_stripe1 && stripe != bad_stripe2) {
2314  do_xor(&context->parity_scratch[i * Vcb->superblock.sector_size],
2315  &context->stripes[stripe].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)], len);
2316  do_xor(&context->parity_scratch2[i * Vcb->superblock.sector_size],
2317  &context->stripes[stripe].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)], len);
2318  } else if (stripe == bad_stripe1)
2319  x = k;
2320  else if (stripe == bad_stripe2)
2321  y = k;
2322 
2323  stripe = stripe == 0 ? (c->chunk_item->num_stripes - 1) : (stripe - 1);
2324  k--;
2325  } while (stripe != parity2);
2326 
2327  gyx = gpow2(y > x ? (y-x) : (255-x+y));
2328  gx = gpow2(255-x);
2329 
2330  denom = gdiv(1, gyx ^ 1);
2331  a = gmul(gyx, denom);
2332  b = gmul(gx, denom);
2333 
2334  p = &context->stripes[parity1].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)];
2335  q = &context->stripes[parity2].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)];
2336  pxy = &context->parity_scratch2[i * Vcb->superblock.sector_size];
2337  qxy = &context->parity_scratch[i * Vcb->superblock.sector_size];
2338 
2339  for (j = 0; j < len; j++) {
2340  *qxy = gmul(a, *p ^ *pxy) ^ gmul(b, *q ^ *qxy);
2341 
2342  p++;
2343  q++;
2344  pxy++;
2345  qxy++;
2346  }
2347 
2348  do_xor(&context->parity_scratch2[i * Vcb->superblock.sector_size], &context->parity_scratch[i * Vcb->superblock.sector_size], len);
2349  do_xor(&context->parity_scratch2[i * Vcb->superblock.sector_size], &context->stripes[parity1].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)], len);
2350 
2351  addr = c->offset + (stripe_start * (c->chunk_item->num_stripes - 2) * c->chunk_item->stripe_length) + (bad_off1 * Vcb->superblock.sector_size);
2352 
2353  if (RtlCheckBit(&context->is_tree, bad_off1)) {
2354  tree_header* th = (tree_header*)&context->parity_scratch[i * Vcb->superblock.sector_size];
2355  UINT32 crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum));
2356 
2357  if (crc32 == *((UINT32*)th->csum) && th->address == addr) {
2358  RtlCopyMemory(&context->stripes[bad_stripe1].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
2359  &context->parity_scratch[i * Vcb->superblock.sector_size], Vcb->superblock.node_size);
2360 
2361  context->stripes[bad_stripe1].rewrite = TRUE;
2362 
2363  RtlClearBits(&context->stripes[bad_stripe1].error, i + 1, (Vcb->superblock.node_size / Vcb->superblock.sector_size) - 1);
2364 
2365  log_error(Vcb, addr, c->devices[bad_stripe1]->devitem.dev_id, TRUE, TRUE, FALSE);
2366  } else
2367  log_error(Vcb, addr, c->devices[bad_stripe1]->devitem.dev_id, TRUE, FALSE, FALSE);
2368  } else {
2369  UINT32 crc32 = ~calc_crc32c(0xffffffff, &context->parity_scratch[i * Vcb->superblock.sector_size], Vcb->superblock.sector_size);
2370 
2371  if (crc32 == context->csum[bad_off1]) {
2372  RtlCopyMemory(&context->stripes[bad_stripe1].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
2373  &context->parity_scratch[i * Vcb->superblock.sector_size], Vcb->superblock.sector_size);
2374 
2375  context->stripes[bad_stripe1].rewrite = TRUE;
2376 
2377  log_error(Vcb, addr, c->devices[bad_stripe1]->devitem.dev_id, FALSE, TRUE, FALSE);
2378  } else
2379  log_error(Vcb, addr, c->devices[bad_stripe1]->devitem.dev_id, FALSE, FALSE, FALSE);
2380  }
2381 
2382  addr = c->offset + (stripe_start * (c->chunk_item->num_stripes - 2) * c->chunk_item->stripe_length) + (bad_off2 * Vcb->superblock.sector_size);
2383 
2384  if (RtlCheckBit(&context->is_tree, bad_off2)) {
2385  tree_header* th = (tree_header*)&context->parity_scratch2[i * Vcb->superblock.sector_size];
2386  UINT32 crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum));
2387 
2388  if (crc32 == *((UINT32*)th->csum) && th->address == addr) {
2389  RtlCopyMemory(&context->stripes[bad_stripe2].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
2390  &context->parity_scratch2[i * Vcb->superblock.sector_size], Vcb->superblock.node_size);
2391 
2392  context->stripes[bad_stripe2].rewrite = TRUE;
2393 
2394  RtlClearBits(&context->stripes[bad_stripe2].error, i + 1, (Vcb->superblock.node_size / Vcb->superblock.sector_size) - 1);
2395 
2396  log_error(Vcb, addr, c->devices[bad_stripe2]->devitem.dev_id, TRUE, TRUE, FALSE);
2397  } else
2398  log_error(Vcb, addr, c->devices[bad_stripe2]->devitem.dev_id, TRUE, FALSE, FALSE);
2399  } else {
2400  UINT32 crc32 = ~calc_crc32c(0xffffffff, &context->parity_scratch2[i * Vcb->superblock.sector_size], Vcb->superblock.sector_size);
2401 
2402  if (crc32 == context->csum[bad_off2]) {
2403  RtlCopyMemory(&context->stripes[bad_stripe2].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
2404  &context->parity_scratch2[i * Vcb->superblock.sector_size], Vcb->superblock.sector_size);
2405 
2406  context->stripes[bad_stripe2].rewrite = TRUE;
2407 
2408  log_error(Vcb, addr, c->devices[bad_stripe2]->devitem.dev_id, FALSE, TRUE, FALSE);
2409  } else
2410  log_error(Vcb, addr, c->devices[bad_stripe2]->devitem.dev_id, FALSE, FALSE, FALSE);
2411  }
2412  } else {
2413  stripe = (parity2 + 1) % c->chunk_item->num_stripes;
2414  off = (ULONG)((bit_start + num - stripe_start) * sectors_per_stripe * (c->chunk_item->num_stripes - 2)) + i;
2415 
2416  while (stripe != parity1) {
2417  if (c->devices[stripe]->devobj && RtlCheckBit(&context->alloc, off)) {
2418  if (RtlCheckBit(&context->stripes[stripe].error, i)) {
2419  UINT64 addr = c->offset + (stripe_start * (c->chunk_item->num_stripes - 2) * c->chunk_item->stripe_length) + (off * Vcb->superblock.sector_size);
2420 
2421  log_error(Vcb, addr, c->devices[stripe]->devitem.dev_id, RtlCheckBit(&context->is_tree, off), FALSE, FALSE);
2422  }
2423  }
2424 
2425  off += sectors_per_stripe;
2426  stripe = (stripe + 1) % c->chunk_item->num_stripes;
2427  }
2428  }
2429  }
2430 }
2431 
/* scrub_chunk_raid56_stripe_run -- name taken from the caller further down
 * (scrub_chunk_raid56); the signature line itself, and a number of other statements
 * (mostly allocation calls and failure-status assignments), are absent from this listing.
 *
 * Scrubs full stripes stripe_start..stripe_end (inclusive) of the RAID5/RAID6 chunk c:
 *   1. walks the extent tree to fill the alloc / is_tree bitmaps and, for data chunks,
 *      the has_csum bitmap and csum array, one bit/entry per sector of the run;
 *   2. locks the logical range, then reads each present device's share of the run,
 *      max_read stripes per pass;
 *   3. calls scrub_raid6_stripe or scrub_raid5_stripe once per stripe read;
 *   4. writes back every device buffer whose "rewrite" flag is set.
 * Returns the value held in Status when the end label is reached.
 */
2433  NTSTATUS Status;
2434  KEY searchkey;
2435  traverse_ptr tp;
2436  BOOL b;
2437  UINT64 run_start, run_end, full_stripe_len, stripe;
2438  UINT32 max_read, num_sectors;
2439  ULONG arrlen, *allocarr, *csumarr = NULL, *treearr, num_parity_stripes = c->chunk_item->type & BLOCK_FLAG_RAID6 ? 2 : 1;
2441  UINT16 i;
2443 
2444  TRACE("(%p, %p, %llx, %llx)\n", Vcb, c, stripe_start, stripe_end);
2445 
// Logical byte range [run_start, run_end) spanned by stripes stripe_start..stripe_end inclusive.
2446  full_stripe_len = (c->chunk_item->num_stripes - num_parity_stripes) * c->chunk_item->stripe_length;
2447  run_start = c->offset + (stripe_start * full_stripe_len);
2448  run_end = c->offset + ((stripe_end + 1) * full_stripe_len);
2449 
2450  searchkey.obj_id = run_start;
2451  searchkey.obj_type = TYPE_METADATA_ITEM;
2452  searchkey.offset = 0xffffffffffffffff;
2453 
2454  Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, NULL);
2455  if (!NT_SUCCESS(Status)) {
2456  ERR("find_item returned %08x\n", Status);
2457  return Status;
2458  }
2459 
// num_sectors = sectors in the run; arrlen = byte size used for each bitmap array
// (presumably rounded up to a multiple of sizeof(ULONG) by sector_align -- confirm).
2460  num_sectors = (UINT32)((stripe_end - stripe_start + 1) * full_stripe_len / Vcb->superblock.sector_size);
2461  arrlen = (ULONG)sector_align((num_sectors / 8) + 1, sizeof(ULONG));
2462 
2463  allocarr = ExAllocatePoolWithTag(PagedPool, arrlen, ALLOC_TAG);
2464  if (!allocarr) {
2465  ERR("out of memory\n");
2467  }
2468 
2469  treearr = ExAllocatePoolWithTag(PagedPool, arrlen, ALLOC_TAG);
2470  if (!treearr) {
2471  ERR("out of memory\n");
2472  ExFreePool(allocarr);
2474  }
2475 
2476  RtlInitializeBitMap(&context.alloc, allocarr, num_sectors);
2477  RtlClearAllBits(&context.alloc);
2478 
2479  RtlInitializeBitMap(&context.is_tree, treearr, num_sectors);
2480  RtlClearAllBits(&context.is_tree);
2481 
2483  if (!context.parity_scratch) {
2484  ERR("out of memory\n");
2485  ExFreePool(allocarr);
2486  ExFreePool(treearr);
2488  }
2489 
// Checksum bitmap and checksum array exist only for data chunks.
2490  if (c->chunk_item->type & BLOCK_FLAG_DATA) {
2491  csumarr = ExAllocatePoolWithTag(PagedPool, arrlen, ALLOC_TAG);
2492  if (!csumarr) {
2493  ERR("out of memory\n");
2494  ExFreePool(allocarr);
2495  ExFreePool(treearr);
2496  ExFreePool(context.parity_scratch);
2498  }
2499 
2500  RtlInitializeBitMap(&context.has_csum, csumarr, num_sectors);
2501  RtlClearAllBits(&context.has_csum);
2502 
2503  context.csum = ExAllocatePoolWithTag(PagedPool, num_sectors * sizeof(UINT32), ALLOC_TAG);
2504  if (!context.csum) {
2505  ERR("out of memory\n");
2506  ExFreePool(allocarr);
2507  ExFreePool(treearr);
2508  ExFreePool(context.parity_scratch);
2509  ExFreePool(csumarr);
2511  }
2512  }
2513 
// A second scratch buffer is only allocated for RAID6.
2514  if (c->chunk_item->type & BLOCK_FLAG_RAID6) {
2516  if (!context.parity_scratch2) {
2517  ERR("out of memory\n");
2518  ExFreePool(allocarr);
2519  ExFreePool(treearr);
2520  ExFreePool(context.parity_scratch);
2521 
2522  if (c->chunk_item->type & BLOCK_FLAG_DATA) {
2523  ExFreePool(csumarr);
2524  ExFreePool(context.csum);
2525  }
2526 
2528  }
2529  }
2530 
// Walk the extent tree from the item found for run_start until an item starts at or past run_end.
2531  do {
2532  traverse_ptr next_tp;
2533 
2534  if (tp.item->key.obj_id >= run_end)
2535  break;
2536 
2539 
// "size" is declared on a line missing from this listing.
2540  if (tp.item->key.obj_id + size > run_start) {
// Clamp the extent to the run before turning it into bit positions.
2541  UINT64 extent_start = max(run_start, tp.item->key.obj_id);
2542  UINT64 extent_end = min(tp.item->key.obj_id + size, run_end);
2543  BOOL extent_is_tree = FALSE;
2544 
2545  RtlSetBits(&context.alloc, (ULONG)((extent_start - run_start) / Vcb->superblock.sector_size), (ULONG)((extent_end - extent_start) / Vcb->superblock.sector_size));
2546 
2547  if (tp.item->key.obj_type == TYPE_METADATA_ITEM)
2548  extent_is_tree = TRUE;
2549  else {
2550  EXTENT_ITEM* ei = (EXTENT_ITEM*)tp.item->data;
2551 
2552  if (tp.item->size < sizeof(EXTENT_ITEM)) {
2553  ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_ITEM));
2554  Status = STATUS_INTERNAL_ERROR;
2555  goto end;
2556  }
2557 
2558  if (ei->flags & EXTENT_ITEM_TREE_BLOCK)
2559  extent_is_tree = TRUE;
2560  }
2561 
2562  if (extent_is_tree)
2563  RtlSetBits(&context.is_tree, (ULONG)((extent_start - run_start) / Vcb->superblock.sector_size), (ULONG)((extent_end - extent_start) / Vcb->superblock.sector_size));
2564  else if (c->chunk_item->type & BLOCK_FLAG_DATA) {
2565  traverse_ptr tp2;
2566  BOOL b2;
2567 
// Data extent: collect its checksums from the checksum tree.
2568  searchkey.obj_id = EXTENT_CSUM_ID;
2569  searchkey.obj_type = TYPE_EXTENT_CSUM;
2570  searchkey.offset = extent_start;
2571 
2572  Status = find_item(Vcb, Vcb->checksum_root, &tp2, &searchkey, FALSE, NULL);
// NOTE(review): STATUS_NOT_FOUND is tolerated here, yet tp2.item is dereferenced below --
// confirm find_item leaves tp2 valid in that case.
2573  if (!NT_SUCCESS(Status) && Status != STATUS_NOT_FOUND) {
2574  ERR("find_item returned %08x\n", Status);
2575  goto end;
2576  }
2577 
2578  do {
2579  traverse_ptr next_tp2;
2580 
2581  if (tp2.item->key.offset >= extent_end)
2582  break;
2583 
// NOTE(review): this test skips a checksum item that begins before extent_start even if it
// overlaps the extent; the max() on the next line suggests an overlap test was intended -- confirm.
2584  if (tp2.item->key.offset >= extent_start) {
2585  UINT64 csum_start = max(extent_start, tp2.item->key.offset);
2586  UINT64 csum_end = min(extent_end, tp2.item->key.offset + (tp2.item->size * Vcb->superblock.sector_size / sizeof(UINT32)));
2587 
2588  RtlSetBits(&context.has_csum, (ULONG)((csum_start - run_start) / Vcb->superblock.sector_size), (ULONG)((csum_end - csum_start) / Vcb->superblock.sector_size));
2589 
// One UINT32 checksum per sector, copied into the slot indexed by the sector's position in the run.
2590  RtlCopyMemory(&context.csum[(csum_start - run_start) / Vcb->superblock.sector_size],
2591  tp2.item->data + ((csum_start - tp2.item->key.offset) * sizeof(UINT32) / Vcb->superblock.sector_size),
2592  (ULONG)((csum_end - csum_start) * sizeof(UINT32) / Vcb->superblock.sector_size));
2593  }
2594 
2595  b2 = find_next_item(Vcb, &tp2, &next_tp2, FALSE, NULL);
2596 
2597  if (b2)
2598  tp2 = next_tp2;
2599  } while (b2);
2600  }
2601  }
2602  }
2603 
2604  b = find_next_item(Vcb, &tp, &next_tp, FALSE, NULL);
2605 
2606  if (b)
2607  tp = next_tp;
2608  } while (b);
2609 
2611  if (!context.stripes) {
2612  ERR("out of memory\n");
2614  goto end;
2615  }
2616 
2617  max_read = (UINT32)min(1048576 / c->chunk_item->stripe_length, stripe_end - stripe_start + 1); // only process 1 MB of data at a time
2618 
// Per-device state: a read buffer and an error bitmap array for each stripe of the chunk.
2619  for (i = 0; i < c->chunk_item->num_stripes; i++) {
2621  if (!context.stripes[i].buf) {
2622  UINT64 j;
2623 
2624  ERR("out of memory\n");
2625 
// NOTE(review): earlier stripes' buf is released here but their errorarr is not -- looks like a leak.
2626  for (j = 0; j < i; j++) {
2627  ExFreePool(context.stripes[j].buf);
2628  }
2629  ExFreePool(context.stripes);
2630 
2632  goto end;
2633  }
2634 
2636  if (!context.stripes[i].errorarr) {
2637  UINT64 j;
2638 
2639  ERR("out of memory\n");
2640 
2641  ExFreePool(context.stripes[i].buf);
2642 
// NOTE(review): same as above -- errorarr of stripes 0..i-1 is not released on this path.
2643  for (j = 0; j < i; j++) {
2644  ExFreePool(context.stripes[j].buf);
2645  }
2646  ExFreePool(context.stripes);
2647 
2649  goto end;
2650  }
2651 
2653 
2654  context.stripes[i].context = &context;
2655  context.stripes[i].rewrite = FALSE;
2656  }
2657 
2658  stripe = stripe_start;
2659 
2660  Status = STATUS_SUCCESS;
2661 
2662  chunk_lock_range(Vcb, c, run_start, run_end - run_start);
2663 
// Main loop: each pass handles up to max_read stripes starting at "stripe".
2664  do {
2665  ULONG read_stripes;
2666  UINT16 missing_devices = 0;
2667  BOOL need_wait = FALSE;
2668 
// stripe_end is inclusive, hence the "+ 1" when computing how many stripes remain.
2669  if (max_read < stripe_end + 1 - stripe)
2670  read_stripes = max_read;
2671  else
2672  read_stripes = (ULONG)(stripe_end + 1 - stripe);
2673 
2674  context.stripes_left = c->chunk_item->num_stripes;
2675 
2676  // read megabyte by megabyte
2677  for (i = 0; i < c->chunk_item->num_stripes; i++) {
2678  if (c->devices[i]->devobj) {
2680 
2681  context.stripes[i].Irp = IoAllocateIrp(c->devices[i]->devobj->StackSize, FALSE);
2682 
// NOTE(review): on the first pass, a jump to end3 from inside this loop leaves Irp of the
// later stripes unassigned, yet end3 tests it -- safe only if the stripes array was zeroed
// by its (missing) allocation; confirm.
2683  if (!context.stripes[i].Irp) {
2684  ERR("IoAllocateIrp failed\n");
2686  goto end3;
2687  }
2688 
2689  context.stripes[i].Irp->MdlAddress = NULL;
2690 
2691  IrpSp = IoGetNextIrpStackLocation(context.stripes[i].Irp);
2692  IrpSp->MajorFunction = IRP_MJ_READ;
2693 
// Buffer set-up depends on the I/O method advertised by the target device object.
2694  if (c->devices[i]->devobj->Flags & DO_BUFFERED_IO) {
2695  context.stripes[i].Irp->AssociatedIrp.SystemBuffer = ExAllocatePoolWithTag(NonPagedPool, (ULONG)(read_stripes * c->chunk_item->stripe_length), ALLOC_TAG);
2696  if (!context.stripes[i].Irp->AssociatedIrp.SystemBuffer) {
2697  ERR("out of memory\n");
2699  goto end3;
2700  }
2701 
2703 
2704  context.stripes[i].Irp->UserBuffer = context.stripes[i].buf;
2705  } else if (c->devices[i]->devobj->Flags & DO_DIRECT_IO) {
2706  context.stripes[i].Irp->MdlAddress = IoAllocateMdl(context.stripes[i].buf, (ULONG)(read_stripes * c->chunk_item->stripe_length), FALSE, FALSE, NULL);
2707  if (!context.stripes[i].Irp->MdlAddress) {
2708  ERR("IoAllocateMdl failed\n");
2710  goto end3;
2711  }
2712 
2713  Status = STATUS_SUCCESS;
2714 
2715  _SEH2_TRY {
2716  MmProbeAndLockPages(context.stripes[i].Irp->MdlAddress, KernelMode, IoWriteAccess);
2718  Status = _SEH2_GetExceptionCode();
2719  } _SEH2_END;
2720 
// NOTE(review): the MDL is freed here but Irp->MdlAddress is left non-NULL, so the end3
// cleanup would call MmUnlockPages/IoFreeMdl on it again -- looks like a double free.
2721  if (!NT_SUCCESS(Status)) {
2722  ERR("MmProbeAndLockPages threw exception %08x\n", Status);
2723  IoFreeMdl(context.stripes[i].Irp->MdlAddress);
2724  goto end3;
2725  }
2726  } else
2727  context.stripes[i].Irp->UserBuffer = context.stripes[i].buf;
2728 
// Byte offset of this pass within the device's stripe; added to cis[i].offset for the physical read.
2729  context.stripes[i].offset = stripe * c->chunk_item->stripe_length;
2730 
2731  IrpSp->Parameters.Read.Length = (ULONG)(read_stripes * c->chunk_item->stripe_length);
2732  IrpSp->Parameters.Read.ByteOffset.QuadPart = cis[i].offset + context.stripes[i].offset;
2733 
2734  context.stripes[i].Irp->UserIosb = &context.stripes[i].iosb;
2735  context.stripes[i].missing = FALSE;
2736 
2737  IoSetCompletionRoutine(context.stripes[i].Irp, scrub_read_completion_raid56, &context.stripes[i], TRUE, TRUE, TRUE);
2738 
2739  Vcb->scrub.data_scrubbed += read_stripes * c->chunk_item->stripe_length;
2740  need_wait = TRUE;
2741  } else {
// Device has no device object: mark it missing and count it.
2742  context.stripes[i].Irp = NULL;
2743  context.stripes[i].missing = TRUE;
2744  missing_devices++;
2746  }
2747  }
2748 
// RAID5 tolerates one missing device, RAID6 two.
2749  if (c->chunk_item->type & BLOCK_FLAG_RAID5 && missing_devices > 1) {
2750  ERR("too many missing devices (%u, maximum 1)\n", missing_devices);
2751  Status = STATUS_UNEXPECTED_IO_ERROR;
2752  goto end3;
2753  } else if (c->chunk_item->type & BLOCK_FLAG_RAID6 && missing_devices > 2) {
2754  ERR("too many missing devices (%u, maximum 2)\n", missing_devices);
2755  Status = STATUS_UNEXPECTED_IO_ERROR;
2756  goto end3;
2757  }
2758 
2759  if (need_wait) {
2761 
2762  for (i = 0; i < c->chunk_item->num_stripes; i++) {
2763  if (c->devices[i]->devobj)
2764  IoCallDriver(c->devices[i]->devobj, context.stripes[i].Irp);
2765  }
2766 
2768  }
2769 
2770  // return an error if any of the stripes returned an error
2771  for (i = 0; i < c->chunk_item->num_stripes; i++) {
2772  if (!context.stripes[i].missing && !NT_SUCCESS(context.stripes[i].iosb.Status)) {
2773  Status = context.stripes[i].iosb.Status;
2775  goto end3;
2776  }
2777  }
2778 
// i here is the stripe index within this pass (0..read_stripes-1), not a device index.
2779  if (c->chunk_item->type & BLOCK_FLAG_RAID6) {
2780  for (i = 0; i < read_stripes; i++) {
2781  scrub_raid6_stripe(Vcb, c, &context, stripe_start, stripe, i, missing_devices);
2782  }
2783  } else {
2784  for (i = 0; i < read_stripes; i++) {
2785  scrub_raid5_stripe(Vcb, c, &context, stripe_start, stripe, i, missing_devices);
2786  }
2787  }
2788  stripe += read_stripes;
2789 
// Per-pass cleanup: release MDLs/IRPs and write back any buffer flagged for rewrite.
2790 end3:
2791  for (i = 0; i < c->chunk_item->num_stripes; i++) {
2792  if (context.stripes[i].Irp) {
2793  if (c->devices[i]->devobj->Flags & DO_DIRECT_IO && context.stripes[i].Irp->MdlAddress) {
2794  MmUnlockPages(context.stripes[i].Irp->MdlAddress);
2795  IoFreeMdl(context.stripes[i].Irp->MdlAddress);
2796  }
2797  IoFreeIrp(context.stripes[i].Irp);
2798  context.stripes[i].Irp = NULL;
2799 
// NOTE(review): rewrite is not reset after the write-back, and this also runs when end3 was
// reached through an error path -- confirm both are intended.
2800  if (context.stripes[i].rewrite) {
2801  Status = write_data_phys(c->devices[i]->devobj, cis[i].offset + context.stripes[i].offset,
2802  context.stripes[i].buf, (UINT32)(read_stripes * c->chunk_item->stripe_length));
2803 
// NOTE(review): leaving via end2 here skips the IRP/MDL release for the remaining stripes.
2804  if (!NT_SUCCESS(Status)) {
2805  ERR("write_data_phys returned %08x\n", Status);
2807  goto end2;
2808  }
2809  }
2810  }
2811  }
2812 
2813  if (!NT_SUCCESS(Status))
2814  break;
// NOTE(review): stripe_end is inclusive (see the read_stripes computation above), so when
// exactly one stripe remains (stripe == stripe_end) this condition ends the loop without
// scrubbing it; "stripe <= stripe_end" looks like the intended bound -- confirm.
2815  } while (stripe < stripe_end);
2816 
// Release the range lock and the per-device buffers.
2817 end2:
2818  chunk_unlock_range(Vcb, c, run_start, run_end - run_start);
2819 
2820  for (i = 0; i < c->chunk_item->num_stripes; i++) {
2821  ExFreePool(context.stripes[i].buf);
2822  ExFreePool(context.stripes[i].errorarr);
2823  }
2824  ExFreePool(context.stripes);
2825 
// Release the bitmaps, scratch buffers and (for data chunks) the checksum arrays.
2826 end:
2827  ExFreePool(treearr);
2828  ExFreePool(allocarr);
2829  ExFreePool(context.parity_scratch);
2830 
2831  if (c->chunk_item->type & BLOCK_FLAG_RAID6)
2832  ExFreePool(context.parity_scratch2);
2833 
2834  if (c->chunk_item->type & BLOCK_FLAG_DATA) {
2835  ExFreePool(csumarr);
2836  ExFreePool(context.csum);
2837  }
2838 
2839  return Status;
2840 }
2841 
2843  NTSTATUS Status;
2844  KEY searchkey;
2845  traverse_ptr tp;
2846  BOOL b;
2847  UINT64 full_stripe_len, stripe, stripe_start, stripe_end, total_data = 0;
2848  ULONG num_extents = 0, num_parity_stripes = c->chunk_item->type & BLOCK_FLAG_RAID6 ? 2 : 1;
2849 
2850  full_stripe_len = (c->chunk_item->num_stripes - num_parity_stripes) * c->chunk_item->stripe_length;
2851  stripe = (*offset - c->offset) / full_stripe_len;
2852 
2853  *offset = c->offset + (stripe * full_stripe_len);
2854 
2855  searchkey.obj_id = *offset;
2856  searchkey.obj_type = TYPE_METADATA_ITEM;
2857  searchkey.offset = 0xffffffffffffffff;
2858 
2859  Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, NULL);
2860  if (!NT_SUCCESS(Status)) {
2861  ERR("find_item returned %08x\n", Status);
2862  return Status;
2863  }
2864 
2865  *changed = FALSE;
2866 
2867  do {
2868  traverse_ptr next_tp;
2869 
2870  if (tp.item->key.obj_id >= c->offset + c->chunk_item->size)
2871  break;
2872 
2873  if (tp.item->key.obj_id >= *offset && (tp.item->key.obj_type == TYPE_EXTENT_ITEM || tp.item->key.obj_type == TYPE_METADATA_ITEM)) {
2875 
2876  TRACE("%llx\n", tp.item->key.obj_id);
2877 
2879  ERR("extent %llx has size less than sector_size (%llx < %x)\n", tp.item->key.obj_id, Vcb->superblock.sector_size);
2880  return STATUS_INTERNAL_ERROR;
2881  }
2882 
2883  stripe = (tp.item->key.obj_id - c->offset) / full_stripe_len;
2884 
2885  if (*changed) {
2886  if (stripe > stripe_end + 1) {
2887  Status = scrub_chunk_raid56_stripe_run(Vcb, c, stripe_start, stripe_end);
2888  if (!NT_SUCCESS(Status)) {
2889  ERR("scrub_chunk_raid56_stripe_run returned %08x\n", Status);
2890  return Status;
2891  }
2892 
2893  stripe_start = stripe;
2894  }
2895  } else
2896  stripe_start = stripe;
2897 
2898  stripe_end = (tp.item->key.obj_id + size - 1 - c->offset) / full_stripe_len;
2899 
2900  *changed = TRUE;
2901 
2902  total_data += size;
2903  num_extents++;
2904 
2905  // only do so much at a time
2906  if (num_extents >= 64 || total_data >= 0x8000000) // 128 MB
2907  break;
2908  }
2909 
2910  b = find_next_item(Vcb, &tp, &next_tp, FALSE, NULL);
2911 
2912  if (b)
2913  tp = next_tp;
2914  } while (b);
2915 
2916  if (*changed) {
2917  Status = scrub_chunk_raid56_stripe_run(Vcb, c, stripe_start, stripe_end);
2918  if (!NT_SUCCESS(Status)) {
2919  ERR("scrub_chunk_raid56_stripe_run returned %08x\n", Status);
2920  return Status;
2921  }
2922 
2923  *offset = c->offset + ((stripe_end + 1) * full_stripe_len);
2924  }
2925 
2926  return STATUS_SUCCESS;
2927 }
2928 
// Body of scrub_chunk: scrubs one batch of extents belonging to chunk c.
//
// NOTE(review): the line carrying the function signature is absent from this
// listing. The body uses Vcb, c, offset and changed as parameters, with offset
// and changed written through as pointers - confirm against the real source.
//
// Outline of what the visible code does:
//  - holds Vcb->tree_lock shared for the whole call (released at the end label);
//  - RAID5 / RAID6 chunks are handed to scrub_chunk_raid56 and nothing else runs;
//  - otherwise walks extent_root starting at *offset, handling EXTENT_ITEM and
//    METADATA_ITEM keys that lie inside the chunk;
//  - tree extents are merged into runs and checked via scrub_extent, data
//    extents are checked one at a time via scrub_data_extent;
//  - *offset is advanced past each handled extent and *changed is set to TRUE;
//  - stops after 64 extents or 0x8000000 bytes so the caller can call again.
//
// Returns the NTSTATUS of the first failing call, else STATUS_SUCCESS.
2930  NTSTATUS Status;
2931  KEY searchkey;
2932  traverse_ptr tp;
2933  BOOL b = FALSE, tree_run = FALSE;
2934  ULONG type, num_extents = 0;
2935  UINT64 total_data = 0, tree_run_start, tree_run_end;
2936 
2937  TRACE("chunk %llx\n", c->offset);
2938 
2939  ExAcquireResourceSharedLite(&Vcb->tree_lock, TRUE);
2940 
// Pick the profile value passed on to scrub_extent / scrub_data_extent.
// RAID1 and the final "single" case are both mapped to BLOCK_FLAG_DUPLICATE.
// NOTE(review): the listing drops a line here, presumably the opening `if`
// of this else-if chain - confirm.
2942  type = BLOCK_FLAG_DUPLICATE;
2943  else if (c->chunk_item->type & BLOCK_FLAG_RAID0)
2944  type = BLOCK_FLAG_RAID0;
2945  else if (c->chunk_item->type & BLOCK_FLAG_RAID1)
2946  type = BLOCK_FLAG_DUPLICATE;
2947  else if (c->chunk_item->type & BLOCK_FLAG_RAID10)
2948  type = BLOCK_FLAG_RAID10;
2949  else if (c->chunk_item->type & BLOCK_FLAG_RAID5) {
2950  Status = scrub_chunk_raid56(Vcb, c, offset, changed);
2951  goto end;
2952  } else if (c->chunk_item->type & BLOCK_FLAG_RAID6) {
2953  Status = scrub_chunk_raid56(Vcb, c, offset, changed);
2954  goto end;
2955  } else // SINGLE
2956  type = BLOCK_FLAG_DUPLICATE;
2957 
// Position the traversal in the extent tree using *offset as the object id.
2958  searchkey.obj_id = *offset;
2959  searchkey.obj_type = TYPE_METADATA_ITEM;
2960  searchkey.offset = 0xffffffffffffffff;
2961 
2962  Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, NULL);
2963  if (!NT_SUCCESS(Status)) {
2964  ERR("error - find_item returned %08x\n", Status);
2965  goto end;
2966  }
2967 
2968  do {
2969  traverse_ptr next_tp;
2970 
// Items at or beyond the end of this chunk end the walk.
2971  if (tp.item->key.obj_id >= c->offset + c->chunk_item->size)
2972  break;
2973 
2974  if (tp.item->key.obj_id >= *offset && (tp.item->key.obj_type == TYPE_EXTENT_ITEM || tp.item->key.obj_type == TYPE_METADATA_ITEM)) {
// NOTE(review): the listing drops a line here; `size`, used below, is
// presumably declared on it - confirm how it is derived from the key.
2976  BOOL is_tree;
2977  UINT32* csum = NULL;
2978  RTL_BITMAP bmp;
2979  ULONG* bmparr = NULL;
2980 
2981  TRACE("%llx\n", tp.item->key.obj_id);
2982 
// An extent counts as tree (metadata) if its key is a METADATA_ITEM, or if
// it is an EXTENT_ITEM whose flags include EXTENT_ITEM_TREE_BLOCK.
2983  is_tree = FALSE;
2984 
2985  if (tp.item->key.obj_type == TYPE_METADATA_ITEM)
2986  is_tree = TRUE;
2987  else {
2988  EXTENT_ITEM* ei = (EXTENT_ITEM*)tp.item->data;
2989 
2990  if (tp.item->size < sizeof(EXTENT_ITEM)) {
2991  ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_ITEM));
2992  Status = STATUS_INTERNAL_ERROR;
2993  goto end;
2994  }
2995 
2996  if (ei->flags & EXTENT_ITEM_TREE_BLOCK)
2997  is_tree = TRUE;
2998  }
2999 
// NOTE(review): the listing drops the condition line that opens this error
// block (per the message, a size-versus-sector_size check).
// NOTE(review): the format string below has three conversion specifiers but
// only two arguments are passed; the extent size appears to be missing.
3001  ERR("extent %llx has size less than sector_size (%llx < %x)\n", tp.item->key.obj_id, Vcb->superblock.sector_size);
3002  Status = STATUS_INTERNAL_ERROR;
3003  goto end;
3004  }
3005 
3006  // load csum
3007  if (!is_tree) {
3008  traverse_ptr tp2;
3009 
// One UINT32 checksum slot per sector of the extent.
3010  csum = ExAllocatePoolWithTag(PagedPool, (ULONG)(sizeof(UINT32) * size / Vcb->superblock.sector_size), ALLOC_TAG);
3011  if (!csum) {
3012  ERR("out of memory\n");
// NOTE(review): a line is dropped here in the listing; presumably it sets
// Status before the jump - confirm.
3014  goto end;
3015  }
3016 
// One bit per sector, rounded up to whole ULONGs.
3017  bmparr = ExAllocatePoolWithTag(PagedPool, (ULONG)(sector_align(((size / Vcb->superblock.sector_size) >> 3) + 1, sizeof(ULONG))), ALLOC_TAG);
3018  if (!bmparr) {
3019  ERR("out of memory\n");
3020  ExFreePool(csum);
// NOTE(review): a line is dropped here in the listing; presumably it sets
// Status before the jump - confirm.
3022  goto end;
3023  }
3024 
3025  RtlInitializeBitMap(&bmp, bmparr, (ULONG)(size / Vcb->superblock.sector_size));
3026  RtlSetAllBits(&bmp); // 1 = no csum, 0 = csum
3027 
// Look up checksum items starting from the extent's address.
3028  searchkey.obj_id = EXTENT_CSUM_ID;
3029  searchkey.obj_type = TYPE_EXTENT_CSUM;
3030  searchkey.offset = tp.item->key.obj_id;
3031 
// STATUS_NOT_FOUND is tolerated: the bitmap then stays all-ones (no csums).
3032  Status = find_item(Vcb, Vcb->checksum_root, &tp2, &searchkey, FALSE, NULL);
3033  if (!NT_SUCCESS(Status) && Status != STATUS_NOT_FOUND) {
3034  ERR("find_item returned %08x\n", Status);
3035  ExFreePool(csum);
3036  ExFreePool(bmparr);
3037  goto end;
3038  }
3039 
3040  if (Status != STATUS_NOT_FOUND) {
3041  do {
3042  traverse_ptr next_tp2;
3043 
3044  if (tp2.item->key.obj_type == TYPE_EXTENT_CSUM) {
// Checksum items that start at or past the end of the extent end the search.
3045  if (tp2.item->key.offset >= tp.item->key.obj_id + size)
3046  break;
3047  else if (tp2.item->size >= sizeof(UINT32) && tp2.item->key.offset + (tp2.item->size * Vcb->superblock.sector_size / sizeof(UINT32)) >= tp.item->key.obj_id) {
// [cs, ce) is the overlap between the extent and the range this checksum
// item covers (one UINT32 per sector starting at the item's key offset).
3048  UINT64 cs = max(tp.item->key.obj_id, tp2.item->key.offset);
3049  UINT64 ce = min(tp.item->key.obj_id + size, tp2.item->key.offset + (tp2.item->size * Vcb->superblock.sector_size / sizeof(UINT32)));
3050 
3051  RtlCopyMemory(csum + ((cs - tp.item->key.obj_id) / Vcb->superblock.sector_size),
3052  tp2.item->data + ((cs - tp2.item->key.offset) * sizeof(UINT32) / Vcb->superblock.sector_size),
3053  (ULONG)((ce - cs) * sizeof(UINT32) / Vcb->superblock.sector_size));
3054 
// Mark the copied sectors as having a checksum.
3055  RtlClearBits(&bmp, (ULONG)((cs - tp.item->key.obj_id) / Vcb->superblock.sector_size), (ULONG)((ce - cs) / Vcb->superblock.sector_size));
3056 
// Overlap reached the end of the extent: nothing more to collect.
3057  if (ce == tp.item->key.obj_id + size)
3058  break;
3059  }
3060  }
3061 
3062  if (find_next_item(Vcb, &tp2, &next_tp2, FALSE, NULL))
3063  tp2 = next_tp2;
3064  else
3065  break;
3066  } while (TRUE);
3067  }
3068  }
3069 
// Tree-run bookkeeping. While a run is open, a tree extent starting at or
// before tree_run_end just extends it; a data extent, or a tree extent that
// starts beyond tree_run_end, first flushes the run through scrub_extent
// (with a NULL csum argument) and then either closes the run or restarts it.
3070  if (tree_run) {
3071  if (!is_tree || tp.item->key.obj_id > tree_run_end) {
3072  Status = scrub_extent(Vcb, c, type, tree_run_start, (UINT32)(tree_run_end - tree_run_start), NULL);
// NOTE(review): when this fails while handling a data extent (!is_tree),
// csum and bmparr were allocated above and are not freed before the jump;
// the end label only releases the lock. This looks like a leak.
3073  if (!NT_SUCCESS(Status)) {
3074  ERR("scrub_extent returned %08x\n", Status);
3075  goto end;
3076  }
3077 
3078  if (!is_tree)
3079  tree_run = FALSE;
3080  else {
3081  tree_run_start = tp.item->key.obj_id;
3082  tree_run_end = tp.item->key.obj_id + Vcb->superblock.node_size;
3083  }
3084  } else
3085  tree_run_end = tp.item->key.obj_id + Vcb->superblock.node_size;
3086  } else if (is_tree) {
3087  tree_run = TRUE;
3088  tree_run_start = tp.item->key.obj_id;
3089  tree_run_end = tp.item->key.obj_id + Vcb->superblock.node_size;
3090  }
3091 
// Data extents are scrubbed immediately with the checksums gathered above;
// the buffers are released on both the failure and the success path.
3092  if (!is_tree) {
3093  Status = scrub_data_extent(Vcb, c, tp.item->key.obj_id, type, csum, &bmp);
3094  if (!NT_SUCCESS(Status)) {
3095  ERR("scrub_data_extent returned %08x\n", Status);
3096  ExFreePool(csum);
3097  ExFreePool(bmparr);
3098  goto end;
3099  }
3100 
3101  ExFreePool(csum);
3102  ExFreePool(bmparr);
3103  }
3104 
// Report progress to the caller: next call resumes after this extent.
3105  *offset = tp.item->key.obj_id + size;
3106  *changed = TRUE;
3107 
3108  total_data += size;
3109  num_extents++;
3110 
3111  // only do so much at a time
3112  if (num_extents >= 64 || total_data >= 0x8000000) // 128 MB
3113  break;
3114  }
3115 
3116  b = find_next_item(Vcb, &tp, &next_tp, FALSE, NULL);
3117 
3118  if (b)
3119  tp = next_tp;
3120  } while (b);
3121 
// Flush a tree run that is still open when the walk stops.
3122  if (tree_run) {
3123  Status = scrub_extent(Vcb, c, type, tree_run_start, (UINT32)(tree_run_end - tree_run_start), NULL);
3124  if (!NT_SUCCESS(Status)) {
3125  ERR("scrub_extent returned %08x\n", Status);
3126  goto end;
3127  }
3128  }
3129 
3130  Status = STATUS_SUCCESS;
3131 
// Single exit point: every path above that jumps here still holds tree_lock.
3132 end:
3133  ExReleaseResourceLite(&Vcb->tree_lock);
3134 
3135  return Status;
3136 }
3137 
// Thread routine that performs a scrub of the volume passed as context.
//
// Visible flow: flush pending writes under tree_lock, reset the counters in
// Vcb->scrub, queue every non-readonly chunk on a local list, call
// scrub_chunk repeatedly on each queued chunk, then close the thread handle
// and signal Vcb->scrub.finished.
//
// NOTE(review): this listing drops a number of lines inside the function
// (gaps in the embedded numbering), including the declarations of Vcb and
// time; comments next to a gap are assumptions to be confirmed.
3138 _Function_class_(KSTART_ROUTINE)
3139 #ifdef __REACTOS__
3140 static void NTAPI scrub_thread(void* context) {
3141 #else
3142 static void scrub_thread(void* context) {
3143 #endif
3145  LIST_ENTRY chunks, *le;
3146  NTSTATUS Status;
3148 
3150 
3151  InitializeListHead(&chunks);
3152 
// Write out pending changes (skipped when nothing is dirty or the volume is
// read-only) and drop cached trees while holding tree_lock exclusively.
3153  ExAcquireResourceExclusiveLite(&Vcb->tree_lock, TRUE);
3154 
3155  if (Vcb->need_write && !Vcb->readonly)
3156  Status = do_write(Vcb, NULL);
3157  else
3158  Status = STATUS_SUCCESS;
3159 
3160  free_trees(Vcb);
3161 
// A failed flush aborts the scrub; the status is recorded in Vcb->scrub.error.
3162  if (!NT_SUCCESS(Status)) {
3163  ExReleaseResourceLite(&Vcb->tree_lock);
3164  ERR("do_write returned %08x\n", Status);
3165  Vcb->scrub.error = Status;
3166  goto end;
3167  }
3168 
3169  ExConvertExclusiveToSharedLite(&Vcb->tree_lock);
3170 
3172 
// Reset the statistics left over from any previous run.
3174  Vcb->scrub.finish_time.QuadPart = 0;
3176  Vcb->scrub.duration.QuadPart = 0;
3177  Vcb->scrub.total_chunks = 0;
3178  Vcb->scrub.chunks_left = 0;
3179  Vcb->scrub.data_scrubbed = 0;
3180  Vcb->scrub.num_errors = 0;
3181 
// Free the error records of the previous run.
// NOTE(review): the line that defines err (presumably by removing the list
// head) is dropped from the listing - confirm.
3182  while (!IsListEmpty(&Vcb->scrub.errors)) {
3184  ExFreePool(err);
3185  }
3186 
3188 
// Collect the chunks to scrub: every chunk not marked readonly is appended
// to the local list through its list_entry_balance link and counted.
// NOTE(review): the line that derives c from le is dropped from the listing.
3189  le = Vcb->chunks.Flink;
3190  while (le != &Vcb->chunks) {
3192 
3194 
3195  if (!c->readonly) {
3196  InsertTailList(&chunks, &c->list_entry_balance);
3197  Vcb->scrub.total_chunks++;
3198  Vcb->scrub.chunks_left++;
3199  }
3200 
3202 
3203  le = le->Flink;
3204  }
3205 
3207 
3209 
3210  ExReleaseResourceLite(&Vcb->tree_lock);
3211 
// Process the queued chunks one by one.
3212  while (!IsListEmpty(&chunks)) {
3213  chunk* c = CONTAINING_RECORD(RemoveHeadList(&chunks), chunk, list_entry_balance);
3214  UINT64 offset = c->offset;
3215  BOOL changed;
3216 
// reloc is set for the duration of this chunk's scrub and cleared afterwards.
3217  c->reloc = TRUE;
3218 
3220 
// Call scrub_chunk until it fails, reports no progress (changed stays
// FALSE), offset reaches the end of the chunk, or a stop was requested.
3221  if (!Vcb->scrub.stopping) {
3222  do {
3223  changed = FALSE;
3224 
3225  Status = scrub_chunk(Vcb, c, &offset, &changed);
3226  if (!NT_SUCCESS(Status)) {
3227  ERR("scrub_chunk returned %08x\n", Status);
3228  Vcb->scrub.stopping = TRUE;
3229  Vcb->scrub.error = Status;
3230  break;
3231  }
3232 
3233  if (offset == c->offset + c->chunk_item->size || Vcb->scrub.stopping)
3234  break;
3235 
3237  } while (changed);
3238  }
3239 
3241 
// The chunk only counts as done if no stop or error occurred.
3242  if (!Vcb->scrub.stopping)
3243  Vcb->scrub.chunks_left--;
3244 
// NOTE(review): the statement controlled by the following `if` is dropped
// from the listing; as shown, the `if` would bind to the next statement.
3245  if (IsListEmpty(&chunks))
3247 
3249 
3250  c->reloc = FALSE;
3252  }
3253 
3254  KeQuerySystemTime(&time);
3256 
// Common exit: release the thread handle and signal the finished event.
3257 end:
3258  ZwClose(Vcb->scrub.thread);
3259  Vcb->scrub.thread = NULL;
3260 
3261  KeSetEvent(&Vcb->scrub.finished, 0, FALSE);
3262 }
3263 
// Body of start_scrub: validates that a scrub may begin and launches the
// scrub_thread system thread with Vcb as its context.
//
// NOTE(review): the line carrying the function signature is absent from this
// listing; the body uses Vcb and processor_mode as parameters - confirm.
//
// Returns STATUS_DEVICE_NOT_READY when the volume is locked, a balance thread
// exists, or a scrub thread already exists; otherwise the status of
// PsCreateSystemThread on failure, or STATUS_SUCCESS.
3265  NTSTATUS Status;
3266 
// Caller must hold SE_MANAGE_VOLUME_PRIVILEGE.
// NOTE(review): the statement taken when the check fails is dropped from the
// listing (presumably an early return) - confirm.
3267  if (!SeSinglePrivilegeCheck(RtlConvertLongToLuid(SE_MANAGE_VOLUME_PRIVILEGE), processor_mode))
3269 
3270  if (Vcb->locked) {
3271  WARN("cannot start scrub while locked\n");
3272  return STATUS_DEVICE_NOT_READY;
3273  }
3274 
3275  if (Vcb->balance.thread) {
3276  WARN("cannot start scrub while balance running\n");
3277  return STATUS_DEVICE_NOT_READY;
3278  }
3279 
3280  if (Vcb->scrub.thread) {
3281  WARN("scrub already running\n");
3282  return STATUS_DEVICE_NOT_READY;
3283  }
3284 
// NOTE(review): the statement taken for a read-only volume is dropped from
// the listing (presumably an early return) - confirm.
3285  if (Vcb->readonly)
3287 
// Clear the control flags and error left from any previous run.
3288  Vcb->scrub.stopping = FALSE;
3289  Vcb->scrub.paused = FALSE;
3290  Vcb->scrub.error = STATUS_SUCCESS;
3292 
// The thread handle is stored in Vcb->scrub.thread; scrub_thread closes it.
3293  Status = PsCreateSystemThread(&Vcb->scrub.thread, 0, NULL, NULL, NULL, scrub_thread, Vcb);
3294  if (!NT_SUCCESS(Status)) {
3295  ERR("PsCreateSystemThread returned %08x\n", Status);
3296  return Status;
3297  }
3298 
3299  return STATUS_SUCCESS;
3300 }
3301 
3303  btrfs_query_scrub* bqs = (btrfs_query_scrub*)data;
3304  ULONG len;
3305  NTSTATUS Status;
3306  LIST_ENTRY* le;
3307  btrfs_scrub_error* bse = NULL;
3308 
3309  if (!SeSinglePrivilegeCheck(RtlConvertLongToLuid(SE_MANAGE_VOLUME_PRIVILEGE), processor_mode))
3311 
3312  if (length < offsetof(btrfs_query_scrub, errors))
3313  return STATUS_BUFFER_TOO_SMALL;
3314 
3316 
3317  if (Vcb->scrub.thread && Vcb->scrub.chunks_left > 0)
3319  else
3320  bqs->status = BTRFS_SCRUB_STOPPED;
3321 
3324  bqs->chunks_left = Vcb->scrub.chunks_left;
3325  bqs->total_chunks = Vcb->scrub.total_chunks;
3326  bqs->data_scrubbed = Vcb->scrub.data_scrubbed;
3327 
3328  bqs->duration = Vcb->scrub.duration.QuadPart;
3329 
3330  if (bqs->status == BTRFS_SCRUB_RUNNING) {
3332 
3333  KeQuerySystemTime(&time);
3334  bqs->duration += time.QuadPart - Vcb->scrub.resume_time.QuadPart;
3335  }
3336 
3337  bqs->error = Vcb->scrub.error;
3338 
3339  bqs->num_errors = Vcb->scrub.num_errors;
3340 
3341  len = length - offsetof(btrfs_query_scrub, errors);
3342 
3343  le = Vcb->scrub.errors.Flink;
3344  while (le != &Vcb->scrub.errors) {
3346  ULONG errlen;
3347 
3348  if (err->is_metadata)
3349  errlen = offsetof(btrfs_scrub_error, metadata.firstitem) + sizeof(KEY);
3350  else
3351  errlen = offsetof(btrfs_scrub_error, data.filename) + err->data.filename_length;
3352 
3353  if (len < errlen) {
3354  Status = STATUS_BUFFER_OVERFLOW;
3355  goto end;
3356  }
3357 
3358  if (!bse)
3359  bse = &bqs->errors;
3360  else {
3361  ULONG lastlen;
3362 
3363  if (bse->is_metadata)
3364  lastlen = offsetof(btrfs_scrub_error, metadata.firstitem) + sizeof(KEY);
3365  else
3366  lastlen = offsetof(btrfs_scrub_error, data.filename) + bse->data.filename_length;
3367 
3368  bse->next_entry = lastlen;
3369  bse = (btrfs_scrub_error*)(((UINT8*)bse) + lastlen);
3370  }
3371 
3372  bse->next_entry = 0;
3373  bse->address = err->address;
3374  bse->device = err->device;
3375  bse->recovered = err->recovered;
3376  bse->is_metadata = err->is_metadata;
3377  bse->parity = err->parity;
3378 
3379  if (err->is_metadata) {
3380  bse->metadata.root = err->metadata.root;
3381  bse->metadata.level = err->metadata.level;
3382  bse->metadata.firstitem = err->metadata.firstitem;
3383  } else {
3384  bse->data.subvol = err->data.subvol;
3385  bse->data.offset = err->data.offset;
3386  bse->data.filename_length = err->data.filename_length;
3387  RtlCopyMemory(bse->data.filename, err->data.filename, err->data.filename_length);
3388  }
3389 
3390  len -= errlen;
3391  le = le->Flink;
3392  }
3393 
3394  Status = STATUS_SUCCESS;
3395 
3396 end: