ReactOS  0.4.15-dev-439-g292f67a
scrub.c
Go to the documentation of this file.
1 /* Copyright (c) Mark Harmstone 2017
2  *
3  * This file is part of WinBtrfs.
4  *
5  * WinBtrfs is free software: you can redistribute it and/or modify
6  * it under the terms of the GNU Lesser General Public Licence as published by
7  * the Free Software Foundation, either version 3 of the Licence, or
8  * (at your option) any later version.
9  *
10  * WinBtrfs is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  * GNU Lesser General Public Licence for more details.
14  *
15  * You should have received a copy of the GNU Lesser General Public Licence
16  * along with WinBtrfs. If not, see <http://www.gnu.org/licenses/>. */
17 
18 #include "btrfs_drv.h"
19 
20 #define SCRUB_UNIT 0x100000 // 1 MB
21 
22 struct _scrub_context;
23 
24 typedef struct {
31  bool csum_error;
32  void* bad_csums;
34 
35 typedef struct _scrub_context {
40 
41 typedef struct {
45 } path_part;
46 
48  LIST_ENTRY *le, parts;
49  root* r = NULL;
50  KEY searchkey;
52  uint64_t dir;
53  bool orig_subvol = true, not_in_tree = false;
57  ULONG utf16len;
58 
59  le = Vcb->roots.Flink;
60  while (le != &Vcb->roots) {
62 
63  if (r2->id == subvol) {
64  r = r2;
65  break;
66  }
67 
68  le = le->Flink;
69  }
70 
71  if (!r) {
72  ERR("could not find subvol %I64x\n", subvol);
73  return;
74  }
75 
77 
78  dir = inode;
79 
80  while (true) {
81  if (dir == r->root_item.objid) {
82  if (r == Vcb->root_fileref->fcb->subvol)
83  break;
84 
85  searchkey.obj_id = r->id;
86  searchkey.obj_type = TYPE_ROOT_BACKREF;
87  searchkey.offset = 0xffffffffffffffff;
88 
89  Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, false, NULL);
90  if (!NT_SUCCESS(Status)) {
91  ERR("find_item returned %08lx\n", Status);
92  goto end;
93  }
94 
95  if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == searchkey.obj_type) {
96  ROOT_REF* rr = (ROOT_REF*)tp.item->data;
97  path_part* pp;
98 
99  if (tp.item->size < sizeof(ROOT_REF)) {
100  ERR("(%I64x,%x,%I64x) was %u bytes, expected at least %Iu\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(ROOT_REF));
101  goto end;
102  }
103 
104  if (tp.item->size < offsetof(ROOT_REF, name[0]) + rr->n) {
105  ERR("(%I64x,%x,%I64x) was %u bytes, expected at least %Iu\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset,
106  tp.item->size, offsetof(ROOT_REF, name[0]) + rr->n);
107  goto end;
108  }
109 
111  if (!pp) {
112  ERR("out of memory\n");
113  goto end;
114  }
115 
116  pp->name.Buffer = rr->name;
117  pp->name.Length = pp->name.MaximumLength = rr->n;
118  pp->orig_subvol = false;
119 
120  InsertTailList(&parts, &pp->list_entry);
121 
122  r = NULL;
123 
124  le = Vcb->roots.Flink;
125  while (le != &Vcb->roots) {
127 
128  if (r2->id == tp.item->key.offset) {
129  r = r2;
130  break;
131  }
132 
133  le = le->Flink;
134  }
135 
136  if (!r) {
137  ERR("could not find subvol %I64x\n", tp.item->key.offset);
138  goto end;
139  }
140 
141  dir = rr->dir;
142  orig_subvol = false;
143  } else {
144  not_in_tree = true;
145  break;
146  }
147  } else {
148  searchkey.obj_id = dir;
149  searchkey.obj_type = TYPE_INODE_EXTREF;
150  searchkey.offset = 0xffffffffffffffff;
151 
152  Status = find_item(Vcb, r, &tp, &searchkey, false, NULL);
153  if (!NT_SUCCESS(Status)) {
154  ERR("find_item returned %08lx\n", Status);
155  goto end;
156  }
157 
158  if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == TYPE_INODE_REF) {
159  INODE_REF* ir = (INODE_REF*)tp.item->data;
160  path_part* pp;
161 
162  if (tp.item->size < sizeof(INODE_REF)) {
163  ERR("(%I64x,%x,%I64x) was %u bytes, expected at least %Iu\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(INODE_REF));
164  goto end;
165  }
166 
167  if (tp.item->size < offsetof(INODE_REF, name[0]) + ir->n) {
168  ERR("(%I64x,%x,%I64x) was %u bytes, expected at least %Iu\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset,
169  tp.item->size, offsetof(INODE_REF, name[0]) + ir->n);
170  goto end;
171  }
172 
174  if (!pp) {
175  ERR("out of memory\n");
176  goto end;
177  }
178 
179  pp->name.Buffer = ir->name;
180  pp->name.Length = pp->name.MaximumLength = ir->n;
181  pp->orig_subvol = orig_subvol;
182 
183  InsertTailList(&parts, &pp->list_entry);
184 
185  if (dir == tp.item->key.offset)
186  break;
187 
188  dir = tp.item->key.offset;
189  } else if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == TYPE_INODE_EXTREF) {
191  path_part* pp;
192 
193  if (tp.item->size < sizeof(INODE_EXTREF)) {
194  ERR("(%I64x,%x,%I64x) was %u bytes, expected at least %Iu\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset,
195  tp.item->size, sizeof(INODE_EXTREF));
196  goto end;
197  }
198 
199  if (tp.item->size < offsetof(INODE_EXTREF, name[0]) + ier->n) {
200  ERR("(%I64x,%x,%I64x) was %u bytes, expected at least %Iu\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset,
201  tp.item->size, offsetof(INODE_EXTREF, name[0]) + ier->n);
202  goto end;
203  }
204 
206  if (!pp) {
207  ERR("out of memory\n");
208  goto end;
209  }
210 
211  pp->name.Buffer = ier->name;
212  pp->name.Length = pp->name.MaximumLength = ier->n;
213  pp->orig_subvol = orig_subvol;
214 
215  InsertTailList(&parts, &pp->list_entry);
216 
217  if (dir == ier->dir)
218  break;
219 
220  dir = ier->dir;
221  } else {
222  ERR("could not find INODE_REF for inode %I64x in subvol %I64x\n", dir, r->id);
223  goto end;
224  }
225  }
226  }
227 
228  fn.MaximumLength = 0;
229 
230  if (not_in_tree) {
231  le = parts.Blink;
232  while (le != &parts) {
234  LIST_ENTRY* le2 = le->Blink;
235 
236  if (pp->orig_subvol)
237  break;
238 
240  ExFreePool(pp);
241 
242  le = le2;
243  }
244  }
245 
246  le = parts.Flink;
247  while (le != &parts) {
249 
250  fn.MaximumLength += pp->name.Length + 1;
251 
252  le = le->Flink;
253  }
254 
255  fn.Buffer = ExAllocatePoolWithTag(PagedPool, fn.MaximumLength, ALLOC_TAG);
256  if (!fn.Buffer) {
257  ERR("out of memory\n");
258  goto end;
259  }
260 
261  fn.Length = 0;
262 
263  le = parts.Blink;
264  while (le != &parts) {
266 
267  fn.Buffer[fn.Length] = '\\';
268  fn.Length++;
269 
270  RtlCopyMemory(&fn.Buffer[fn.Length], pp->name.Buffer, pp->name.Length);
271  fn.Length += pp->name.Length;
272 
273  le = le->Blink;
274  }
275 
276  if (not_in_tree)
277  ERR("subvol %I64x, %.*s, offset %I64x\n", subvol, fn.Length, fn.Buffer, offset);
278  else
279  ERR("%.*s, offset %I64x\n", fn.Length, fn.Buffer, offset);
280 
281  Status = utf8_to_utf16(NULL, 0, &utf16len, fn.Buffer, fn.Length);
282  if (!NT_SUCCESS(Status)) {
283  ERR("utf8_to_utf16 1 returned %08lx\n", Status);
284  ExFreePool(fn.Buffer);
285  goto end;
286  }
287 
289  if (!err) {
290  ERR("out of memory\n");
291  ExFreePool(fn.Buffer);
292  goto end;
293  }
294 
295  err->address = addr;
296  err->device = devid;
297  err->recovered = false;
298  err->is_metadata = false;
299  err->parity = false;
300 
301  err->data.subvol = not_in_tree ? subvol : 0;
302  err->data.offset = offset;
303  err->data.filename_length = (uint16_t)utf16len;
304 
305  Status = utf8_to_utf16(err->data.filename, utf16len, &utf16len, fn.Buffer, fn.Length);
306  if (!NT_SUCCESS(Status)) {
307  ERR("utf8_to_utf16 2 returned %08lx\n", Status);
308  ExFreePool(fn.Buffer);
309  ExFreePool(err);
310  goto end;
311  }
312 
313  ExAcquireResourceExclusiveLite(&Vcb->scrub.stats_lock, true);
314 
315  Vcb->scrub.num_errors++;
316  InsertTailList(&Vcb->scrub.errors, &err->list_entry);
317 
318  ExReleaseResourceLite(&Vcb->scrub.stats_lock);
319 
320  ExFreePool(fn.Buffer);
321 
322 end:
323  while (!IsListEmpty(&parts)) {
325 
326  ExFreePool(pp);
327  }
328 }
329 
331  tree_header* tree;
333  leaf_node* ln;
334  ULONG i;
335 
336  tree = ExAllocatePoolWithTag(PagedPool, Vcb->superblock.node_size, ALLOC_TAG);
337  if (!tree) {
338  ERR("out of memory\n");
339  return;
340  }
341 
342  Status = read_data(Vcb, treeaddr, Vcb->superblock.node_size, NULL, true, (uint8_t*)tree, NULL, NULL, NULL, 0, false, NormalPagePriority);
343  if (!NT_SUCCESS(Status)) {
344  ERR("read_data returned %08lx\n", Status);
345  goto end;
346  }
347 
348  if (tree->level != 0) {
349  ERR("tree level was %x, expected 0\n", tree->level);
350  goto end;
351  }
352 
353  ln = (leaf_node*)&tree[1];
354 
355  for (i = 0; i < tree->num_items; i++) {
356  if (ln[i].key.obj_type == TYPE_EXTENT_DATA && ln[i].size >= sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2)) {
357  EXTENT_DATA* ed = (EXTENT_DATA*)((uint8_t*)tree + sizeof(tree_header) + ln[i].offset);
359 
360  if (ed->type == EXTENT_TYPE_REGULAR && ed2->size != 0 && ed2->address == addr)
361  log_file_checksum_error(Vcb, addr, devid, tree->tree_id, ln[i].key.obj_id, ln[i].key.offset + addr - extent);
362  }
363  }
364 
365 end:
366  ExFreePool(tree);
367 }
368 
370  scrub_error* err;
371 
373  if (!err) {
374  ERR("out of memory\n");
375  return;
376  }
377 
378  err->address = addr;
379  err->device = devid;
380  err->recovered = false;
381  err->is_metadata = true;
382  err->parity = false;
383 
384  err->metadata.root = root;
385  err->metadata.level = level;
386 
387  if (firstitem) {
388  ERR("root %I64x, level %u, first item (%I64x,%x,%I64x)\n", root, level, firstitem->obj_id,
389  firstitem->obj_type, firstitem->offset);
390 
391  err->metadata.firstitem = *firstitem;
392  } else {
393  ERR("root %I64x, level %u\n", root, level);
394 
395  RtlZeroMemory(&err->metadata.firstitem, sizeof(KEY));
396  }
397 
398  ExAcquireResourceExclusiveLite(&Vcb->scrub.stats_lock, true);
399 
400  Vcb->scrub.num_errors++;
401  InsertTailList(&Vcb->scrub.errors, &err->list_entry);
402 
403  ExReleaseResourceLite(&Vcb->scrub.stats_lock);
404 }
405 
407  tree_header* tree;
409  internal_node* in;
410  ULONG i;
411 
412  tree = ExAllocatePoolWithTag(PagedPool, Vcb->superblock.node_size, ALLOC_TAG);
413  if (!tree) {
414  ERR("out of memory\n");
415  return;
416  }
417 
418  Status = read_data(Vcb, offset, Vcb->superblock.node_size, NULL, true, (uint8_t*)tree, NULL, NULL, NULL, 0, false, NormalPagePriority);
419  if (!NT_SUCCESS(Status)) {
420  ERR("read_data returned %08lx\n", Status);
421  goto end;
422  }
423 
424  if (tree->level == 0) {
425  ERR("tree level was 0\n");
426  goto end;
427  }
428 
429  in = (internal_node*)&tree[1];
430 
431  for (i = 0; i < tree->num_items; i++) {
432  if (in[i].address == address) {
433  log_tree_checksum_error(Vcb, address, devid, tree->tree_id, tree->level - 1, &in[i].key);
434  break;
435  }
436  }
437 
438 end:
439  ExFreePool(tree);
440 }
441 
443  KEY searchkey;
446  EXTENT_ITEM* ei;
447  EXTENT_ITEM2* ei2 = NULL;
448  uint8_t* ptr;
449  ULONG len;
450  uint64_t rc;
451 
452  // FIXME - still log even if rest of this function fails
453 
454  searchkey.obj_id = address;
455  searchkey.obj_type = TYPE_METADATA_ITEM;
456  searchkey.offset = 0xffffffffffffffff;
457 
458  Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, false, NULL);
459  if (!NT_SUCCESS(Status)) {
460  ERR("find_item returned %08lx\n", Status);
461  return;
462  }
463 
465  tp.item->key.obj_id >= address + Vcb->superblock.sector_size ||
467  (tp.item->key.obj_type == TYPE_METADATA_ITEM && tp.item->key.obj_id + Vcb->superblock.node_size <= address)
468  )
469  return;
470 
471  if (tp.item->size < sizeof(EXTENT_ITEM)) {
472  ERR("(%I64x,%x,%I64x) was %u bytes, expected at least %Iu\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_ITEM));
473  return;
474  }
475 
476  ei = (EXTENT_ITEM*)tp.item->data;
477  ptr = (uint8_t*)&ei[1];
478  len = tp.item->size - sizeof(EXTENT_ITEM);
479 
481  if (tp.item->size < sizeof(EXTENT_ITEM) + sizeof(EXTENT_ITEM2)) {
482  ERR("(%I64x,%x,%I64x) was %u bytes, expected at least %Iu\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset,
483  tp.item->size, sizeof(EXTENT_ITEM) + sizeof(EXTENT_ITEM2));
484  return;
485  }
486 
487  ei2 = (EXTENT_ITEM2*)ptr;
488 
489  ptr += sizeof(EXTENT_ITEM2);
490  len -= sizeof(EXTENT_ITEM2);
491  }
492 
493  rc = 0;
494 
495  while (len > 0) {
496  uint8_t type = *ptr;
497 
498  ptr++;
499  len--;
500 
501  if (type == TYPE_TREE_BLOCK_REF) {
502  TREE_BLOCK_REF* tbr;
503 
504  if (len < sizeof(TREE_BLOCK_REF)) {
505  ERR("TREE_BLOCK_REF takes up %Iu bytes, but only %lu remaining\n", sizeof(TREE_BLOCK_REF), len);
506  break;
507  }
508 
509  tbr = (TREE_BLOCK_REF*)ptr;
510 
511  log_tree_checksum_error(Vcb, address, devid, tbr->offset, ei2 ? ei2->level : (uint8_t)tp.item->key.offset, ei2 ? &ei2->firstitem : NULL);
512 
513  rc++;
514 
515  ptr += sizeof(TREE_BLOCK_REF);
516  len -= sizeof(TREE_BLOCK_REF);
517  } else if (type == TYPE_EXTENT_DATA_REF) {
518  EXTENT_DATA_REF* edr;
519 
520  if (len < sizeof(EXTENT_DATA_REF)) {
521  ERR("EXTENT_DATA_REF takes up %Iu bytes, but only %lu remaining\n", sizeof(EXTENT_DATA_REF), len);
522  break;
523  }
524 
525  edr = (EXTENT_DATA_REF*)ptr;
526 
527  log_file_checksum_error(Vcb, address, devid, edr->root, edr->objid, edr->offset + address - tp.item->key.obj_id);
528 
529  rc += edr->count;
530 
531  ptr += sizeof(EXTENT_DATA_REF);
532  len -= sizeof(EXTENT_DATA_REF);
533  } else if (type == TYPE_SHARED_BLOCK_REF) {
534  SHARED_BLOCK_REF* sbr;
535 
536  if (len < sizeof(SHARED_BLOCK_REF)) {
537  ERR("SHARED_BLOCK_REF takes up %Iu bytes, but only %lu remaining\n", sizeof(SHARED_BLOCK_REF), len);
538  break;
539  }
540 
541  sbr = (SHARED_BLOCK_REF*)ptr;
542 
544 
545  rc++;
546 
547  ptr += sizeof(SHARED_BLOCK_REF);
548  len -= sizeof(SHARED_BLOCK_REF);
549  } else if (type == TYPE_SHARED_DATA_REF) {
550  SHARED_DATA_REF* sdr;
551 
552  if (len < sizeof(SHARED_DATA_REF)) {
553  ERR("SHARED_DATA_REF takes up %Iu bytes, but only %lu remaining\n", sizeof(SHARED_DATA_REF), len);
554  break;
555  }
556 
557  sdr = (SHARED_DATA_REF*)ptr;
558 
560 
561  rc += sdr->count;
562 
563  ptr += sizeof(SHARED_DATA_REF);
564  len -= sizeof(SHARED_DATA_REF);
565  } else {
566  ERR("unknown extent type %x\n", type);
567  break;
568  }
569  }
570 
571  if (rc < ei->refcount) {
572  do {
573  traverse_ptr next_tp;
574 
575  if (find_next_item(Vcb, &tp, &next_tp, false, NULL))
576  tp = next_tp;
577  else
578  break;
579 
580  if (tp.item->key.obj_id == address) {
582  log_tree_checksum_error(Vcb, address, devid, tp.item->key.offset, ei2 ? ei2->level : (uint8_t)tp.item->key.offset, ei2 ? &ei2->firstitem : NULL);
583  else if (tp.item->key.obj_type == TYPE_EXTENT_DATA_REF) {
584  EXTENT_DATA_REF* edr;
585 
586  if (tp.item->size < sizeof(EXTENT_DATA_REF)) {
587  ERR("(%I64x,%x,%I64x) was %u bytes, expected %Iu\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset,
588  tp.item->size, sizeof(EXTENT_DATA_REF));
589  break;
590  }
591 
592  edr = (EXTENT_DATA_REF*)tp.item->data;
593 
594  log_file_checksum_error(Vcb, address, devid, edr->root, edr->objid, edr->offset + address - tp.item->key.obj_id);
595  } else if (tp.item->key.obj_type == TYPE_SHARED_BLOCK_REF)
597  else if (tp.item->key.obj_type == TYPE_SHARED_DATA_REF)
599  } else
600  break;
601  } while (true);
602  }
603 }
604 
// Reports a scrub error found at address addr on device devid.
//
//   Vcb         - volume; its scrub.errors list and scrub.num_errors counter are
//                 updated under scrub.stats_lock on the recoverable path
//   addr        - address of the affected block (logged and stored in err->address)
//   devid       - device id (logged on the recoverable path, stored in err->device)
//   metadata    - selects the "metadata" vs "data" wording and err->is_metadata
//   recoverable - true: log a "recovering from ..." message and queue a
//                 scrub_error with recovered = true;
//                 false: log an "unrecoverable ..." message
//   parity      - on the recoverable path, selects the "parity error" wording and
//                 is stored in err->parity
//
// NOTE(review): this listing skips embedded lines 618 and 647. Line 618 is
// presumably the allocation that assigns err before the NULL check, and line 647
// presumably hands the unrecoverable case on to another routine - confirm
// against the real source file.
605 static void log_error(device_extension* Vcb, uint64_t addr, uint64_t devid, bool metadata, bool recoverable, bool parity) {
606  if (recoverable) {
607  scrub_error* err;
608 
609  if (parity) {
610  ERR("recovering from parity error at %I64x on device %I64x\n", addr, devid);
611  } else {
612  if (metadata)
613  ERR("recovering from metadata checksum error at %I64x on device %I64x\n", addr, devid);
614  else
615  ERR("recovering from data checksum error at %I64x on device %I64x\n", addr, devid);
616  }
617 
// (embedded line 618 is missing here - err is not assigned in the visible text)
619  if (!err) {
620  ERR("out of memory\n");
621  return;
622  }
623 
624  err->address = addr;
625  err->device = devid;
626  err->recovered = true;
627  err->is_metadata = metadata;
628  err->parity = parity;
629 
// No per-file or per-tree details are recorded for a recovered error: the
// member matching the error kind is simply zeroed.
630  if (metadata)
631  RtlZeroMemory(&err->metadata, sizeof(err->metadata));
632  else
633  RtlZeroMemory(&err->data, sizeof(err->data));
634 
// The error list and counter are only modified while holding stats_lock exclusively.
635  ExAcquireResourceExclusiveLite(&Vcb->scrub.stats_lock, true);
636 
637  Vcb->scrub.num_errors++;
638  InsertTailList(&Vcb->scrub.errors, &err->list_entry);
639 
640  ExReleaseResourceLite(&Vcb->scrub.stats_lock);
641  } else {
642  if (metadata)
643  ERR("unrecoverable metadata checksum error at %I64x\n", addr);
644  else
645  ERR("unrecoverable data checksum error at %I64x\n", addr);
646 
// (embedded line 647 is missing here)
648  }
649 }
650 
// I/O completion routine for a scrub read of a single stripe.
//
//   DeviceObject - not referenced in the visible lines
//   Irp          - completed IRP; its IoStatus is copied into stripe->iosb
//   conptr       - the scrub_context_stripe this read belongs to
//
// Decrements context->stripes_left and, when the result is zero, signals
// context->Event.
//
// NOTE(review): this listing skips embedded lines 654, 657 and 664. They
// presumably hold the declaration of `context`, a statement between the
// decrement and the iosb copy, and the function's return statement - confirm
// against the real source file.
651 _Function_class_(IO_COMPLETION_ROUTINE)
652 static NTSTATUS __stdcall scrub_read_completion(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID conptr) {
653  scrub_context_stripe* stripe = conptr;
// (embedded line 654 is missing here - `context` is not declared in the visible text)
655  ULONG left = InterlockedDecrement(&context->stripes_left);
656 
658 
659  stripe->iosb = Irp->IoStatus;
660 
// Only the completion that brings the counter to zero sets the event.
661  if (left == 0)
662  KeSetEvent(&context->Event, 0, false);
663 
// (embedded line 664 is missing here - no return statement in the visible text)
665 }
666 
669  bool csum_error = false;
670  ULONG i;
671  CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&c->chunk_item[1];
672  uint16_t present_devices = 0;
673 
674  if (csum) {
675  ULONG good_stripe = 0xffffffff;
676 
677  for (i = 0; i < c->chunk_item->num_stripes; i++) {
678  if (c->devices[i]->devobj) {
679  present_devices++;
680 
681  // if first stripe is okay, we only need to check that the others are identical to it
682  if (good_stripe != 0xffffffff) {
683  if (RtlCompareMemory(context->stripes[i].buf, context->stripes[good_stripe].buf,
684  context->stripes[good_stripe].length) != context->stripes[i].length) {
685  context->stripes[i].csum_error = true;
686  csum_error = true;
688  }
689  } else {
690  Status = check_csum(Vcb, context->stripes[i].buf, context->stripes[i].length / Vcb->superblock.sector_size, csum);
691  if (Status == STATUS_CRC_ERROR) {
692  context->stripes[i].csum_error = true;
693  csum_error = true;
695  } else if (!NT_SUCCESS(Status)) {
696  ERR("check_csum returned %08lx\n", Status);
697  return Status;
698  } else
699  good_stripe = i;
700  }
701  }
702  }
703  } else {
704  ULONG good_stripe = 0xffffffff;
705 
706  for (i = 0; i < c->chunk_item->num_stripes; i++) {
707  ULONG j;
708 
709  if (c->devices[i]->devobj) {
710  // if first stripe is okay, we only need to check that the others are identical to it
711  if (good_stripe != 0xffffffff) {
712  if (RtlCompareMemory(context->stripes[i].buf, context->stripes[good_stripe].buf,
713  context->stripes[good_stripe].length) != context->stripes[i].length) {
714  context->stripes[i].csum_error = true;
715  csum_error = true;
717  }
718  } else {
719  for (j = 0; j < context->stripes[i].length / Vcb->superblock.node_size; j++) {
720  tree_header* th = (tree_header*)&context->stripes[i].buf[j * Vcb->superblock.node_size];
721 
722  if (!check_tree_checksum(Vcb, th) || th->address != offset + UInt32x32To64(j, Vcb->superblock.node_size)) {
723  context->stripes[i].csum_error = true;
724  csum_error = true;
726  }
727  }
728 
729  if (!context->stripes[i].csum_error)
730  good_stripe = i;
731  }
732  }
733  }
734  }
735 
736  if (!csum_error)
737  return STATUS_SUCCESS;
738 
739  // handle checksum error
740 
741  for (i = 0; i < c->chunk_item->num_stripes; i++) {
742  if (context->stripes[i].csum_error) {
743  if (csum) {
744  context->stripes[i].bad_csums = ExAllocatePoolWithTag(PagedPool, context->stripes[i].length * Vcb->csum_size / Vcb->superblock.sector_size, ALLOC_TAG);
745  if (!context->stripes[i].bad_csums) {
746  ERR("out of memory\n");
748  }
749 
750  do_calc_job(Vcb, context->stripes[i].buf, context->stripes[i].length / Vcb->superblock.sector_size, context->stripes[i].bad_csums);
751  } else {
752  ULONG j;
753 
754  context->stripes[i].bad_csums = ExAllocatePoolWithTag(PagedPool, context->stripes[i].length * Vcb->csum_size / Vcb->superblock.node_size, ALLOC_TAG);
755  if (!context->stripes[i].bad_csums) {
756  ERR("out of memory\n");
758  }
759 
760  for (j = 0; j < context->stripes[i].length / Vcb->superblock.node_size; j++) {
761  tree_header* th = (tree_header*)&context->stripes[i].buf[j * Vcb->superblock.node_size];
762 
763  get_tree_checksum(Vcb, th, (uint8_t*)context->stripes[i].bad_csums + (Vcb->csum_size * j));
764  }
765  }
766  }
767  }
768 
769  if (present_devices > 1) {
770  ULONG good_stripe = 0xffffffff;
771 
772  for (i = 0; i < c->chunk_item->num_stripes; i++) {
773  if (c->devices[i]->devobj && !context->stripes[i].csum_error) {
774  good_stripe = i;
775  break;
776  }
777  }
778 
779  if (good_stripe != 0xffffffff) {
780  // log
781 
782  for (i = 0; i < c->chunk_item->num_stripes; i++) {
783  if (context->stripes[i].csum_error) {
784  ULONG j;
785 
786  if (csum) {
787  for (j = 0; j < context->stripes[i].length / Vcb->superblock.sector_size; j++) {
788  if (RtlCompareMemory((uint8_t*)context->stripes[i].bad_csums + (j * Vcb->csum_size), (uint8_t*)csum + (j + Vcb->csum_size), Vcb->csum_size) != Vcb->csum_size) {
789  uint64_t addr = offset + UInt32x32To64(j, Vcb->superblock.sector_size);
790 
791  log_error(Vcb, addr, c->devices[i]->devitem.dev_id, false, true, false);
793  }
794  }
795  } else {
796  for (j = 0; j < context->stripes[i].length / Vcb->superblock.node_size; j++) {
797  tree_header* th = (tree_header*)&context->stripes[i].buf[j * Vcb->superblock.node_size];
798  uint64_t addr = offset + UInt32x32To64(j, Vcb->superblock.node_size);
799 
800  if (RtlCompareMemory((uint8_t*)context->stripes[i].bad_csums + (j * Vcb->csum_size), th, Vcb->csum_size) != Vcb->csum_size || th->address != addr) {
801  log_error(Vcb, addr, c->devices[i]->devitem.dev_id, true, true, false);
803  }
804  }
805  }
806  }
807  }
808 
809  // write good data over bad
810 
811  for (i = 0; i < c->chunk_item->num_stripes; i++) {
812  if (context->stripes[i].csum_error && !c->devices[i]->readonly) {
813  Status = write_data_phys(c->devices[i]->devobj, c->devices[i]->fileobj, cis[i].offset + offset - c->offset,
814  context->stripes[good_stripe].buf, context->stripes[i].length);
815 
816  if (!NT_SUCCESS(Status)) {
817  ERR("write_data_phys returned %08lx\n", Status);
819  return Status;
820  }
821  }
822  }
823 
824  return STATUS_SUCCESS;
825  }
826 
827  // if csum errors on all stripes, check sector by sector
828 
829  for (i = 0; i < c->chunk_item->num_stripes; i++) {
830  ULONG j;
831 
832  if (c->devices[i]->devobj) {
833  if (csum) {
834  for (j = 0; j < context->stripes[i].length / Vcb->superblock.sector_size; j++) {
835  if (RtlCompareMemory((uint8_t*)context->stripes[i].bad_csums + (j * Vcb->csum_size), (uint8_t*)csum + (j * Vcb->csum_size), Vcb->csum_size) != Vcb->csum_size) {
836  ULONG k;
837  uint64_t addr = offset + UInt32x32To64(j, Vcb->superblock.sector_size);
838  bool recovered = false;
839 
840  for (k = 0; k < c->chunk_item->num_stripes; k++) {
841  if (i != k && c->devices[k]->devobj &&
842  RtlCompareMemory((uint8_t*)context->stripes[k].bad_csums + (j * Vcb->csum_size),
843  (uint8_t*)csum + (j * Vcb->csum_size), Vcb->csum_size) == Vcb->csum_size) {
844  log_error(Vcb, addr, c->devices[i]->devitem.dev_id, false, true, false);
846 
847  RtlCopyMemory(context->stripes[i].buf + (j * Vcb->superblock.sector_size),
848  context->stripes[k].buf + (j * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
849 
850  recovered = true;
851  break;
852  }
853  }
854 
855  if (!recovered) {
856  log_error(Vcb, addr, c->devices[i]->devitem.dev_id, false, false, false);
858  }
859  }
860  }
861  } else {
862  for (j = 0; j < context->stripes[i].length / Vcb->superblock.node_size; j++) {
863  tree_header* th = (tree_header*)&context->stripes[i].buf[j * Vcb->superblock.node_size];
864  uint64_t addr = offset + UInt32x32To64(j, Vcb->superblock.node_size);
865 
866  if (RtlCompareMemory((uint8_t*)context->stripes[i].bad_csums + (j * Vcb->csum_size), th, Vcb->csum_size) != Vcb->csum_size || th->address != addr) {
867  ULONG k;
868  bool recovered = false;
869 
870  for (k = 0; k < c->chunk_item->num_stripes; k++) {
871  if (i != k && c->devices[k]->devobj) {
872  tree_header* th2 = (tree_header*)&context->stripes[k].buf[j * Vcb->superblock.node_size];
873 
874  if (RtlCompareMemory((uint8_t*)context->stripes[k].bad_csums + (j * Vcb->csum_size), th2, Vcb->csum_size) == Vcb->csum_size && th2->address == addr) {
875  log_error(Vcb, addr, c->devices[i]->devitem.dev_id, true, true, false);
877 
878  RtlCopyMemory(th, th2, Vcb->superblock.node_size);
879 
880  recovered = true;
881  break;
882  }
883  }
884  }
885 
886  if (!recovered) {
887  log_error(Vcb, addr, c->devices[i]->devitem.dev_id, true, false, false);
889  }
890  }
891  }
892  }
893  }
894  }
895 
896  // write good data over bad
897 
898  for (i = 0; i < c->chunk_item->num_stripes; i++) {
899  if (c->devices[i]->devobj && !c->devices[i]->readonly) {
900  Status = write_data_phys(c->devices[i]->devobj, c->devices[i]->fileobj, cis[i].offset + offset - c->offset,
901  context->stripes[i].buf, context->stripes[i].length);
902  if (!NT_SUCCESS(Status)) {
903  ERR("write_data_phys returned %08lx\n", Status);
905  return Status;
906  }
907  }
908  }
909 
910  return STATUS_SUCCESS;
911  }
912 
913  for (i = 0; i < c->chunk_item->num_stripes; i++) {
914  if (c->devices[i]->devobj) {
915  ULONG j;
916 
917  if (csum) {
918  for (j = 0; j < context->stripes[i].length / Vcb->superblock.sector_size; j++) {
919  if (RtlCompareMemory((uint8_t*)context->stripes[i].bad_csums + (j * Vcb->csum_size), (uint8_t*)csum + (j + Vcb->csum_size), Vcb->csum_size) != Vcb->csum_size) {
920  uint64_t addr = offset + UInt32x32To64(j, Vcb->superblock.sector_size);
921 
922  log_error(Vcb, addr, c->devices[i]->devitem.dev_id, false, false, false);
923  }
924  }
925  } else {
926  for (j = 0; j < context->stripes[i].length / Vcb->superblock.node_size; j++) {
927  tree_header* th = (tree_header*)&context->stripes[i].buf[j * Vcb->superblock.node_size];
928  uint64_t addr = offset + UInt32x32To64(j, Vcb->superblock.node_size);
929 
930  if (RtlCompareMemory((uint8_t*)context->stripes[i].bad_csums + (j * Vcb->csum_size), th, Vcb->csum_size) != Vcb->csum_size || th->address != addr)
931  log_error(Vcb, addr, c->devices[i]->devitem.dev_id, true, false, false);
932  }
933  }
934  }
935  }
936 
937  return STATUS_SUCCESS;
938 }
939 
941  ULONG j;
943  uint32_t pos, *stripeoff;
944 
945  pos = 0;
946  stripeoff = ExAllocatePoolWithTag(NonPagedPool, sizeof(uint32_t) * c->chunk_item->num_stripes, ALLOC_TAG);
947  if (!stripeoff) {
948  ERR("out of memory\n");
950  }
951 
952  RtlZeroMemory(stripeoff, sizeof(uint32_t) * c->chunk_item->num_stripes);
953 
954  stripe = startoffstripe;
955  while (pos < length) {
956  uint32_t readlen;
957 
958  if (pos == 0)
959  readlen = (uint32_t)min(context->stripes[stripe].length, c->chunk_item->stripe_length - (context->stripes[stripe].start % c->chunk_item->stripe_length));
960  else
961  readlen = min(length - pos, (uint32_t)c->chunk_item->stripe_length);
962 
963  if (csum) {
964  for (j = 0; j < readlen; j += Vcb->superblock.sector_size) {
965  if (!check_sector_csum(Vcb, context->stripes[stripe].buf + stripeoff[stripe], (uint8_t*)csum + (pos * Vcb->csum_size / Vcb->superblock.sector_size))) {
966  uint64_t addr = offset + pos;
967 
968  log_error(Vcb, addr, c->devices[stripe]->devitem.dev_id, false, false, false);
970  }
971 
972  pos += Vcb->superblock.sector_size;
973  stripeoff[stripe] += Vcb->superblock.sector_size;
974  }
975  } else {
976  for (j = 0; j < readlen; j += Vcb->superblock.node_size) {
977  tree_header* th = (tree_header*)(context->stripes[stripe].buf + stripeoff[stripe]);
978  uint64_t addr = offset + pos;
979 
980  if (!check_tree_checksum(Vcb, th) || th->address != addr) {
981  log_error(Vcb, addr, c->devices[stripe]->devitem.dev_id, true, false, false);
983  }
984 
985  pos += Vcb->superblock.node_size;
986  stripeoff[stripe] += Vcb->superblock.node_size;
987  }
988  }
989 
990  stripe = (stripe + 1) % c->chunk_item->num_stripes;
991  }
992 
993  ExFreePool(stripeoff);
994 
995  return STATUS_SUCCESS;
996 }
997 
999  ULONG j;
1000  uint16_t stripe, sub_stripes = max(c->chunk_item->sub_stripes, 1);
1001  uint32_t pos, *stripeoff;
1002  bool csum_error = false;
1003  NTSTATUS Status;
1004 
1005  pos = 0;
1006  stripeoff = ExAllocatePoolWithTag(NonPagedPool, sizeof(uint32_t) * c->chunk_item->num_stripes / sub_stripes, ALLOC_TAG);
1007  if (!stripeoff) {
1008  ERR("out of memory\n");
1010  }
1011 
1012  RtlZeroMemory(stripeoff, sizeof(uint32_t) * c->chunk_item->num_stripes / sub_stripes);
1013 
1014  stripe = startoffstripe;
1015  while (pos < length) {
1016  uint32_t readlen;
1017 
1018  if (pos == 0)
1019  readlen = (uint32_t)min(context->stripes[stripe * sub_stripes].length,
1020  c->chunk_item->stripe_length - (context->stripes[stripe * sub_stripes].start % c->chunk_item->stripe_length));
1021  else
1022  readlen = min(length - pos, (uint32_t)c->chunk_item->stripe_length);
1023 
1024  if (csum) {
1025  ULONG good_stripe = 0xffffffff;
1026  uint16_t k;
1027 
1028  for (k = 0; k < sub_stripes; k++) {
1029  if (c->devices[(stripe * sub_stripes) + k]->devobj) {
1030  // if first stripe is okay, we only need to check that the others are identical to it
1031  if (good_stripe != 0xffffffff) {
1032  if (RtlCompareMemory(context->stripes[(stripe * sub_stripes) + k].buf + stripeoff[stripe],
1033  context->stripes[(stripe * sub_stripes) + good_stripe].buf + stripeoff[stripe],
1034  readlen) != readlen) {
1035  context->stripes[(stripe * sub_stripes) + k].csum_error = true;
1036  csum_error = true;
1037  log_device_error(Vcb, c->devices[(stripe * sub_stripes) + k], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
1038  }
1039  } else {
1040  for (j = 0; j < readlen; j += Vcb->superblock.sector_size) {
1041  if (!check_sector_csum(Vcb, context->stripes[(stripe * sub_stripes) + k].buf + stripeoff[stripe] + j,
1042  (uint8_t*)csum + ((pos + j) * Vcb->csum_size / Vcb->superblock.sector_size))) {
1043  csum_error = true;
1044  context->stripes[(stripe * sub_stripes) + k].csum_error = true;
1045  log_device_error(Vcb, c->devices[(stripe * sub_stripes) + k], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
1046  break;
1047  }
1048  }
1049 
1050  if (!context->stripes[(stripe * sub_stripes) + k].csum_error)
1051  good_stripe = k;
1052  }
1053  }
1054  }
1055 
1056  pos += readlen;
1057  stripeoff[stripe] += readlen;
1058  } else {
1059  ULONG good_stripe = 0xffffffff;
1060  uint16_t k;
1061 
1062  for (k = 0; k < sub_stripes; k++) {
1063  if (c->devices[(stripe * sub_stripes) + k]->devobj) {
1064  // if first stripe is okay, we only need to check that the others are identical to it
1065  if (good_stripe != 0xffffffff) {
1066  if (RtlCompareMemory(context->stripes[(stripe * sub_stripes) + k].buf + stripeoff[stripe],
1067  context->stripes[(stripe * sub_stripes) + good_stripe].buf + stripeoff[stripe],
1068  readlen) != readlen) {
1069  context->stripes[(stripe * sub_stripes) + k].csum_error = true;
1070  csum_error = true;
1071  log_device_error(Vcb, c->devices[(stripe * sub_stripes) + k], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
1072  }
1073  } else {
1074  for (j = 0; j < readlen; j += Vcb->superblock.node_size) {
1075  tree_header* th = (tree_header*)(context->stripes[(stripe * sub_stripes) + k].buf + stripeoff[stripe] + j);
1076  uint64_t addr = offset + pos + j;
1077 
1078  if (!check_tree_checksum(Vcb, th) || th->address != addr) {
1079  csum_error = true;
1080  context->stripes[(stripe * sub_stripes) + k].csum_error = true;
1081  log_device_error(Vcb, c->devices[(stripe * sub_stripes) + k], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
1082  break;
1083  }
1084  }
1085 
1086  if (!context->stripes[(stripe * sub_stripes) + k].csum_error)
1087  good_stripe = k;
1088  }
1089  }
1090  }
1091 
1092  pos += readlen;
1093  stripeoff[stripe] += readlen;
1094  }
1095 
1096  stripe = (stripe + 1) % (c->chunk_item->num_stripes / sub_stripes);
1097  }
1098 
1099  if (!csum_error) {
1101  goto end;
1102  }
1103 
1104  for (j = 0; j < c->chunk_item->num_stripes; j += sub_stripes) {
1105  ULONG goodstripe = 0xffffffff;
1106  uint16_t k;
1107  bool hasbadstripe = false;
1108 
1109  if (context->stripes[j].length == 0)
1110  continue;
1111 
1112  for (k = 0; k < sub_stripes; k++) {
1113  if (c->devices[j + k]->devobj) {
1114  if (!context->stripes[j + k].csum_error)
1115  goodstripe = k;
1116  else
1117  hasbadstripe = true;
1118  }
1119  }
1120 
1121  if (hasbadstripe) {
1122  if (goodstripe != 0xffffffff) {
1123  for (k = 0; k < sub_stripes; k++) {
1124  if (c->devices[j + k]->devobj && context->stripes[j + k].csum_error) {
1125  uint32_t so = 0;
1126  bool recovered = false;
1127 
1128  pos = 0;
1129 
1130  stripe = startoffstripe;
1131  while (pos < length) {
1132  uint32_t readlen;
1133 
1134  if (pos == 0)
1135  readlen = (uint32_t)min(context->stripes[stripe * sub_stripes].length,
1136  c->chunk_item->stripe_length - (context->stripes[stripe * sub_stripes].start % c->chunk_item->stripe_length));
1137  else
1138  readlen = min(length - pos, (uint32_t)c->chunk_item->stripe_length);
1139 
1140  if (stripe == j / sub_stripes) {
1141  if (csum) {
1142  ULONG l;
1143 
1144  for (l = 0; l < readlen; l += Vcb->superblock.sector_size) {
1145  if (RtlCompareMemory(context->stripes[j + k].buf + so,
1146  context->stripes[j + goodstripe].buf + so,
1147  Vcb->superblock.sector_size) != Vcb->superblock.sector_size) {
1148  uint64_t addr = offset + pos;
1149 
1150  log_error(Vcb, addr, c->devices[j + k]->devitem.dev_id, false, true, false);
1151 
1152  recovered = true;
1153  }
1154 
1155  pos += Vcb->superblock.sector_size;
1156  so += Vcb->superblock.sector_size;
1157  }
1158  } else {
1159  ULONG l;
1160 
1161  for (l = 0; l < readlen; l += Vcb->superblock.node_size) {
1162  if (RtlCompareMemory(context->stripes[j + k].buf + so,
1163  context->stripes[j + goodstripe].buf + so,
1164  Vcb->superblock.node_size) != Vcb->superblock.node_size) {
1165  uint64_t addr = offset + pos;
1166 
1167  log_error(Vcb, addr, c->devices[j + k]->devitem.dev_id, true, true, false);
1168 
1169  recovered = true;
1170  }
1171 
1172  pos += Vcb->superblock.node_size;
1173  so += Vcb->superblock.node_size;
1174  }
1175  }
1176  } else
1177  pos += readlen;
1178 
1179  stripe = (stripe + 1) % (c->chunk_item->num_stripes / sub_stripes);
1180  }
1181 
1182  if (recovered) {
1183  // write good data over bad
1184 
1185  if (!c->devices[j + k]->readonly) {
1186  CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&c->chunk_item[1];
1187 
1188  Status = write_data_phys(c->devices[j + k]->devobj, c->devices[j + k]->fileobj, cis[j + k].offset + offset - c->offset,
1189  context->stripes[j + goodstripe].buf, context->stripes[j + goodstripe].length);
1190 
1191  if (!NT_SUCCESS(Status)) {
1192  ERR("write_data_phys returned %08lx\n", Status);
1194  goto end;
1195  }
1196  }
1197  }
1198  }
1199  }
1200  } else {
1201  uint32_t so = 0;
1202  bool recovered = false;
1203 
1204  if (csum) {
1205  for (k = 0; k < sub_stripes; k++) {
1206  if (c->devices[j + k]->devobj) {
1207  context->stripes[j + k].bad_csums = ExAllocatePoolWithTag(PagedPool, context->stripes[j + k].length * Vcb->csum_size / Vcb->superblock.sector_size,
1208  ALLOC_TAG);
1209  if (!context->stripes[j + k].bad_csums) {
1210  ERR("out of memory\n");
1212  goto end;
1213  }
1214 
1215  do_calc_job(Vcb, context->stripes[j + k].buf, context->stripes[j + k].length / Vcb->superblock.sector_size, context->stripes[j + k].bad_csums);
1216  }
1217  }
1218  } else {
1219  for (k = 0; k < sub_stripes; k++) {
1220  if (c->devices[j + k]->devobj) {
1221  ULONG l;
1222 
1223  context->stripes[j + k].bad_csums = ExAllocatePoolWithTag(PagedPool, context->stripes[j + k].length * Vcb->csum_size / Vcb->superblock.node_size,
1224  ALLOC_TAG);
1225  if (!context->stripes[j + k].bad_csums) {
1226  ERR("out of memory\n");
1228  goto end;
1229  }
1230 
1231  for (l = 0; l < context->stripes[j + k].length / Vcb->superblock.node_size; l++) {
1232  tree_header* th = (tree_header*)&context->stripes[j + k].buf[l * Vcb->superblock.node_size];
1233 
1234  get_tree_checksum(Vcb, th, (uint8_t*)context->stripes[j + k].bad_csums + (Vcb->csum_size * l));
1235  }
1236  }
1237  }
1238  }
1239 
1240  pos = 0;
1241 
1242  stripe = startoffstripe;
1243  while (pos < length) {
1244  uint32_t readlen;
1245 
1246  if (pos == 0)
1247  readlen = (uint32_t)min(context->stripes[stripe * sub_stripes].length,
1248  c->chunk_item->stripe_length - (context->stripes[stripe * sub_stripes].start % c->chunk_item->stripe_length));
1249  else
1250  readlen = min(length - pos, (uint32_t)c->chunk_item->stripe_length);
1251 
1252  if (stripe == j / sub_stripes) {
1253  ULONG l;
1254 
1255  if (csum) {
1256  for (l = 0; l < readlen; l += Vcb->superblock.sector_size) {
1257  bool has_error = false;
1258 
1259  goodstripe = 0xffffffff;
1260  for (k = 0; k < sub_stripes; k++) {
1261  if (c->devices[j + k]->devobj) {
1262  if (RtlCompareMemory((uint8_t*)context->stripes[j + k].bad_csums + (so * Vcb->csum_size / Vcb->superblock.sector_size),
1263  (uint8_t*)csum + (pos * Vcb->csum_size / Vcb->superblock.sector_size),
1264  Vcb->csum_size) != Vcb->csum_size) {
1265  has_error = true;
1266  } else
1267  goodstripe = k;
1268  }
1269  }
1270 
1271  if (has_error) {
1272  if (goodstripe != 0xffffffff) {
1273  for (k = 0; k < sub_stripes; k++) {
1274  if (c->devices[j + k]->devobj &&
1275  RtlCompareMemory((uint8_t*)context->stripes[j + k].bad_csums + (so * Vcb->csum_size / Vcb->superblock.sector_size),
1276  (uint8_t*)csum + (pos * Vcb->csum_size / Vcb->superblock.sector_size),
1277  Vcb->csum_size) != Vcb->csum_size) {
1278  uint64_t addr = offset + pos;
1279 
1280  log_error(Vcb, addr, c->devices[j + k]->devitem.dev_id, false, true, false);
1281 
1282  recovered = true;
1283 
1284  RtlCopyMemory(context->stripes[j + k].buf + so, context->stripes[j + goodstripe].buf + so,
1285  Vcb->superblock.sector_size);
1286  }
1287  }
1288  } else {
1289  uint64_t addr = offset + pos;
1290 
1291  for (k = 0; k < sub_stripes; k++) {
1292  if (c->devices[j + j]->devobj) {
1293  log_error(Vcb, addr, c->devices[j + k]->devitem.dev_id, false, false, false);
1295  }
1296  }
1297  }
1298  }
1299 
1300  pos += Vcb->superblock.sector_size;
1301  so += Vcb->superblock.sector_size;
1302  }
1303  } else {
1304  for (l = 0; l < readlen; l += Vcb->superblock.node_size) {
1305  for (k = 0; k < sub_stripes; k++) {
1306  if (c->devices[j + k]->devobj) {
1307  tree_header* th = (tree_header*)&context->stripes[j + k].buf[so];
1308  uint64_t addr = offset + pos;
1309 
1310  if (RtlCompareMemory((uint8_t*)context->stripes[j + k].bad_csums + (so * Vcb->csum_size / Vcb->superblock.node_size), th, Vcb->csum_size) != Vcb->csum_size || th->address != addr) {
1311  ULONG m;
1312 
1313  recovered = false;
1314 
1315  for (m = 0; m < sub_stripes; m++) {
1316  if (m != k) {
1317  tree_header* th2 = (tree_header*)&context->stripes[j + m].buf[so];
1318 
1319  if (RtlCompareMemory((uint8_t*)context->stripes[j + m].bad_csums + (so * Vcb->csum_size / Vcb->superblock.node_size), th2, Vcb->csum_size) == Vcb->csum_size && th2->address == addr) {
1320  log_error(Vcb, addr, c->devices[j + k]->devitem.dev_id, true, true, false);
1321 
1322  RtlCopyMemory(th, th2, Vcb->superblock.node_size);
1323 
1324  recovered = true;
1325  break;
1326  } else
1328  }
1329  }
1330 
1331  if (!recovered)
1332  log_error(Vcb, addr, c->devices[j + k]->devitem.dev_id, true, false, false);
1333  }
1334  }
1335  }
1336 
1337  pos += Vcb->superblock.node_size;
1338  so += Vcb->superblock.node_size;
1339  }
1340  }
1341  } else
1342  pos += readlen;
1343 
1344  stripe = (stripe + 1) % (c->chunk_item->num_stripes / sub_stripes);
1345  }
1346 
1347  if (recovered) {
1348  // write good data over bad
1349 
1350  for (k = 0; k < sub_stripes; k++) {
1351  if (c->devices[j + k]->devobj && !c->devices[j + k]->readonly) {
1352  CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&c->chunk_item[1];
1353 
1354  Status = write_data_phys(c->devices[j + k]->devobj, c->devices[j + k]->fileobj, cis[j + k].offset + offset - c->offset,
1355  context->stripes[j + k].buf, context->stripes[j + k].length);
1356 
1357  if (!NT_SUCCESS(Status)) {
1358  ERR("write_data_phys returned %08lx\n", Status);
1360  goto end;
1361  }
1362  }
1363  }
1364  }
1365  }
1366  }
1367  }
1368 
1370 
1371 end:
1372  ExFreePool(stripeoff);
1373 
1374  return Status;
1375 }
1376 
// Body of the routine that reads one extent from every stripe of a chunk and then hands the
// buffers to the checker for the chunk's profile.
//
// NOTE(review): the signature line (source line 1377) is absent from this listing. The TRACE
// below names the arguments Vcb, c, type, offset, size and csum, and a later caller appears to
// invoke this routine as scrub_extent() - confirm against the upstream file.
// NOTE(review): wherever the embedded numbering skips a value, the listing has dropped a source
// line. The gaps that sit directly before a `goto end` presumably assign Status - confirm.
//
// Every failure path jumps to `end`, which releases whatever has been set up so far; the
// function returns Status.
1378  ULONG i;
// NOTE(review): source line 1379 is missing; presumably it declares the local `context` - confirm.
1380  CHUNK_ITEM_STRIPE* cis;
1381  NTSTATUS Status;
1382  uint16_t startoffstripe, num_missing, allowed_missing;
1383 
1384  TRACE("(%p, %p, %lx, %I64x, %x, %p)\n", Vcb, c, type, offset, size, csum);
1385 
// One scrub_context_stripe per stripe of the chunk, zeroed so that the cleanup code at `end`
// can tell which members were actually set up.
1386  context.stripes = ExAllocatePoolWithTag(NonPagedPool, sizeof(scrub_context_stripe) * c->chunk_item->num_stripes, ALLOC_TAG);
1387  if (!context.stripes) {
1388  ERR("out of memory\n");
1390  goto end;
1391  }
1392 
1393  RtlZeroMemory(context.stripes, sizeof(scrub_context_stripe) * c->chunk_item->num_stripes);
1394 
1395  context.stripes_left = 0;
1396 
// The CHUNK_ITEM_STRIPE array is taken from the memory immediately after the CHUNK_ITEM.
1397  cis = (CHUNK_ITEM_STRIPE*)&c->chunk_item[1];
1398 
1399  if (type == BLOCK_FLAG_RAID0) {
1400  uint64_t startoff, endoff;
1401  uint16_t endoffstripe;
1402 
// Translate the first and the last byte of the range (relative to the chunk start).
// get_raid0_offset presumably yields an offset within a device stripe plus the stripe index - confirm.
1403  get_raid0_offset(offset - c->offset, c->chunk_item->stripe_length, c->chunk_item->num_stripes, &startoff, &startoffstripe);
1404  get_raid0_offset(offset + size - c->offset - 1, c->chunk_item->stripe_length, c->chunk_item->num_stripes, &endoff, &endoffstripe);
1405 
// Per stripe: the start is rounded up to the next stripe_length boundary for stripes before
// startoffstripe, taken exactly for startoffstripe, and rounded down for stripes after it.
// The length is derived the same way from endoff / endoffstripe.
1406  for (i = 0; i < c->chunk_item->num_stripes; i++) {
1407  if (startoffstripe > i)
1408  context.stripes[i].start = startoff - (startoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length;
1409  else if (startoffstripe == i)
1410  context.stripes[i].start = startoff;
1411  else
1412  context.stripes[i].start = startoff - (startoff % c->chunk_item->stripe_length);
1413 
1414  if (endoffstripe > i)
1415  context.stripes[i].length = (uint32_t)(endoff - (endoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length - context.stripes[i].start);
1416  else if (endoffstripe == i)
1417  context.stripes[i].length = (uint32_t)(endoff + 1 - context.stripes[i].start);
1418  else
1419  context.stripes[i].length = (uint32_t)(endoff - (endoff % c->chunk_item->stripe_length) - context.stripes[i].start);
1420  }
1421 
// RAID0: no device may be missing.
1422  allowed_missing = 0;
1423  } else if (type == BLOCK_FLAG_RAID10) {
1424  uint64_t startoff, endoff;
// A sub_stripes value of 0 in the chunk item is treated as 1.
1425  uint16_t endoffstripe, j, sub_stripes = max(c->chunk_item->sub_stripes, 1);
1426 
// Same translation as for RAID0, but over num_stripes / sub_stripes groups of devices.
1427  get_raid0_offset(offset - c->offset, c->chunk_item->stripe_length, c->chunk_item->num_stripes / sub_stripes, &startoff, &startoffstripe);
1428  get_raid0_offset(offset + size - c->offset - 1, c->chunk_item->stripe_length, c->chunk_item->num_stripes / sub_stripes, &endoff, &endoffstripe);
1429 
1430  if ((c->chunk_item->num_stripes % sub_stripes) != 0) {
1431  ERR("chunk %I64x: num_stripes %x was not a multiple of sub_stripes %x!\n", c->offset, c->chunk_item->num_stripes, sub_stripes);
1433  goto end;
1434  }
1435 
// Convert group indices into indices of the first device of each group.
1436  startoffstripe *= sub_stripes;
1437  endoffstripe *= sub_stripes;
1438 
1439  for (i = 0; i < c->chunk_item->num_stripes; i += sub_stripes) {
1440  if (startoffstripe > i)
1441  context.stripes[i].start = startoff - (startoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length;
1442  else if (startoffstripe == i)
1443  context.stripes[i].start = startoff;
1444  else
1445  context.stripes[i].start = startoff - (startoff % c->chunk_item->stripe_length);
1446 
1447  if (endoffstripe > i)
1448  context.stripes[i].length = (uint32_t)(endoff - (endoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length - context.stripes[i].start);
1449  else if (endoffstripe == i)
1450  context.stripes[i].length = (uint32_t)(endoff + 1 - context.stripes[i].start);
1451  else
1452  context.stripes[i].length = (uint32_t)(endoff - (endoff % c->chunk_item->stripe_length) - context.stripes[i].start);
1453 
// The remaining devices of the group read exactly the same range as the first one.
1454  for (j = 1; j < sub_stripes; j++) {
1455  context.stripes[i+j].start = context.stripes[i].start;
1456  context.stripes[i+j].length = context.stripes[i].length;
1457  }
1458  }
1459 
// Back to a group index, which is what scrub_extent_raid10 receives below.
1460  startoffstripe /= sub_stripes;
// RAID10: at most one device may be missing.
1461  allowed_missing = 1;
1462  } else
// Any other profile: all but one device may be missing. Profiles other than DUPLICATE are
// rejected inside the loop below.
1463  allowed_missing = c->chunk_item->num_stripes - 1;
1464 
1465  num_missing = 0;
1466 
// Set up one read per present device that has a non-empty range.
1467  for (i = 0; i < c->chunk_item->num_stripes; i++) {
// NOTE(review): source line 1468 is missing; presumably it declares `IrpSp` - confirm.
1469 
1470  context.stripes[i].context = (struct _scrub_context*)&context;
1471 
1472  if (type == BLOCK_FLAG_DUPLICATE) {
// Every copy covers the same chunk-relative range.
1473  context.stripes[i].start = offset - c->offset;
1474  context.stripes[i].length = size;
1475  } else if (type != BLOCK_FLAG_RAID0 && type != BLOCK_FLAG_RAID10) {
1476  ERR("unexpected chunk type %lx\n", type);
1478  goto end;
1479  }
1480 
// A device without a devobj counts as missing and is not read.
1481  if (!c->devices[i]->devobj) {
1482  num_missing++;
1483 
1484  if (num_missing > allowed_missing) {
1485  ERR("too many missing devices (at least %u, maximum allowed %u)\n", num_missing, allowed_missing);
1487  goto end;
1488  }
1489  } else if (context.stripes[i].length > 0) {
1490  context.stripes[i].buf = ExAllocatePoolWithTag(NonPagedPool, context.stripes[i].length, ALLOC_TAG);
1491 
1492  if (!context.stripes[i].buf) {
1493  ERR("out of memory\n");
1495  goto end;
1496  }
1497 
1498  context.stripes[i].Irp = IoAllocateIrp(c->devices[i]->devobj->StackSize, false);
1499 
1500  if (!context.stripes[i].Irp) {
1501  ERR("IoAllocateIrp failed\n");
1503  goto end;
1504  }
1505 
1506  IrpSp = IoGetNextIrpStackLocation(context.stripes[i].Irp);
// NOTE(review): source line 1507 is missing; presumably it sets the major function of the
// stack location - confirm.
1508  IrpSp->FileObject = c->devices[i]->fileobj;
1509 
// How the destination buffer is attached depends on the I/O method the device advertises.
1510  if (c->devices[i]->devobj->Flags & DO_BUFFERED_IO) {
1511  context.stripes[i].Irp->AssociatedIrp.SystemBuffer = ExAllocatePoolWithTag(NonPagedPool, context.stripes[i].length, ALLOC_TAG);
1512  if (!context.stripes[i].Irp->AssociatedIrp.SystemBuffer) {
1513  ERR("out of memory\n");
1515  goto end;
1516  }
1517 
// NOTE(review): source line 1518 is missing; presumably it sets IRP flags for buffered I/O.
// The cleanup at `end` never frees SystemBuffer itself, so confirm who owns that buffer.
1519 
1520  context.stripes[i].Irp->UserBuffer = context.stripes[i].buf;
1521  } else if (c->devices[i]->devobj->Flags & DO_DIRECT_IO) {
1522  context.stripes[i].Irp->MdlAddress = IoAllocateMdl(context.stripes[i].buf, context.stripes[i].length, false, false, NULL);
1523  if (!context.stripes[i].Irp->MdlAddress) {
1524  ERR("IoAllocateMdl failed\n");
1526  goto end;
1527  }
1528 
// NOTE(review): source lines 1529, 1533 and 1534 are missing; presumably they initialise
// Status and hold the exception handler that stores the exception code in Status - confirm.
1530 
1531  _SEH2_TRY {
1532  MmProbeAndLockPages(context.stripes[i].Irp->MdlAddress, KernelMode, IoWriteAccess);
1535  } _SEH2_END;
1536 
1537  if (!NT_SUCCESS(Status)) {
1538  ERR("MmProbeAndLockPages threw exception %08lx\n", Status);
// Free the MDL here and clear the pointer so the cleanup at `end` does not unlock
// pages that were never locked.
1539  IoFreeMdl(context.stripes[i].Irp->MdlAddress);
1540  context.stripes[i].Irp->MdlAddress = NULL;
1541  goto end;
1542  }
1543  } else
1544  context.stripes[i].Irp->UserBuffer = context.stripes[i].buf;
1545 
// Read `length` bytes at the stripe-relative start plus this stripe's cis[i].offset.
1546  IrpSp->Parameters.Read.Length = context.stripes[i].length;
1547  IrpSp->Parameters.Read.ByteOffset.QuadPart = context.stripes[i].start + cis[i].offset;
1548 
1549  context.stripes[i].Irp->UserIosb = &context.stripes[i].iosb;
1550 
// scrub_read_completion is invoked on success, error and cancellation.
1551  IoSetCompletionRoutine(context.stripes[i].Irp, scrub_read_completion, &context.stripes[i], true, true, true);
1552 
1553  context.stripes_left++;
1554 
1555  Vcb->scrub.data_scrubbed += context.stripes[i].length;
1556  }
1557  }
1558 
1559  if (context.stripes_left == 0) {
1560  ERR("error - not reading any stripes\n");
1562  goto end;
1563  }
1564 
// NOTE(review): source line 1565 is missing; presumably it initialises the event that the
// completion routine signals - confirm.
1566 
// Issue the reads using the same condition under which the IRPs were created above.
1567  for (i = 0; i < c->chunk_item->num_stripes; i++) {
1568  if (c->devices[i]->devobj && context.stripes[i].length > 0)
1569  IoCallDriver(c->devices[i]->devobj, context.stripes[i].Irp);
1570  }
1571 
// NOTE(review): source line 1572 is missing; presumably it waits for all reads to complete - confirm.
1573 
1574  // return an error if any of the stripes returned an error
// Stripes that were never read still hold the zeroed iosb from RtlZeroMemory above.
1575  for (i = 0; i < c->chunk_item->num_stripes; i++) {
1576  if (!NT_SUCCESS(context.stripes[i].iosb.Status)) {
1577  Status = context.stripes[i].iosb.Status;
1579  goto end;
1580  }
1581  }
1582 
// Hand the filled buffers to the checker for this profile.
1583  if (type == BLOCK_FLAG_DUPLICATE) {
// NOTE(review): source line 1584 is missing; judging by the message below it calls
// scrub_extent_dup and stores the result in Status - confirm.
1585  if (!NT_SUCCESS(Status)) {
1586  ERR("scrub_extent_dup returned %08lx\n", Status);
1587  goto end;
1588  }
1589  } else if (type == BLOCK_FLAG_RAID0) {
1590  Status = scrub_extent_raid0(Vcb, c, offset, size, startoffstripe, csum, &context);
1591  if (!NT_SUCCESS(Status)) {
1592  ERR("scrub_extent_raid0 returned %08lx\n", Status);
1593  goto end;
1594  }
1595  } else if (type == BLOCK_FLAG_RAID10) {
1596  Status = scrub_extent_raid10(Vcb, c, offset, size, startoffstripe, csum, &context);
1597  if (!NT_SUCCESS(Status)) {
1598  ERR("scrub_extent_raid10 returned %08lx\n", Status);
1599  goto end;
1600  }
1601  }
1602 
// Common exit: release per-stripe resources, then the stripe array itself.
1603 end:
1604  if (context.stripes) {
1605  for (i = 0; i < c->chunk_item->num_stripes; i++) {
1606  if (context.stripes[i].Irp) {
// An IRP only exists for a stripe whose device has a devobj, so devobj is non-NULL here.
1607  if (c->devices[i]->devobj->Flags & DO_DIRECT_IO && context.stripes[i].Irp->MdlAddress) {
1608  MmUnlockPages(context.stripes[i].Irp->MdlAddress);
1609  IoFreeMdl(context.stripes[i].Irp->MdlAddress);
1610  }
1611  IoFreeIrp(context.stripes[i].Irp);
1612  }
1613 
1614  if (context.stripes[i].buf)
1615  ExFreePool(context.stripes[i].buf);
1616 
1617  if (context.stripes[i].bad_csums)
1618  ExFreePool(context.stripes[i].bad_csums);
1619  }
1620 
1621  ExFreePool(context.stripes);
1622  }
1623 
1624  return Status;
1625 }
1626 
// Scrubs the sectors of an extent that correspond to clear bits in a bitmap.
//
// NOTE(review): the signature line (source line 1627) is absent from this listing; the body
// takes Vcb, c, type, offset, csum, bmp and bmplen from it - confirm order and types upstream.
//
// Each run of clear bits in `bmp`, clamped to the first `bmplen` bits, is split into pieces of
// at most SCRUB_UNIT bytes, and every piece is passed to scrub_extent(). Bit `index` maps to the
// address offset + index * sector_size and to the checksum at csum + index * csum_size.
// Presumably a clear bit marks a sector whose checksum is present in `csum` - confirm with the caller.
//
// Returns the first failing status reported by scrub_extent(), otherwise STATUS_SUCCESS.
1628  NTSTATUS Status;
1629  ULONG runlength, index;
1630 
1631  runlength = RtlFindFirstRunClear(bmp, &index);
1632 
1633  while (runlength != 0) {
// Runs that start at or beyond bmplen are ignored.
1634  if (index >= bmplen)
1635  break;
1636 
// Clamp a run that reaches or passes the end of the meaningful part of the bitmap.
1637  if (index + runlength >= bmplen) {
1638  runlength = bmplen - index;
1639 
1640  if (runlength == 0)
1641  break;
1642  }
1643 
// Process the run in pieces of at most SCRUB_UNIT bytes.
1644  do {
1645  ULONG rl;
1646 
1647  if (runlength * Vcb->superblock.sector_size > SCRUB_UNIT)
1648  rl = SCRUB_UNIT / Vcb->superblock.sector_size;
1649  else
1650  rl = runlength;
1651 
1652  Status = scrub_extent(Vcb, c, type, offset + UInt32x32To64(index, Vcb->superblock.sector_size),
1653  rl * Vcb->superblock.sector_size, (uint8_t*)csum + (index * Vcb->csum_size));
1654  if (!NT_SUCCESS(Status)) {
// The message names the function that was actually called.
1655  ERR("scrub_extent returned %08lx\n", Status);
1656  return Status;
1657  }
1658 
1659  runlength -= rl;
1660  index += rl;
1661  } while (runlength > 0);
1662 
// Continue the search from the bit just past the run that was processed.
1663  runlength = RtlFindNextForwardRunClear(bmp, index, &index);
1664  }
1665 
1666  return STATUS_SUCCESS;
1667 }
1668 
// Per-stripe state for the RAID5/6 scrub code.
// NOTE(review): the listing omits most members and the closing line of this typedef (source
// lines 1670-1671, 1673-1674 and 1676-1678). The code further down also accesses `buf`, `iosb`
// and an `error` bitmap through stripe entries; the type name is presumably
// scrub_context_raid56_stripe - confirm against the upstream file.
1669 typedef struct {
// Untyped pointer; presumably refers back to the owning scrub context - confirm.
1672  void* context;
// `rewrite` is set by the stripe checkers once a stripe buffer has been corrected in memory.
// `missing` is not used in the visible code; presumably it marks a stripe whose device is absent - confirm.
1675  bool rewrite, missing;
1679 
// Shared state for the RAID5/6 scrub code.
// NOTE(review): the listing omits most members and the closing line of this typedef (source
// lines 1681-1686 and 1688-1690). The code further down also accesses `stripes`, `stripes_left`,
// `Event`, the bitmaps `alloc`, `is_tree` and `has_csum`, and the buffers `parity_scratch` and
// `parity_scratch2` through the context; the type name is presumably scrub_context_raid56 -
// confirm against the upstream file.
1680 typedef struct {
// Checksum array; the stripe checkers index it as (uint8_t*)csum + csum_size * sector offset.
1687  void* csum;
1691 
// I/O completion routine for the per-stripe read IRPs of the RAID5/6 scrub.
// Records the IRP's final status in the stripe entry and signals the context's event once the
// last outstanding read has completed.
// NOTE(review): source lines 1694, 1695, 1698 and 1705 are missing from this listing; presumably
// they declare `stripe` and `context` (derived from conptr) and return a status to the I/O
// manager - confirm against the upstream file.
1692 _Function_class_(IO_COMPLETION_ROUTINE)
1693 static NTSTATUS __stdcall scrub_read_completion_raid56(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID conptr) {
// Atomic decrement: `left` is the number of reads still outstanding after this one.
1696  LONG left = InterlockedDecrement(&context->stripes_left);
1697 
1699 
1700  stripe->iosb = Irp->IoStatus;
1701 
// Only the completion that brings the counter to zero sets the event.
1702  if (left == 0)
1703  KeSetEvent(&context->Event, 0, false);
1704 
1706 }
// Checks, and where possible repairs in memory, one full stripe of a single-parity chunk.
//
// NOTE(review): the first line of the signature (source line 1708) is absent from this listing;
// the body takes Vcb, c, context, stripe_start and bit_start from it, and the name is presumably
// scrub_raid5_stripe - confirm upstream. Other gaps in the embedded numbering are dropped source lines.
//
// `num` selects the stripe inside the buffers that were read (byte offset num * stripe_length).
// `missing_devices` is the number of absent devices: when it is non-zero, checksum failures are
// only logged and the function returns before the parity check and the repair pass.
// NOTE(review): the three trailing booleans passed to log_error appear to mean "is metadata",
// "was recovered" and "is parity" - confirm against log_error's definition.
1709  uint64_t num, uint16_t missing_devices) {
1710  ULONG sectors_per_stripe = (ULONG)(c->chunk_item->stripe_length / Vcb->superblock.sector_size), i, off;
// The parity device index depends on bit_start + num, so it changes from stripe to stripe.
1711  uint16_t stripe, parity = (bit_start + num + c->chunk_item->num_stripes - 1) % c->chunk_item->num_stripes;
1712  uint64_t stripeoff;
1713 
// The data devices are visited starting right after the parity device, wrapping around.
1714  stripe = (parity + 1) % c->chunk_item->num_stripes;
// `off` indexes the context bitmaps (alloc / is_tree / has_csum) and the csum array.
// NOTE(review): here the ULONG cast applies to the sum only, whereas the repair pass below
// casts the whole product.
1715  off = (ULONG)(bit_start + num - stripe_start) * sectors_per_stripe * (c->chunk_item->num_stripes - 1);
// `stripeoff` is the sector index inside each device buffer.
1716  stripeoff = num * sectors_per_stripe;
1717 
// Seed the scratch buffer with the parity stripe; the data stripes are XORed into it below.
1718  if (missing_devices == 0)
1719  RtlCopyMemory(context->parity_scratch, &context->stripes[parity].buf[num * c->chunk_item->stripe_length], (ULONG)c->chunk_item->stripe_length);
1720 
// Pass 1: verify the checksums of every allocated sector on every data device.
1721  while (stripe != parity) {
1722  RtlClearAllBits(&context->stripes[stripe].error);
1723 
1724  for (i = 0; i < sectors_per_stripe; i++) {
1725  if (c->devices[stripe]->devobj && RtlCheckBit(&context->alloc, off)) {
1726  if (RtlCheckBit(&context->is_tree, off)) {
// Tree block: check both the checksum and the address stored in its header.
1727  tree_header* th = (tree_header*)&context->stripes[stripe].buf[stripeoff * Vcb->superblock.sector_size];
1728  uint64_t addr = c->offset + (stripe_start * (c->chunk_item->num_stripes - 1) * c->chunk_item->stripe_length) + (off * Vcb->superblock.sector_size);
1729 
1730  if (!check_tree_checksum(Vcb, th) || th->address != addr) {
// Flag every sector covered by the node.
1731  RtlSetBits(&context->stripes[stripe].error, i, Vcb->superblock.node_size / Vcb->superblock.sector_size);
// NOTE(review): source line 1732 is missing here.
1733 
1734  if (missing_devices > 0)
1735  log_error(Vcb, addr, c->devices[stripe]->devitem.dev_id, true, false, false);
1736  }
1737 
// Skip the rest of the node; the -1 compensates for the loop's own i++.
1738  off += Vcb->superblock.node_size / Vcb->superblock.sector_size;
1739  stripeoff += Vcb->superblock.node_size / Vcb->superblock.sector_size;
1740  i += (Vcb->superblock.node_size / Vcb->superblock.sector_size) - 1;
1741 
1742  continue;
1743  } else if (RtlCheckBit(&context->has_csum, off)) {
// Data sector with a stored checksum.
1744  if (!check_sector_csum(Vcb, context->stripes[stripe].buf + (stripeoff * Vcb->superblock.sector_size), (uint8_t*)context->csum + (Vcb->csum_size * off))) {
1745  RtlSetBit(&context->stripes[stripe].error, i);
// NOTE(review): source line 1746 is missing here.
1747 
1748  if (missing_devices > 0) {
1749  uint64_t addr = c->offset + (stripe_start * (c->chunk_item->num_stripes - 1) * c->chunk_item->stripe_length) + (off * Vcb->superblock.sector_size);
1750 
1751  log_error(Vcb, addr, c->devices[stripe]->devitem.dev_id, false, false, false);
1752  }
1753  }
1754  }
1755  }
1756 
1757  off++;
1758  stripeoff++;
1759  }
1760 
// Fold this data stripe into the scratch buffer.
1761  if (missing_devices == 0)
1762  do_xor(context->parity_scratch, &context->stripes[stripe].buf[num * c->chunk_item->stripe_length], (ULONG)c->chunk_item->stripe_length);
1763 
1764  stripe = (stripe + 1) % c->chunk_item->num_stripes;
1765  stripeoff = num * sectors_per_stripe;
1766  }
1767 
1768  // check parity
1769 
// After XORing parity with all data stripes, any non-zero byte in a sector of the scratch
// buffer flags that sector in the parity stripe's error bitmap.
1770  if (missing_devices == 0) {
1771  RtlClearAllBits(&context->stripes[parity].error);
1772 
1773  for (i = 0; i < sectors_per_stripe; i++) {
1774  ULONG o, j;
1775 
1776  o = i * Vcb->superblock.sector_size;
1777  for (j = 0; j < Vcb->superblock.sector_size; j++) { // FIXME - use SSE
1778  if (context->parity_scratch[o] != 0) {
1779  RtlSetBit(&context->stripes[parity].error, i);
1780  break;
1781  }
1782  o++;
1783  }
1784  }
1785  }
1786 
1787  // log and fix errors
1788 
// With a device missing, the scratch buffer was never built, so no repair is attempted.
1789  if (missing_devices > 0)
1790  return;
1791 
// Pass 2: for each sector position, count the data devices whose sector failed pass 1.
1792  for (i = 0; i < sectors_per_stripe; i++) {
// bad_off and bad_stripe are only assigned when an error is found; they are read only in
// the num_errors == 1 branch (bad_off is assigned afresh in the parity-only branch).
1793  ULONG num_errors = 0, bad_off;
1794  uint64_t bad_stripe;
1795  bool alloc = false;
1796 
1797  stripe = (parity + 1) % c->chunk_item->num_stripes;
1798  off = (ULONG)((bit_start + num - stripe_start) * sectors_per_stripe * (c->chunk_item->num_stripes - 1)) + i;
1799 
1800  while (stripe != parity) {
1801  if (RtlCheckBit(&context->alloc, off)) {
1802  alloc = true;
1803 
1804  if (RtlCheckBit(&context->stripes[stripe].error, i)) {
1805  bad_stripe = stripe;
1806  bad_off = off;
1807  num_errors++;
1808  }
1809  }
1810 
// The same sector position on the next data device lies sectors_per_stripe bits further on.
1811  off += sectors_per_stripe;
1812  stripe = (stripe + 1) % c->chunk_item->num_stripes;
1813  }
1814 
// Nothing allocated at this sector position on any data device: nothing to verify.
1815  if (!alloc)
1816  continue;
1817 
1818  if (num_errors == 0 && !RtlCheckBit(&context->stripes[parity].error, i)) // everything fine
1819  continue;
1820 
1821  if (num_errors == 0 && RtlCheckBit(&context->stripes[parity].error, i)) { // parity error
1822  uint64_t addr;
1823 
// The scratch sector holds parity XOR all data, so XORing it into the stored parity
// leaves the XOR of the data sectors in the parity buffer.
1824  do_xor(&context->stripes[parity].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
1825  &context->parity_scratch[i * Vcb->superblock.sector_size],
1826  Vcb->superblock.sector_size);
1827 
1828  bad_off = (ULONG)((bit_start + num - stripe_start) * sectors_per_stripe * (c->chunk_item->num_stripes - 1)) + i;
1829  addr = c->offset + (stripe_start * (c->chunk_item->num_stripes - 1) * c->chunk_item->stripe_length) + (bad_off * Vcb->superblock.sector_size);
1830 
// Mark the parity stripe as modified in memory.
1831  context->stripes[parity].rewrite = true;
1832 
1833  log_error(Vcb, addr, c->devices[parity]->devitem.dev_id, false, true, true);
// NOTE(review): source line 1834 is missing here.
1835  } else if (num_errors == 1) {
1836  uint64_t addr = c->offset + (stripe_start * (c->chunk_item->num_stripes - 1) * c->chunk_item->stripe_length) + (bad_off * Vcb->superblock.sector_size);
1837 
1838  if (RtlCheckBit(&context->is_tree, bad_off)) {
1839  tree_header* th;
1840 
// XORing the bad data back out of the scratch buffer leaves parity XOR the other data
// stripes, i.e. the reconstruction candidate; this is done for a whole node at once.
1841  do_xor(&context->parity_scratch[i * Vcb->superblock.sector_size],
1842  &context->stripes[bad_stripe].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
1843  Vcb->superblock.node_size);
1844 
1845  th = (tree_header*)&context->parity_scratch[i * Vcb->superblock.sector_size];
1846 
// Accept the candidate only if it passes the same checks that the original failed.
1847  if (check_tree_checksum(Vcb, th) && th->address == addr) {
1848  RtlCopyMemory(&context->stripes[bad_stripe].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
1849  &context->parity_scratch[i * Vcb->superblock.sector_size], Vcb->superblock.node_size);
1850 
1851  context->stripes[bad_stripe].rewrite = true;
1852 
// Clear the error bits of the node's remaining sectors so later iterations of this
// loop do not treat them as separate errors.
1853  RtlClearBits(&context->stripes[bad_stripe].error, i + 1, (Vcb->superblock.node_size / Vcb->superblock.sector_size) - 1);
1854 
1855  log_error(Vcb, addr, c->devices[bad_stripe]->devitem.dev_id, true, true, false);
1856  } else
1857  log_error(Vcb, addr, c->devices[bad_stripe]->devitem.dev_id, true, false, false);
1858  } else {
// NOTE(review): source line 1859 is missing; presumably it declares the `hash` buffer used below - confirm.
1860 
// Same reconstruction for a single data sector.
1861  do_xor(&context->parity_scratch[i * Vcb->superblock.sector_size],
1862  &context->stripes[bad_stripe].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
1863  Vcb->superblock.sector_size);
1864 
1865  get_sector_csum(Vcb, &context->parity_scratch[i * Vcb->superblock.sector_size], hash);
1866 
// Accept the candidate only if its checksum matches the stored one.
1867  if (RtlCompareMemory(hash, (uint8_t*)context->csum + (Vcb->csum_size * bad_off), Vcb->csum_size) == Vcb->csum_size) {
1868  RtlCopyMemory(&context->stripes[bad_stripe].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
1869  &context->parity_scratch[i * Vcb->superblock.sector_size], Vcb->superblock.sector_size);
1870 
1871  context->stripes[bad_stripe].rewrite = true;
1872 
1873  log_error(Vcb, addr, c->devices[bad_stripe]->devitem.dev_id, false, true, false);
1874  } else
1875  log_error(Vcb, addr, c->devices[bad_stripe]->devitem.dev_id, false, false, false);
1876  }
1877  } else {
// More than one data device failed at this sector position: single parity cannot rebuild
// it, so each bad sector is logged with the "recovered" argument false.
1878  stripe = (parity + 1) % c->chunk_item->num_stripes;
1879  off = (ULONG)((bit_start + num - stripe_start) * sectors_per_stripe * (c->chunk_item->num_stripes - 1)) + i;
1880 
1881  while (stripe != parity) {
1882  if (RtlCheckBit(&context->alloc, off)) {
1883  if (RtlCheckBit(&context->stripes[stripe].error, i)) {
1884  uint64_t addr = c->offset + (stripe_start * (c->chunk_item->num_stripes - 1) * c->chunk_item->stripe_length) + (off * Vcb->superblock.sector_size);
1885 
1886  log_error(Vcb, addr, c->devices[stripe]->devitem.dev_id, RtlCheckBit(&context->is_tree, off), false, false);
1887  }
1888  }
1889 
1890  off += sectors_per_stripe;
1891  stripe = (stripe + 1) % c->chunk_item->num_stripes;
1892  }
1893  }
1894  }
1895 }
1896 
1898  uint64_t num, uint16_t missing_devices) {
1899  ULONG sectors_per_stripe = (ULONG)(c->chunk_item->stripe_length / Vcb->superblock.sector_size), i, off;
1900  uint16_t stripe, parity1 = (bit_start + num + c->chunk_item->num_stripes - 2) % c->chunk_item->num_stripes;
1901  uint16_t parity2 = (parity1 + 1) % c->chunk_item->num_stripes;
1902  uint64_t stripeoff;
1903 
1904  stripe = (parity1 + 2) % c->chunk_item->num_stripes;
1905  off = (ULONG)(bit_start + num - stripe_start) * sectors_per_stripe * (c->chunk_item->num_stripes - 2);
1906  stripeoff = num * sectors_per_stripe;
1907 
1908  if (c->devices[parity1]->devobj)
1909  RtlCopyMemory(context->parity_scratch, &context->stripes[parity1].buf[num * c->chunk_item->stripe_length], (ULONG)c->chunk_item->stripe_length);
1910 
1911  if (c->devices[parity2]->devobj)
1912  RtlZeroMemory(context->parity_scratch2, (ULONG)c->chunk_item->stripe_length);
1913 
1914  while (stripe != parity1) {
1915  RtlClearAllBits(&context->stripes[stripe].error);
1916 
1917  for (i = 0; i < sectors_per_stripe; i++) {
1918  if (c->devices[stripe]->devobj && RtlCheckBit(&context->alloc, off)) {
1919  if (RtlCheckBit(&context->is_tree, off)) {
1920  tree_header* th = (tree_header*)&context->stripes[stripe].buf[stripeoff * Vcb->superblock.sector_size];
1921  uint64_t addr = c->offset + (stripe_start * (c->chunk_item->num_stripes - 2) * c->chunk_item->stripe_length) + (off * Vcb->superblock.sector_size);
1922 
1923  if (!check_tree_checksum(Vcb, th) || th->address != addr) {
1924  RtlSetBits(&context->stripes[stripe].error, i, Vcb->superblock.node_size / Vcb->superblock.sector_size);
1926 
1927  if (missing_devices == 2)
1928  log_error(Vcb, addr, c->devices[stripe]->devitem.dev_id, true, false, false);
1929  }
1930 
1931  off += Vcb->superblock.node_size / Vcb->superblock.sector_size;
1932  stripeoff += Vcb->superblock.node_size / Vcb->superblock.sector_size;
1933  i += (Vcb->superblock.node_size / Vcb->superblock.sector_size) - 1;
1934 
1935  continue;
1936  } else if (RtlCheckBit(&context->has_csum, off)) {
1938 
1939  get_sector_csum(Vcb, context->stripes[stripe].buf + (stripeoff * Vcb->superblock.sector_size), hash);
1940 
1941  if (RtlCompareMemory(hash, (uint8_t*)context->csum + (Vcb->csum_size * off), Vcb->csum_size) != Vcb->csum_size) {
1942  uint64_t addr = c->offset + (stripe_start * (c->chunk_item->num_stripes - 2) * c->chunk_item->stripe_length) + (off * Vcb->superblock.sector_size);
1943 
1944  RtlSetBit(&context->stripes[stripe].error, i);
1946 
1947  if (missing_devices == 2)
1948  log_error(Vcb, addr, c->devices[stripe]->devitem.dev_id, false, false, false);
1949  }
1950  }
1951  }
1952 
1953  off++;
1954  stripeoff++;
1955  }
1956 
1957  if (c->devices[parity1]->devobj)
1958  do_xor(context->parity_scratch, &context->stripes[stripe].buf[num * c->chunk_item->stripe_length], (uint32_t)c->chunk_item->stripe_length);
1959 
1960  stripe = (stripe + 1) % c->chunk_item->num_stripes;
1961  stripeoff = num * sectors_per_stripe;
1962  }
1963 
1964  RtlClearAllBits(&context->stripes[parity1].error);
1965 
1966  if (missing_devices == 0 || (missing_devices == 1 && !c->devices[parity2]->devobj)) {
1967  // check parity 1
1968 
1969  for (i = 0; i < sectors_per_stripe; i++) {
1970  ULONG o, j;
1971 
1972  o = i * Vcb->superblock.sector_size;
1973  for (j = 0; j < Vcb->superblock.sector_size; j++) { // FIXME - use SSE
1974  if (context->parity_scratch[o] != 0) {
1975  RtlSetBit(&context->stripes[parity1].error, i);
1976  break;
1977  }
1978  o++;
1979  }
1980  }
1981  }
1982 
1983  RtlClearAllBits(&context->stripes[parity2].error);
1984 
1985  if (missing_devices == 0 || (missing_devices == 1 && !c->devices[parity1]->devobj)) {
1986  // check parity 2
1987 
1988  stripe = parity1 == 0 ? (c->chunk_item->num_stripes - 1) : (parity1 - 1);
1989 
1990  while (stripe != parity2) {
1991  galois_double(context->parity_scratch2, (uint32_t)c->chunk_item->stripe_length);
1992  do_xor(context->parity_scratch2, &context->stripes[stripe].buf[num * c->chunk_item->stripe_length], (uint32_t)c->chunk_item->stripe_length);
1993 
1994  stripe = stripe == 0 ? (c->chunk_item->num_stripes - 1) : (stripe - 1);
1995  }
1996 
1997  for (i = 0; i < sectors_per_stripe; i++) {
1998  if (RtlCompareMemory(&context->stripes[parity2].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
1999  &context->parity_scratch2[i * Vcb->superblock.sector_size], Vcb->superblock.sector_size) != Vcb->superblock.sector_size)
2000  RtlSetBit(&context->stripes[parity2].error, i);
2001  }
2002  }
2003 
2004  if (missing_devices == 2)
2005  return;
2006 
2007  // log and fix errors
2008 
2009  for (i = 0; i < sectors_per_stripe; i++) {
2010  ULONG num_errors = 0;
2011  uint64_t bad_stripe1, bad_stripe2;
2012  ULONG bad_off1, bad_off2;
2013  bool alloc = false;
2014 
2015  stripe = (parity1 + 2) % c->chunk_item->num_stripes;
2016  off = (ULONG)((bit_start + num - stripe_start) * sectors_per_stripe * (c->chunk_item->num_stripes - 2)) + i;
2017 
2018  while (stripe != parity1) {
2019  if (RtlCheckBit(&context->alloc, off)) {
2020  alloc = true;
2021 
2022  if (!c->devices[stripe]->devobj || RtlCheckBit(&context->stripes[stripe].error, i)) {
2023  if (num_errors == 0) {
2024  bad_stripe1 = stripe;
2025  bad_off1 = off;
2026  } else if (num_errors == 1) {
2027  bad_stripe2 = stripe;
2028  bad_off2 = off;
2029  }
2030  num_errors++;
2031  }
2032  }
2033 
2034  off += sectors_per_stripe;
2035  stripe = (stripe + 1) % c->chunk_item->num_stripes;
2036  }
2037 
2038  if (!alloc)
2039  continue;
2040 
2041  if (num_errors == 0 && !RtlCheckBit(&context->stripes[parity1].error, i) && !RtlCheckBit(&context->stripes[parity2].error, i)) // everything fine
2042  continue;
2043 
2044  if (num_errors == 0) { // parity error
2045  uint64_t addr;
2046 
2047  if (RtlCheckBit(&context->stripes[parity1].error, i)) {
2048  do_xor(&context->stripes[parity1].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
2049  &context->parity_scratch[i * Vcb->superblock.sector_size],
2050  Vcb->superblock.sector_size);
2051 
2052  bad_off1 = (ULONG)((bit_start + num - stripe_start) * sectors_per_stripe * (c->chunk_item->num_stripes - 2)) + i;
2053  addr = c->offset + (stripe_start * (c->chunk_item->num_stripes - 2) * c->chunk_item->stripe_length) + (bad_off1 * Vcb->superblock.sector_size);
2054 
2055  context->stripes[parity1].rewrite = true;
2056 
2057  log_error(Vcb, addr, c->devices[parity1]->devitem.dev_id, false, true, true);
2059  }
2060 
2061  if (RtlCheckBit(&context->stripes[parity2].error, i)) {
2062  RtlCopyMemory(&context->stripes[parity2].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
2063  &context->parity_scratch2[i * Vcb->superblock.sector_size],
2064  Vcb->superblock.sector_size);
2065 
2066  bad_off1 = (ULONG)((bit_start + num - stripe_start) * sectors_per_stripe * (c->chunk_item->num_stripes - 2)) + i;
2067  addr = c->offset + (stripe_start * (c->chunk_item->num_stripes - 2) * c->chunk_item->stripe_length) + (bad_off1 * Vcb->superblock.sector_size);
2068 
2069  context->stripes[parity2].rewrite = true;
2070 
2071  log_error(Vcb, addr, c->devices[parity2]->devitem.dev_id, false, true, true);
2073  }
2074  } else if (num_errors == 1) {
2075  uint32_t len;
2076  uint16_t stripe_num, bad_stripe_num;
2077  uint64_t addr = c->offset + (stripe_start * (c->chunk_item->num_stripes - 2) * c->chunk_item->stripe_length) + (bad_off1 * Vcb->superblock.sector_size);
2078  uint8_t* scratch;
2079 
2080  len = RtlCheckBit(&context->is_tree, bad_off1)? Vcb->superblock.node_size : Vcb->superblock.sector_size;
2081 
2083  if (!scratch) {
2084  ERR("out of memory\n");
2085  return;
2086  }
2087 
2088  RtlZeroMemory(scratch, len);
2089 
2090  do_xor(&context->parity_scratch[i * Vcb->superblock.sector_size],
2091  &context->stripes[bad_stripe1].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)], len);
2092 
2093  stripe = parity1 == 0 ? (c->chunk_item->num_stripes - 1) : (parity1 - 1);
2094 
2095  if (c->devices[parity2]->devobj) {
2096  stripe_num = c->chunk_item->num_stripes - 3;
2097  while (stripe != parity2) {
2098  galois_double(scratch, len);
2099 
2100  if (stripe != bad_stripe1)
2101  do_xor(scratch, &context->stripes[stripe].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)], len);
2102  else
2103  bad_stripe_num = stripe_num;
2104 
2105  stripe = stripe == 0 ? (c->chunk_item->num_stripes - 1) : (stripe - 1);
2106  stripe_num--;
2107  }
2108 
2109  do_xor(scratch, &context->stripes[parity2].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)], len);
2110 
2111  if (bad_stripe_num != 0)
2112  galois_divpower(scratch, (uint8_t)bad_stripe_num, len);
2113  }
2114 
2115  if (RtlCheckBit(&context->is_tree, bad_off1)) {
2116  uint8_t hash1[MAX_HASH_SIZE];
2117  uint8_t hash2[MAX_HASH_SIZE];
2118  tree_header *th1 = NULL, *th2 = NULL;
2119 
2120  if (c->devices[parity1]->devobj) {
2121  th1 = (tree_header*)&context->parity_scratch[i * Vcb->superblock.sector_size];
2122  get_tree_checksum(Vcb, th1, hash1);
2123  }
2124 
2125  if (c->devices[parity2]->devobj) {
2126  th2 = (tree_header*)scratch;
2127  get_tree_checksum(Vcb, th2, hash2);
2128  }
2129 
2130  if ((c->devices[parity1]->devobj && RtlCompareMemory(hash1, th1, Vcb->csum_size) == Vcb->csum_size && th1->address == addr) ||
2131  (c->devices[parity2]->devobj && RtlCompareMemory(hash2, th2, Vcb->csum_size) == Vcb->csum_size && th2->address == addr)) {
2132  if (!c->devices[parity1]->devobj || RtlCompareMemory(hash1, th1, Vcb->csum_size) != Vcb->csum_size || th1->address != addr) {
2133  RtlCopyMemory(&context->stripes[bad_stripe1].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
2134  scratch, Vcb->superblock.node_size);
2135 
2136  if (c->devices[parity1]->devobj) {
2137  // fix parity 1
2138 
2139  stripe = (parity1 + 2) % c->chunk_item->num_stripes;
2140 
2141  RtlCopyMemory(&context->stripes[parity1].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
2142  &context->stripes[stripe].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
2143  Vcb->superblock.node_size);
2144 
2145  stripe = (stripe + 1) % c->chunk_item->num_stripes;
2146 
2147  while (stripe != parity1) {
2148  do_xor(&context->stripes[parity1].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
2149  &context->stripes[stripe].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
2150  Vcb->superblock.node_size);
2151 
2152  stripe = (stripe + 1) % c->chunk_item->num_stripes;
2153  }
2154 
2155  context->stripes[parity1].rewrite = true;
2156 
2157  log_error(Vcb, addr, c->devices[parity1]->devitem.dev_id, false, true, true);
2159  }
2160  } else {
2161  RtlCopyMemory(&context->stripes[bad_stripe1].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
2162  &context->parity_scratch[i * Vcb->superblock.sector_size], Vcb->superblock.node_size);
2163 
2164  if (!c->devices[parity2]->devobj || RtlCompareMemory(hash2, th2, Vcb->csum_size) != Vcb->csum_size || th2->address != addr) {
2165  // fix parity 2
2166  stripe = parity1 == 0 ? (c->chunk_item->num_stripes - 1) : (parity1 - 1);
2167 
2168  if (c->devices[parity2]->devobj) {
2169  RtlCopyMemory(&context->stripes[parity2].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
2170  &context->stripes[stripe].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
2171  Vcb->superblock.node_size);
2172 
2173  stripe = stripe == 0 ? (c->chunk_item->num_stripes - 1) : (stripe - 1);
2174 
2175  while (stripe != parity2) {
2176  galois_double(&context->stripes[parity2].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)], Vcb->superblock.node_size);
2177 
2178  do_xor(&context->stripes[parity2].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
2179  &context->stripes[stripe].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
2180  Vcb->superblock.node_size);
2181 
2182  stripe = stripe == 0 ? (c->chunk_item->num_stripes - 1) : (stripe - 1);
2183  }
2184 
2185  context->stripes[parity2].rewrite = true;
2186 
2187  log_error(Vcb, addr, c->devices[parity2]->devitem.dev_id, false, true, true);
2189  }
2190  }
2191  }
2192 
2193  context->stripes[bad_stripe1].rewrite = true;
2194 
2195  RtlClearBits(&context->stripes[bad_stripe1].error, i + 1, (Vcb->superblock.node_size / Vcb->superblock.sector_size) - 1);
2196 
2197  log_error(Vcb, addr, c->devices[bad_stripe1]->devitem.dev_id, true, true, false);
2198  } else
2199  log_error(Vcb, addr, c->devices[bad_stripe1]->devitem.dev_id, true, false, false);
2200  } else {
2201  uint8_t hash1[MAX_HASH_SIZE];
2202  uint8_t hash2[MAX_HASH_SIZE];
2203 
2204  if (c->devices[parity1]->devobj)
2205  get_sector_csum(Vcb, &context->parity_scratch[i * Vcb->superblock.sector_size], hash1);
2206 
2207  if (c->devices[parity2]->devobj)
2208  get_sector_csum(Vcb, scratch, hash2);
2209 
2210  if ((c->devices[parity1]->devobj && RtlCompareMemory(hash1, (uint8_t*)context->csum + (bad_off1 * Vcb->csum_size), Vcb->csum_size) == Vcb->csum_size) ||
2211  (c->devices[parity2]->devobj && RtlCompareMemory(hash2, (uint8_t*)context->csum + (bad_off1 * Vcb->csum_size), Vcb->csum_size) == Vcb->csum_size)) {
2212  if (c->devices[parity2]->devobj && RtlCompareMemory(hash2, (uint8_t*)context->csum + (bad_off1 * Vcb->csum_size), Vcb->csum_size) == Vcb->csum_size) {
2213  RtlCopyMemory(&context->stripes[bad_stripe1].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
2214  scratch, Vcb->superblock.sector_size);
2215 
2216  if (c->devices[parity1]->devobj && RtlCompareMemory(hash1, (uint8_t*)context->csum + (bad_off1 * Vcb->csum_size), Vcb->csum_size) != Vcb->csum_size) {
2217  // fix parity 1
2218 
2219  stripe = (parity1 + 2) % c->chunk_item->num_stripes;
2220 
2221  RtlCopyMemory(&context->stripes[parity1].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
2222  &context->stripes[stripe].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
2223  Vcb->superblock.sector_size);
2224 
2225  stripe = (stripe + 1) % c->chunk_item->num_stripes;
2226 
2227  while (stripe != parity1) {
2228  do_xor(&context->stripes[parity1].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
2229  &context->stripes[stripe].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
2230  Vcb->superblock.sector_size);
2231 
2232  stripe = (stripe + 1) % c->chunk_item->num_stripes;
2233  }
2234 
2235  context->stripes[parity1].rewrite = true;
2236 
2237  log_error(Vcb, addr, c->devices[parity1]->devitem.dev_id, false, true, true);
2239  }
2240  } else {
2241  RtlCopyMemory(&context->stripes[bad_stripe1].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
2242  &context->parity_scratch[i * Vcb->superblock.sector_size], Vcb->superblock.sector_size);
2243 
2244  if (c->devices[parity2]->devobj && RtlCompareMemory(hash2, (uint8_t*)context->csum + (bad_off1 * Vcb->csum_size), Vcb->csum_size) != Vcb->csum_size) {
2245  // fix parity 2
2246  stripe = parity1 == 0 ? (c->chunk_item->num_stripes - 1) : (parity1 - 1);
2247 
2248  RtlCopyMemory(&context->stripes[parity2].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
2249  &context->stripes[stripe].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
2250  Vcb->superblock.sector_size);
2251 
2252  stripe = stripe == 0 ? (c->chunk_item->num_stripes - 1) : (stripe - 1);
2253 
2254  while (stripe != parity2) {
2255  galois_double(&context->stripes[parity2].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)], Vcb->superblock.sector_size);
2256 
2257  do_xor(&context->stripes[parity2].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
2258  &context->stripes[stripe].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
2259  Vcb->superblock.sector_size);
2260 
2261  stripe = stripe == 0 ? (c->chunk_item->num_stripes - 1) : (stripe - 1);
2262  }
2263 
2264  context->stripes[parity2].rewrite = true;
2265 
2266  log_error(Vcb, addr, c->devices[parity2]->devitem.dev_id, false, true, true);
2268  }
2269  }
2270 
2271  context->stripes[bad_stripe1].rewrite = true;
2272 
2273  log_error(Vcb, addr, c->devices[bad_stripe1]->devitem.dev_id, false, true, false);
2274  } else
2275  log_error(Vcb, addr, c->devices[bad_stripe1]->devitem.dev_id, false, false, false);
2276  }
2277 
2278  ExFreePool(scratch);
2279  } else if (num_errors == 2 && missing_devices == 0) {
2280  uint16_t x, y, k;
2281  uint64_t addr;
2282  uint32_t len = (RtlCheckBit(&context->is_tree, bad_off1) || RtlCheckBit(&context->is_tree, bad_off2)) ? Vcb->superblock.node_size : Vcb->superblock.sector_size;
2283  uint8_t gyx, gx, denom, a, b, *p, *q, *pxy, *qxy;
2284  uint32_t j;
2285 
2286  stripe = parity1 == 0 ? (c->chunk_item->num_stripes - 1) : (parity1 - 1);
2287 
2288  // put qxy in parity_scratch
2289  // put pxy in parity_scratch2
2290 
2291  k = c->chunk_item->num_stripes - 3;
2292  if (stripe == bad_stripe1 || stripe == bad_stripe2) {
2293  RtlZeroMemory(&context->parity_scratch[i * Vcb->superblock.sector_size], len);
2294  RtlZeroMemory(&context->parity_scratch2[i * Vcb->superblock.sector_size], len);
2295 
2296  if (stripe == bad_stripe1)
2297  x = k;
2298  else
2299  y = k;
2300  } else {
2301  RtlCopyMemory(&context->parity_scratch[i * Vcb->superblock.sector_size],
2302  &context->stripes[stripe].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)], len);
2303  RtlCopyMemory(&context->parity_scratch2[i * Vcb->superblock.sector_size],
2304  &context->stripes[stripe].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)], len);
2305  }
2306 
2307  stripe = stripe == 0 ? (c->chunk_item->num_stripes - 1) : (stripe - 1);
2308 
2309  k--;
2310  do {
2311  galois_double(&context->parity_scratch[i * Vcb->superblock.sector_size], len);
2312 
2313  if (stripe != bad_stripe1 && stripe != bad_stripe2) {
2314  do_xor(&context->parity_scratch[i * Vcb->superblock.sector_size],
2315  &context->stripes[stripe].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)], len);
2316  do_xor(&context->parity_scratch2[i * Vcb->superblock.sector_size],
2317  &context->stripes[stripe].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)], len);
2318  } else if (stripe == bad_stripe1)
2319  x = k;
2320  else if (stripe == bad_stripe2)
2321  y = k;
2322 
2323  stripe = stripe == 0 ? (c->chunk_item->num_stripes - 1) : (stripe - 1);
2324  k--;
2325  } while (stripe != parity2);
2326 
2327  gyx = gpow2(y > x ? (y-x) : (255-x+y));
2328  gx = gpow2(255-x);
2329 
2330  denom = gdiv(1, gyx ^ 1);
2331  a = gmul(gyx, denom);
2332  b = gmul(gx, denom);
2333 
2334  p = &context->stripes[parity1].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)];
2335  q = &context->stripes[parity2].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)];
2336  pxy = &context->parity_scratch2[i * Vcb->superblock.sector_size];
2337  qxy = &context->parity_scratch[i * Vcb->superblock.sector_size];
2338 
2339  for (j = 0; j < len; j++) {
2340  *qxy = gmul(a, *p ^ *pxy) ^ gmul(b, *q ^ *qxy);
2341 
2342  p++;
2343  q++;
2344  pxy++;
2345  qxy++;
2346  }
2347 
2348  do_xor(&context->parity_scratch2[i * Vcb->superblock.sector_size], &context->parity_scratch[i * Vcb->superblock.sector_size], len);
2349  do_xor(&context->parity_scratch2[i * Vcb->superblock.sector_size], &context->stripes[parity1].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)], len);
2350 
2351  addr = c->offset + (stripe_start * (c->chunk_item->num_stripes - 2) * c->chunk_item->stripe_length) + (bad_off1 * Vcb->superblock.sector_size);
2352 
2353  if (RtlCheckBit(&context->is_tree, bad_off1)) {
2354  tree_header* th = (tree_header*)&context->parity_scratch[i * Vcb->superblock.sector_size];
2355 
2356  if (check_tree_checksum(Vcb, th) && th->address == addr) {
2357  RtlCopyMemory(&context->stripes[bad_stripe1].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
2358  &context->parity_scratch[i * Vcb->superblock.sector_size], Vcb->superblock.node_size);
2359 
2360  context->stripes[bad_stripe1].rewrite = true;
2361 
2362  RtlClearBits(&context->stripes[bad_stripe1].error, i + 1, (Vcb->superblock.node_size / Vcb->superblock.sector_size) - 1);
2363 
2364  log_error(Vcb, addr, c->devices[bad_stripe1]->devitem.dev_id, true, true, false);
2365  } else
2366  log_error(Vcb, addr, c->devices[bad_stripe1]->devitem.dev_id, true, false, false);
2367  } else {
2368  if (check_sector_csum(Vcb, &context->parity_scratch[i * Vcb->superblock.sector_size], (uint8_t*)context->csum + (Vcb->csum_size * bad_off1))) {
2369  RtlCopyMemory(&context->stripes[bad_stripe1].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
2370  &context->parity_scratch[i * Vcb->superblock.sector_size], Vcb->superblock.sector_size);
2371 
2372  context->stripes[bad_stripe1].rewrite = true;
2373 
2374  log_error(Vcb, addr, c->devices[bad_stripe1]->devitem.dev_id, false, true, false);
2375  } else
2376  log_error(Vcb, addr, c->devices[bad_stripe1]->devitem.dev_id, false, false, false);
2377  }
2378 
2379  addr = c->offset + (stripe_start * (c->chunk_item->num_stripes - 2) * c->chunk_item->stripe_length) + (bad_off2 * Vcb->superblock.sector_size);
2380 
2381  if (RtlCheckBit(&context->is_tree, bad_off2)) {
2382  tree_header* th = (tree_header*)&context->parity_scratch2[i * Vcb->superblock.sector_size];
2383 
2384  if (check_tree_checksum(Vcb, th) && th->address == addr) {
2385  RtlCopyMemory(&context->stripes[bad_stripe2].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
2386  &context->parity_scratch2[i * Vcb->superblock.sector_size], Vcb->superblock.node_size);
2387 
2388  context->stripes[bad_stripe2].rewrite = true;
2389 
2390  RtlClearBits(&context->stripes[bad_stripe2].error, i + 1, (Vcb->superblock.node_size / Vcb->superblock.sector_size) - 1);
2391 
2392  log_error(Vcb, addr, c->devices[bad_stripe2]->devitem.dev_id, true, true, false);
2393  } else
2394  log_error(Vcb, addr, c->devices[bad_stripe2]->devitem.dev_id, true, false, false);
2395  } else {
2396  if (check_sector_csum(Vcb, &context->parity_scratch2[i * Vcb->superblock.sector_size], (uint8_t*)context->csum + (Vcb->csum_size * bad_off2))) {
2397  RtlCopyMemory(&context->stripes[bad_stripe2].buf[(num * c->chunk_item->stripe_length) + (i * Vcb->superblock.sector_size)],
2398  &context->parity_scratch2[i * Vcb->superblock.sector_size], Vcb->superblock.sector_size);
2399 
2400  context->stripes[bad_stripe2].rewrite = true;
2401 
2402  log_error(Vcb, addr, c->devices[bad_stripe2]->devitem.dev_id, false, true, false);
2403  } else
2404  log_error(Vcb, addr, c->devices[bad_stripe2]->devitem.dev_id, false, false, false);
2405  }
2406  } else {
2407  stripe = (parity2 + 1) % c->chunk_item->num_stripes;
2408  off = (ULONG)((bit_start + num - stripe_start) * sectors_per_stripe * (c->chunk_item->num_stripes - 2)) + i;
2409 
2410  while (stripe != parity1) {
2411  if (c->devices[stripe]->devobj && RtlCheckBit(&context->alloc, off)) {
2412  if (RtlCheckBit(&context->stripes[stripe].error, i)) {
2413  uint64_t addr = c->offset + (stripe_start * (c->chunk_item->num_stripes - 2) * c->chunk_item->stripe_length) + (off * Vcb->superblock.sector_size);
2414 
2415  log_error(Vcb, addr, c->devices[stripe]->devitem.dev_id, RtlCheckBit(&context->is_tree, off), false, false);
2416  }
2417  }
2418 
2419  off += sectors_per_stripe;
2420  stripe = (stripe + 1) % c->chunk_item->num_stripes;
2421  }
2422  }
2423  }
2424 }
2425 
2427  NTSTATUS Status;
2428  KEY searchkey;
2429  traverse_ptr tp;
2430  bool b;
2431  uint64_t run_start, run_end, full_stripe_len, stripe;
2432  uint32_t max_read, num_sectors;
2433  ULONG arrlen, *allocarr, *csumarr = NULL, *treearr, num_parity_stripes = c->chunk_item->type & BLOCK_FLAG_RAID6 ? 2 : 1;
2435  uint16_t i;
2436  CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&c->chunk_item[1];
2437 
2438  TRACE("(%p, %p, %I64x, %I64x)\n", Vcb, c, stripe_start, stripe_end);
2439 
2440  full_stripe_len = (c->chunk_item->num_stripes - num_parity_stripes) * c->chunk_item->stripe_length;
2441  run_start = c->offset + (stripe_start * full_stripe_len);
2442  run_end = c->offset + ((stripe_end + 1) * full_stripe_len);
2443 
2444  searchkey.obj_id = run_start;
2445  searchkey.obj_type = TYPE_METADATA_ITEM;
2446  searchkey.offset = 0xffffffffffffffff;
2447 
2448  Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, false, NULL);
2449  if (!NT_SUCCESS(Status)) {
2450  ERR("find_item returned %08lx\n", Status);
2451  return Status;
2452  }
2453 
2454  num_sectors = (uint32_t)((stripe_end - stripe_start + 1) * full_stripe_len / Vcb->superblock.sector_size);
2455  arrlen = (ULONG)sector_align((num_sectors / 8) + 1, sizeof(ULONG));
2456 
2457  allocarr = ExAllocatePoolWithTag(PagedPool, arrlen, ALLOC_TAG);
2458  if (!allocarr) {
2459  ERR("out of memory\n");
2461  }
2462 
2463  treearr = ExAllocatePoolWithTag(PagedPool, arrlen, ALLOC_TAG);
2464  if (!treearr) {
2465  ERR("out of memory\n");
2466  ExFreePool(allocarr);
2468  }
2469 
2470  RtlInitializeBitMap(&context.alloc, allocarr, num_sectors);
2471  RtlClearAllBits(&context.alloc);
2472 
2473  RtlInitializeBitMap(&context.is_tree, treearr, num_sectors);
2474  RtlClearAllBits(&context.is_tree);
2475 
2476  context.parity_scratch = ExAllocatePoolWithTag(PagedPool, (ULONG)c->chunk_item->stripe_length, ALLOC_TAG);
2477  if (!context.parity_scratch) {
2478  ERR("out of memory\n");
2479  ExFreePool(allocarr);
2480  ExFreePool(treearr);
2482  }
2483 
2484  if (c->chunk_item->type & BLOCK_FLAG_DATA) {
2485  csumarr = ExAllocatePoolWithTag(PagedPool, arrlen, ALLOC_TAG);
2486  if (!csumarr) {
2487  ERR("out of memory\n");
2488  ExFreePool(allocarr);
2489  ExFreePool(treearr);
2490  ExFreePool(context.parity_scratch);
2492  }
2493 
2494  RtlInitializeBitMap(&context.has_csum, csumarr, num_sectors);
2495  RtlClearAllBits(&context.has_csum);
2496 
2497  context.csum = ExAllocatePoolWithTag(PagedPool, num_sectors * Vcb->csum_size, ALLOC_TAG);
2498  if (!context.csum) {
2499  ERR("out of memory\n");
2500  ExFreePool(allocarr);
2501  ExFreePool(treearr);
2502  ExFreePool(context.parity_scratch);
2503  ExFreePool(csumarr);
2505  }
2506  }
2507 
2508  if (c->chunk_item->type & BLOCK_FLAG_RAID6) {
2509  context.parity_scratch2 = ExAllocatePoolWithTag(PagedPool, (ULONG)c->chunk_item->stripe_length, ALLOC_TAG);
2510  if (!context.parity_scratch2) {
2511  ERR("out of memory\n");
2512  ExFreePool(allocarr);
2513  ExFreePool(treearr);
2514  ExFreePool(context.parity_scratch);
2515 
2516  if (c->chunk_item->type & BLOCK_FLAG_DATA) {
2517  ExFreePool(csumarr);
2518  ExFreePool(context.csum);
2519  }
2520 
2522  }
2523  }
2524 
2525  do {
2526  traverse_ptr next_tp;
2527 
2528  if (tp.item->key.obj_id >= run_end)
2529  break;
2530 
2532  uint64_t size = tp.item->key.obj_type == TYPE_METADATA_ITEM ? Vcb->superblock.node_size : tp.item->key.offset;
2533 
2534  if (tp.item->key.obj_id + size > run_start) {
2535  uint64_t extent_start = max(run_start, tp.item->key.obj_id);
2536  uint64_t extent_end = min(tp.item->key.obj_id + size, run_end);
2537  bool extent_is_tree = false;
2538 
2539  RtlSetBits(&context.alloc, (ULONG)((extent_start - run_start) / Vcb->superblock.sector_size), (ULONG)((extent_end - extent_start) / Vcb->superblock.sector_size));
2540 
2542  extent_is_tree = true;
2543  else {
2544  EXTENT_ITEM* ei = (EXTENT_ITEM*)tp.item->data;
2545 
2546  if (tp.item->size < sizeof(EXTENT_ITEM)) {
2547  ERR("(%I64x,%x,%I64x) was %u bytes, expected at least %Iu\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_ITEM));
2549  goto end;
2550  }
2551 
2552  if (ei->flags & EXTENT_ITEM_TREE_BLOCK)
2553  extent_is_tree = true;
2554  }
2555 
2556  if (extent_is_tree)
2557  RtlSetBits(&context.is_tree, (ULONG)((extent_start - run_start) / Vcb->superblock.sector_size), (ULONG)((extent_end - extent_start) / Vcb->superblock.sector_size));
2558  else if (c->chunk_item->type & BLOCK_FLAG_DATA) {
2559  traverse_ptr tp2;
2560  bool b2;
2561 
2562  searchkey.obj_id = EXTENT_CSUM_ID;
2563  searchkey.obj_type = TYPE_EXTENT_CSUM;
2564  searchkey.offset = extent_start;
2565 
2566  Status = find_item(Vcb, Vcb->checksum_root, &tp2, &searchkey, false, NULL);
2567  if (!NT_SUCCESS(Status) && Status != STATUS_NOT_FOUND) {
2568  ERR("find_item returned %08lx\n", Status);
2569  goto end;
2570  }
2571 
2572  do {
2573  traverse_ptr next_tp2;
2574 
2575  if (tp2.item->key.offset >= extent_end)
2576  break;
2577 
2578  if (tp2.item->key.offset >= extent_start) {
2579  uint64_t csum_start = max(extent_start, tp2.item->key.offset);
2580  uint64_t csum_end = min(extent_end, tp2.item->key.offset + (tp2.item->size * Vcb->superblock.sector_size / Vcb->csum_size));
2581 
2582  RtlSetBits(&context.has_csum, (ULONG)((csum_start - run_start) / Vcb->superblock.sector_size), (ULONG)((csum_end - csum_start) / Vcb->superblock.sector_size));
2583 
2584  RtlCopyMemory((uint8_t*)context.csum + ((csum_start - run_start) * Vcb->csum_size / Vcb->superblock.sector_size),
2585  tp2.item->data + ((csum_start - tp2.item->key.offset) * Vcb->csum_size / Vcb->superblock.sector_size),
2586  (ULONG)((csum_end - csum_start) * Vcb->csum_size / Vcb->superblock.sector_size));
2587  }
2588 
2589  b2 = find_next_item(Vcb, &tp2, &next_tp2, false, NULL);
2590 
2591  if (b2)
2592  tp2 = next_tp2;
2593  } while (b2);
2594  }
2595  }
2596  }
2597 
2598  b = find_next_item(Vcb, &tp, &next_tp, false, NULL);
2599 
2600  if (b)
2601  tp = next_tp;
2602  } while (b);
2603 
2604  context.stripes = ExAllocatePoolWithTag(PagedPool, sizeof(scrub_context_raid56_stripe) * c->chunk_item->num_stripes, ALLOC_TAG);
2605  if (!context.stripes) {
2606  ERR("out of memory\n");
2608  goto end;
2609  }
2610 
2611  max_read = (uint32_t)min(1048576 / c->chunk_item->stripe_length, stripe_end - stripe_start + 1); // only process 1 MB of data at a time
2612 
2613  for (i = 0; i < c->chunk_item->num_stripes; i++) {
2614  context.stripes[i].buf = ExAllocatePoolWithTag(PagedPool, (ULONG)(max_read * c->chunk_item->stripe_length), ALLOC_TAG);
2615  if (!context.stripes[i].buf) {
2616  uint64_t j;
2617 
2618  ERR("out of memory\n");
2619 
2620  for (j = 0; j < i; j++) {
2621  ExFreePool(context.stripes[j].buf);
2622  }
2623  ExFreePool(context.stripes);
2624 
2626  goto end;
2627  }
2628 
2629  context.stripes[i].errorarr = ExAllocatePoolWithTag(PagedPool, (ULONG)sector_align(((c->chunk_item->stripe_length / Vcb->superblock.sector_size) / 8) + 1, sizeof(ULONG)), ALLOC_TAG);
2630  if (!context.stripes[i].errorarr) {
2631  uint64_t j;
2632 
2633  ERR("out of memory\n");
2634 
2635  ExFreePool(context.stripes[i].buf);
2636 
2637  for (j = 0; j < i; j++) {
2638  ExFreePool(context.stripes[j].buf);
2639  }
2640  ExFreePool(context.stripes);
2641 
2643  goto end;
2644  }
2645 
2646  RtlInitializeBitMap(&context.stripes[i].error, context.stripes[i].errorarr, (ULONG)(c->chunk_item->stripe_length / Vcb->superblock.sector_size));
2647 
2648  context.stripes[i].context = &context;
2649  context.stripes[i].rewrite = false;
2650  }
2651 
2652  stripe = stripe_start;
2653 
2655 
2656  chunk_lock_range(Vcb, c, run_start, run_end - run_start);
2657 
2658  do {
2659  ULONG read_stripes;
2660  uint16_t missing_devices = 0;
2661  bool need_wait = false;
2662 
2663  if (max_read < stripe_end + 1 - stripe)
2664  read_stripes = max_read;
2665  else
2666  read_stripes = (ULONG)(stripe_end + 1 - stripe);
2667 
2668  context.stripes_left = c->chunk_item->num_stripes;
2669 
2670  // read megabyte by megabyte
2671  for (i = 0; i < c->chunk_item->num_stripes; i++) {
2672  if (c->devices[i]->devobj) {
2674 
2675  context.stripes[i].Irp = IoAllocateIrp(c->devices[i]->devobj->StackSize, false);
2676 
2677  if (!context.stripes[i].Irp) {
2678  ERR("IoAllocateIrp failed\n");
2680  goto end3;
2681  }
2682 
2683  context.stripes[i].Irp->MdlAddress = NULL;
2684 
2685  IrpSp = IoGetNextIrpStackLocation(context.stripes[i].Irp);
2687  IrpSp->FileObject = c->devices[i]->fileobj;
2688 
2689  if (c->devices[i]->devobj->Flags & DO_BUFFERED_IO) {
2690  context.stripes[i].Irp->AssociatedIrp.SystemBuffer = ExAllocatePoolWithTag(NonPagedPool, (ULONG)(read_stripes * c->chunk_item->stripe_length), ALLOC_TAG);
2691  if (!context.stripes[i].Irp->AssociatedIrp.SystemBuffer) {
2692  ERR("out of memory\n");
2694  goto end3;
2695  }
2696 
2698 
2699  context.stripes[i].Irp->UserBuffer = context.stripes[i].buf;
2700  } else if (c->devices[i]->devobj->Flags & DO_DIRECT_IO) {
2701  context.stripes[i].Irp->MdlAddress = IoAllocateMdl(context.stripes[i].buf, (ULONG)(read_stripes * c->chunk_item->stripe_length), false, false, NULL);
2702  if (!context.stripes[i].Irp->MdlAddress) {
2703  ERR("IoAllocateMdl failed\n");
2705  goto end3;
2706  }
2707 
2709 
2710  _SEH2_TRY {
2711  MmProbeAndLockPages(context.stripes[i].Irp->MdlAddress, KernelMode, IoWriteAccess);
2714  } _SEH2_END;
2715 
2716  if (!NT_SUCCESS(Status)) {
2717  ERR("MmProbeAndLockPages threw exception %08lx\n", Status);
2718  IoFreeMdl(context.stripes[i].Irp->MdlAddress);
2719  goto end3;
2720  }
2721  } else
2722  context.stripes[i].Irp->UserBuffer = context.stripes[i].buf;
2723 
2724  context.stripes[i].offset = stripe * c->chunk_item->stripe_length;
2725 
2726  IrpSp->Parameters.Read.Length = (ULONG)(read_stripes * c->chunk_item->stripe_length);
2727  IrpSp->Parameters.Read.ByteOffset.QuadPart = cis[i].offset + context.stripes[i].offset;
2728 
2729  context.stripes[i].Irp->UserIosb = &context.stripes[i].iosb;
2730  context.stripes[i].missing = false;
2731 
2732  IoSetCompletionRoutine(context.stripes[i].Irp, scrub_read_completion_raid56, &context.stripes[i], true, true, true);
2733 
2734  Vcb->scrub.data_scrubbed += read_stripes * c->chunk_item->stripe_length;
2735  need_wait = true;
2736  } else {
2737  context.stripes[i].Irp = NULL;
2738  context.stripes[i].missing = true;
2739  missing_devices++;
2740  InterlockedDecrement(&context.stripes_left);
2741  }
2742  }
2743 
2744  if (c->chunk_item->type & BLOCK_FLAG_RAID5 && missing_devices > 1) {
2745  ERR("too many missing devices (%u, maximum 1)\n", missing_devices);
2747  goto end3;
2748  } else if (c->chunk_item->type & BLOCK_FLAG_RAID6 && missing_devices > 2) {
2749  ERR("too many missing devices (%u, maximum 2)\n", missing_devices);
2751  goto end3;
2752  }
2753 
2754  if (need_wait) {
2756 
2757  for (i = 0; i < c->chunk_item->num_stripes; i++) {
2758  if (c->devices[i]->devobj)
2759  IoCallDriver(c->devices[i]->devobj, context.stripes[i].Irp);
2760  }
2761 
2763  }
2764 
2765  // return an error if any of the stripes returned an error
2766  for (i = 0; i < c->chunk_item->num_stripes; i++) {
2767  if (!context.stripes[i].missing && !NT_SUCCESS(context.stripes[i].iosb.Status)) {
2768  Status = context.stripes[i].iosb.Status;
2770  goto end3;
2771  }
2772  }
2773 
2774  if (c->chunk_item->type & BLOCK_FLAG_RAID6) {
2775  for (i = 0; i < read_stripes; i++) {
2776  scrub_raid6_stripe(Vcb, c, &context, stripe_start, stripe, i, missing_devices);
2777  }
2778  } else {
2779  for (i = 0; i < read_stripes; i++) {
2780  scrub_raid5_stripe(Vcb, c, &context, stripe_start, stripe, i, missing_devices);
2781  }
2782  }
2783  stripe += read_stripes;
2784 
2785 end3:
2786  for (i = 0; i < c->chunk_item->num_stripes; i++) {
2787  if (context.stripes[i].Irp) {
2788  if (c->devices[i]->devobj->Flags & DO_DIRECT_IO && context.stripes[i].Irp->MdlAddress) {
2789  MmUnlockPages(context.stripes[i].Irp->MdlAddress);
2790  IoFreeMdl(context.stripes[i].Irp->MdlAddress);
2791  }
2792  IoFreeIrp(context.stripes[i].Irp);
2793  context.stripes[i].Irp = NULL;
2794 
2795  if (context.stripes[i].rewrite) {
2796  Status = write_data_phys(c->devices[i]->devobj, c->devices[i]->fileobj, cis[i].offset + context.stripes[i].offset,
2797  context.stripes[i].buf, (uint32_t)(read_stripes * c->chunk_item->stripe_length));
2798 
2799  if (!NT_SUCCESS(Status)) {
2800  ERR("write_data_phys returned %08lx\n", Status);
2802  goto end2;
2803  }
2804  }
2805  }
2806  }
2807 
2808  if (!NT_SUCCESS(Status))
2809  break;
2810  } while (stripe < stripe_end);
2811 
2812 end2:
2813  chunk_unlock_range(Vcb, c, run_start, run_end - run_start);
2814 
2815  for (i = 0; i < c->chunk_item->num_stripes; i++) {
2816  ExFreePool(context.stripes[i].buf);
2817  ExFreePool(context.stripes[i].errorarr);
2818  }
2819  ExFreePool(context.stripes);
2820 
2821 end:
2822  ExFreePool(treearr);
2823  ExFreePool(allocarr);
2824  ExFreePool(context.parity_scratch);
2825 
2826  if (c->chunk_item->type & BLOCK_FLAG_RAID6)
2827  ExFreePool(context.parity_scratch2);
2828 
2829  if (c->chunk_item->type & BLOCK_FLAG_DATA) {
2830  ExFreePool(csumarr);
2831  ExFreePool(context.csum);
2832  }
2833 
2834  return Status;
2835 }
2836 
// NOTE(review): the signature line of this function is missing from this listing.
// Judging by the call sites elsewhere in this file it is scrub_chunk_raid56(Vcb, c, offset, changed),
// with offset and changed being pointers - confirm against the original source.
//
// Walks the extent tree from *offset (rounded down to a full-stripe boundary of chunk c),
// groups the full stripes that contain extents into contiguous runs, and hands each run to
// scrub_chunk_raid56_stripe_run. *changed is set once an extent has been seen; on success
// *offset is moved to the start of the full stripe following the last one processed.
// At most 64 extents or 128 MB of extent data are gathered per call.
2838  NTSTATUS Status;
2839  KEY searchkey;
2840  traverse_ptr tp;
2841  bool b;
2842  uint64_t full_stripe_len, stripe, stripe_start, stripe_end, total_data = 0;
// RAID6 chunks carry two parity stripes per full stripe, everything else handled here carries one.
2843  ULONG num_extents = 0, num_parity_stripes = c->chunk_item->type & BLOCK_FLAG_RAID6 ? 2 : 1;
2844 
// Amount of data (excluding parity) covered by one full stripe.
2845  full_stripe_len = (c->chunk_item->num_stripes - num_parity_stripes) * c->chunk_item->stripe_length;
2846  stripe = (*offset - c->offset) / full_stripe_len;
2847 
// Round the starting address down to the beginning of its full stripe.
2848  *offset = c->offset + (stripe * full_stripe_len);
2849 
2850  searchkey.obj_id = *offset;
2851  searchkey.obj_type = TYPE_METADATA_ITEM;
2852  searchkey.offset = 0xffffffffffffffff;
2853 
2854  Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, false, NULL);
2855  if (!NT_SUCCESS(Status)) {
2856  ERR("find_item returned %08lx\n", Status);
2857  return Status;
2858  }
2859 
2860  *changed = false;
2861 
2862  do {
2863  traverse_ptr next_tp;
2864 
// Stop once the items lie at or beyond the end of this chunk.
2865  if (tp.item->key.obj_id >= c->offset + c->chunk_item->size)
2866  break;
2867 
// NOTE(review): the line that opens the following block is missing from this listing
// (presumably an `if` selecting extent / metadata items at or after *offset) - confirm.
// Metadata items use the node size as their length; other items carry it in key.offset.
2869  uint64_t size = tp.item->key.obj_type == TYPE_METADATA_ITEM ? Vcb->superblock.node_size : tp.item->key.offset;
2870 
2871  TRACE("%I64x\n", tp.item->key.obj_id);
2872 
// NOTE(review): the condition guarding this error path is missing from the listing;
// going by the message it is presumably `if (size < Vcb->superblock.sector_size) {`.
2874  ERR("extent %I64x has size less than sector_size (%I64x < %x)\n", tp.item->key.obj_id, size, Vcb->superblock.sector_size);
2875  return STATUS_INTERNAL_ERROR;
2876  }
2877 
// Index of the full stripe containing the first byte of this extent.
2878  stripe = (tp.item->key.obj_id - c->offset) / full_stripe_len;
2879 
2880  if (*changed) {
// A run is already open: if this extent starts beyond the stripe directly after the
// run's end, scrub the run gathered so far and begin a new one here.
2881  if (stripe > stripe_end + 1) {
2882  Status = scrub_chunk_raid56_stripe_run(Vcb, c, stripe_start, stripe_end);
2883  if (!NT_SUCCESS(Status)) {
2884  ERR("scrub_chunk_raid56_stripe_run returned %08lx\n", Status);
2885  return Status;
2886  }
2887 
2888  stripe_start = stripe;
2889  }
2890  } else
// First extent seen in this call: open the first run.
2891  stripe_start = stripe;
2892 
// Index of the full stripe containing the last byte of this extent.
2893  stripe_end = (tp.item->key.obj_id + size - 1 - c->offset) / full_stripe_len;
2894 
2895  *changed = true;
2896 
2897  total_data += size;
2898  num_extents++;
2899 
2900  // only do so much at a time
2901  if (num_extents >= 64 || total_data >= 0x8000000) // 128 MB
2902  break;
2903  }
2904 
2905  b = find_next_item(Vcb, &tp, &next_tp, false, NULL);
2906 
2907  if (b)
2908  tp = next_tp;
2909  } while (b);
2910 
// Scrub whatever run is still open, then report where the next call should resume.
2911  if (*changed) {
2912  Status = scrub_chunk_raid56_stripe_run(Vcb, c, stripe_start, stripe_end);
2913  if (!NT_SUCCESS(Status)) {
2914  ERR("scrub_chunk_raid56_stripe_run returned %08lx\n", Status);
2915  return Status;
2916  }
2917 
2918  *offset = c->offset + ((stripe_end + 1) * full_stripe_len);
2919  }
2920 
2921  return STATUS_SUCCESS;
2922 }
2923 
// NOTE(review): the signature line of this function is missing from this listing.
// Judging by the call in scrub_thread it is scrub_chunk(Vcb, c, &offset, &changed) - confirm.
//
// Scrubs one batch of extents of chunk c, starting at *offset, while holding Vcb->tree_lock shared.
// RAID5 and RAID6 chunks are delegated to scrub_chunk_raid56. For other profiles the extent tree
// is walked: data extents are passed to scrub_data_extent together with their checksums, and
// tree extents are merged into runs that are passed to scrub_extent.
// *offset is advanced past each extent handled and *changed is set when at least one was handled.
// A batch ends after 64 extents or 128 MB. Returns the NTSTATUS of the first failing step.
2925  NTSTATUS Status;
2926  KEY searchkey;
2927  traverse_ptr tp;
2928  bool b = false, tree_run = false;
2929  ULONG type, num_extents = 0;
2930  uint64_t total_data = 0, tree_run_start, tree_run_end;
2931 
2932  TRACE("chunk %I64x\n", c->offset);
2933 
2934  ExAcquireResourceSharedLite(&Vcb->tree_lock, true);
2935 
// NOTE(review): the statement belonging to each non-RAID5/6 branch below is missing from this
// listing; presumably each one assigns `type`, which is later passed to scrub_extent and
// scrub_data_extent - confirm against the original source.
2936  if (c->chunk_item->type & BLOCK_FLAG_DUPLICATE)
2938  else if (c->chunk_item->type & BLOCK_FLAG_RAID0)
2940  else if (c->chunk_item->type & BLOCK_FLAG_RAID1)
2942  else if (c->chunk_item->type & BLOCK_FLAG_RAID10)
2944  else if (c->chunk_item->type & BLOCK_FLAG_RAID5) {
2945  Status = scrub_chunk_raid56(Vcb, c, offset, changed);
2946  goto end;
2947  } else if (c->chunk_item->type & BLOCK_FLAG_RAID6) {
2948  Status = scrub_chunk_raid56(Vcb, c, offset, changed);
2949  goto end;
2950  } else if (c->chunk_item->type & BLOCK_FLAG_RAID1C3)
2952  else if (c->chunk_item->type & BLOCK_FLAG_RAID1C4)
2954  else // SINGLE
2956 
2957  searchkey.obj_id = *offset;
2958  searchkey.obj_type = TYPE_METADATA_ITEM;
2959  searchkey.offset = 0xffffffffffffffff;
2960 
2961  Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, false, NULL);
2962  if (!NT_SUCCESS(Status)) {
2963  ERR("error - find_item returned %08lx\n", Status);
2964  goto end;
2965  }
2966 
2967  do {
2968  traverse_ptr next_tp;
2969 
// Stop once the items lie at or beyond the end of this chunk.
2970  if (tp.item->key.obj_id >= c->offset + c->chunk_item->size)
2971  break;
2972 
// NOTE(review): the line that opens the following block is missing from this listing
// (presumably an `if` selecting extent / metadata items at or after *offset) - confirm.
// Metadata items use the node size as their length; other items carry it in key.offset.
2974  uint64_t size = tp.item->key.obj_type == TYPE_METADATA_ITEM ? Vcb->superblock.node_size : tp.item->key.offset;
2975  bool is_tree;
2976  void* csum = NULL;
2977  RTL_BITMAP bmp;
2978  ULONG* bmparr = NULL, bmplen;
2979 
2980  TRACE("%I64x\n", tp.item->key.obj_id);
2981 
2982  is_tree = false;
2983 
// NOTE(review): the `if` paired with the `else` below is missing from the listing; presumably it
// tests for TYPE_METADATA_ITEM. Otherwise the EXTENT_ITEM flags decide whether this is a tree block.
2985  is_tree = true;
2986  else {
2987  EXTENT_ITEM* ei = (EXTENT_ITEM*)tp.item->data;
2988 
2989  if (tp.item->size < sizeof(EXTENT_ITEM)) {
2990  ERR("(%I64x,%x,%I64x) was %u bytes, expected at least %Iu\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_ITEM));
// NOTE(review): a line is missing here in the listing (presumably the Status assignment).
2992  goto end;
2993  }
2994 
2995  if (ei->flags & EXTENT_ITEM_TREE_BLOCK)
2996  is_tree = true;
2997  }
2998 
// NOTE(review): the condition guarding this error path and the Status assignment are missing from
// the listing; going by the message the check is presumably `size < Vcb->superblock.sector_size`.
3000  ERR("extent %I64x has size less than sector_size (%I64x < %x)\n", tp.item->key.obj_id, size, Vcb->superblock.sector_size);
3002  goto end;
3003  }
3004 
3005  // load csum
3006  if (!is_tree) {
3007  traverse_ptr tp2;
3008 
// One checksum of Vcb->csum_size bytes per sector of the extent.
3009  csum = ExAllocatePoolWithTag(PagedPool, (ULONG)(Vcb->csum_size * size / Vcb->superblock.sector_size), ALLOC_TAG);
3010  if (!csum) {
3011  ERR("out of memory\n");
// NOTE(review): a line is missing here in the listing (presumably the Status assignment).
3013  goto end;
3014  }
3015 
// One bitmap bit per sector of the extent; the backing array is rounded up to whole ULONGs.
3016  bmplen = (ULONG)(size / Vcb->superblock.sector_size);
3017 
3018  bmparr = ExAllocatePoolWithTag(PagedPool, (ULONG)(sector_align((bmplen >> 3) + 1, sizeof(ULONG))), ALLOC_TAG);
3019  if (!bmparr) {
3020  ERR("out of memory\n");
3021  ExFreePool(csum);
// NOTE(review): a line is missing here in the listing (presumably the Status assignment).
3023  goto end;
3024  }
3025 
3026  RtlInitializeBitMap(&bmp, bmparr, bmplen);
3027  RtlSetAllBits(&bmp); // 1 = no csum, 0 = csum
3028 
// Look up checksum items for this extent's address in the checksum tree.
3029  searchkey.obj_id = EXTENT_CSUM_ID;
3030  searchkey.obj_type = TYPE_EXTENT_CSUM;
3031  searchkey.offset = tp.item->key.obj_id;
3032 
// STATUS_NOT_FOUND is tolerated: every sector then stays marked as having no checksum.
3033  Status = find_item(Vcb, Vcb->checksum_root, &tp2, &searchkey, false, NULL);
3034  if (!NT_SUCCESS(Status) && Status != STATUS_NOT_FOUND) {
3035  ERR("find_item returned %08lx\n", Status);
3036  ExFreePool(csum);
3037  ExFreePool(bmparr);
3038  goto end;
3039  }
3040 
3041  if (Status != STATUS_NOT_FOUND) {
3042  do {
3043  traverse_ptr next_tp2;
3044 
3045  if (tp2.item->key.obj_type == TYPE_EXTENT_CSUM) {
// A checksum item starting at or after the end of the extent cannot overlap it.
3046  if (tp2.item->key.offset >= tp.item->key.obj_id + size)
3047  break;
// The item covers (item size / csum_size) sectors from key.offset; use it if that range
// reaches the start of the extent.
3048  else if (tp2.item->size >= Vcb->csum_size && tp2.item->key.offset + (tp2.item->size * Vcb->superblock.sector_size / Vcb->csum_size) >= tp.item->key.obj_id) {
// cs / ce: start and end address of the overlap between the extent and this checksum item.
3049  uint64_t cs = max(tp.item->key.obj_id, tp2.item->key.offset);
3050  uint64_t ce = min(tp.item->key.obj_id + size, tp2.item->key.offset + (tp2.item->size * Vcb->superblock.sector_size / Vcb->csum_size));
3051 
// Copy the checksums of the overlapping sectors to their position within csum.
3052  RtlCopyMemory((uint8_t*)csum + ((cs - tp.item->key.obj_id) * Vcb->csum_size / Vcb->superblock.sector_size),
3053  tp2.item->data + ((cs - tp2.item->key.offset) * Vcb->csum_size / Vcb->superblock.sector_size),
3054  (ULONG)((ce - cs) * Vcb->csum_size / Vcb->superblock.sector_size));
3055 
// Mark those sectors as having a checksum.
3056  RtlClearBits(&bmp, (ULONG)((cs - tp.item->key.obj_id) / Vcb->superblock.sector_size), (ULONG)((ce - cs) / Vcb->superblock.sector_size));
3057 
// Checksums now reach the end of the extent, so no further items are needed.
3058  if (ce == tp.item->key.obj_id + size)
3059  break;
3060  }
3061  }
3062 
3063  if (find_next_item(Vcb, &tp2, &next_tp2, false, NULL))
3064  tp2 = next_tp2;
3065  else
3066  break;
3067  } while (true);
3068  }
3069  }
3070 
// Tree extents are gathered into a run [tree_run_start, tree_run_end) that is scrubbed in one call.
3071  if (tree_run) {
// The open run ends when a data extent is met or the next tree extent starts past the run's end.
3072  if (!is_tree || tp.item->key.obj_id > tree_run_end) {
3073  Status = scrub_extent(Vcb, c, type, tree_run_start, (uint32_t)(tree_run_end - tree_run_start), NULL);
3074  if (!NT_SUCCESS(Status)) {
3075  ERR("scrub_extent returned %08lx\n", Status);
// NOTE(review): when !is_tree, csum and bmparr were allocated above and are not freed on
// this path - this looks like a leak; confirm against the original source.
3076  goto end;
3077  }
3078 
3079  if (!is_tree)
3080  tree_run = false;
3081  else {
3082  tree_run_start = tp.item->key.obj_id;
3083  tree_run_end = tp.item->key.obj_id + Vcb->superblock.node_size;
3084  }
3085  } else
// Tree extent starting within or directly at the end of the run: extend the run.
3086  tree_run_end = tp.item->key.obj_id + Vcb->superblock.node_size;
3087  } else if (is_tree) {
// No run open yet: start one at this tree extent.
3088  tree_run = true;
3089  tree_run_start = tp.item->key.obj_id;
3090  tree_run_end = tp.item->key.obj_id + Vcb->superblock.node_size;
3091  }
3092 
// Data extents are scrubbed immediately; the checksum buffers are released afterwards either way.
3093  if (!is_tree) {
3094  Status = scrub_data_extent(Vcb, c, tp.item->key.obj_id, type, csum, &bmp, bmplen);
3095  if (!NT_SUCCESS(Status)) {
3096  ERR("scrub_data_extent returned %08lx\n", Status);
3097  ExFreePool(csum);
3098  ExFreePool(bmparr);
3099  goto end;
3100  }
3101 
3102  ExFreePool(csum);
3103  ExFreePool(bmparr);
3104  }
3105 
// Tell the caller where to resume and that progress was made.
3106  *offset = tp.item->key.obj_id + size;
3107  *changed = true;
3108 
3109  total_data += size;
3110  num_extents++;
3111 
3112  // only do so much at a time
3113  if (num_extents >= 64 || total_data >= 0x8000000) // 128 MB
3114  break;
3115  }
3116 
3117  b = find_next_item(Vcb, &tp, &next_tp, false, NULL);
3118 
3119  if (b)
3120  tp = next_tp;
3121  } while (b);
3122 
// Scrub the tree run that is still open when the walk stops.
3123  if (tree_run) {
3124  Status = scrub_extent(Vcb, c, type, tree_run_start, (uint32_t)(tree_run_end - tree_run_start), NULL);
3125  if (!NT_SUCCESS(Status)) {
3126  ERR("scrub_extent returned %08lx\n", Status);
3127  goto end;
3128  }
3129  }
3130 
// NOTE(review): a line is missing here in the listing (presumably the final Status assignment).
3132 
3133 end:
3134  ExReleaseResourceLite(&Vcb->tree_lock);
3135 
3136  return Status;
3137 }
3138 
// Thread routine for a scrub run (started via PsCreateSystemThread elsewhere in this file).
// It writes out pending changes if needed, resets the scrub statistics, collects every
// non-readonly chunk into a local list and then scrubs the chunks one by one, waiting on
// Vcb->scrub.event before each batch. On exit it closes its own thread handle and signals
// Vcb->scrub.finished.
// NOTE(review): several lines of this function are missing from this listing, including the
// declaration of Vcb (presumably derived from `context`) and of the local `time` - confirm
// against the original source.
3139 _Function_class_(KSTART_ROUTINE)
3140 static void __stdcall scrub_thread(void* context) {
3142  LIST_ENTRY chunks, *le;
3143  NTSTATUS Status;
3145 
3146  KeInitializeEvent(&Vcb->scrub.finished, NotificationEvent, false);
3147 
3148  InitializeListHead(&chunks);
3149 
3150  ExAcquireResourceExclusiveLite(&Vcb->tree_lock, true);
3151 
// Write out outstanding changes first, unless the volume is read-only.
// NOTE(review): the statement of the else branch is missing from the listing
// (presumably it sets Status to a success value).
3152  if (Vcb->need_write && !Vcb->readonly)
3153  Status = do_write(Vcb, NULL);
3154  else
3156 
3157  free_trees(Vcb);
3158 
3159  if (!NT_SUCCESS(Status)) {
3160  ExReleaseResourceLite(&Vcb->tree_lock);
3161  ERR("do_write returned %08lx\n", Status);
3162  Vcb->scrub.error = Status;
3163  goto end;
3164  }
3165 
3166  ExConvertExclusiveToSharedLite(&Vcb->tree_lock);
3167 
3168  ExAcquireResourceExclusiveLite(&Vcb->scrub.stats_lock, true);
3169 
// Reset the statistics for the new run.
3170  KeQuerySystemTime(&Vcb->scrub.start_time);
3171  Vcb->scrub.finish_time.QuadPart = 0;
3172  Vcb->scrub.resume_time.QuadPart = Vcb->scrub.start_time.QuadPart;
3173  Vcb->scrub.duration.QuadPart = 0;
3174  Vcb->scrub.total_chunks = 0;
3175  Vcb->scrub.chunks_left = 0;
3176  Vcb->scrub.data_scrubbed = 0;
3177  Vcb->scrub.num_errors = 0;
3178 
// Discard the error records currently on the list.
// NOTE(review): the line that removes an entry from Vcb->scrub.errors and defines `err`
// is missing from the listing.
3179  while (!IsListEmpty(&Vcb->scrub.errors)) {
3181  ExFreePool(err);
3182  }
3183 
3184  ExAcquireResourceSharedLite(&Vcb->chunk_lock, true);
3185 
// Queue every chunk that is not read-only on the local list, linked through list_entry_balance.
// NOTE(review): the lines defining `c` from `le`, and the lines just before and after the
// `if` (presumably per-chunk lock acquire/release), are missing from the listing.
3186  le = Vcb->chunks.Flink;
3187  while (le != &Vcb->chunks) {
3189 
3191 
3192  if (!c->readonly) {
3193  InsertTailList(&chunks, &c->list_entry_balance);
3194  Vcb->scrub.total_chunks++;
3195  Vcb->scrub.chunks_left++;
3196  }
3197 
3199 
3200  le = le->Flink;
3201  }
3202 
3203  ExReleaseResourceLite(&Vcb->chunk_lock);
3204 
3205  ExReleaseResource(&Vcb->scrub.stats_lock);
3206 
3207  ExReleaseResourceLite(&Vcb->tree_lock);
3208 
// Process the queued chunks. The list is always drained, even once stopping has been set,
// so that reloc and list_entry_balance are reset for every chunk.
3209  while (!IsListEmpty(&chunks)) {
3210  chunk* c = CONTAINING_RECORD(RemoveHeadList(&chunks), chunk, list_entry_balance);
3211  uint64_t offset = c->offset;
3212  bool changed;
3213 
3214  c->reloc = true;
3215 
// Blocks here while Vcb->scrub.event is not signalled.
3216  KeWaitForSingleObject(&Vcb->scrub.event, Executive, KernelMode, false, NULL);
3217 
3218  if (!Vcb->scrub.stopping) {
// Scrub the chunk batch by batch until its end is reached, a stop is requested,
// an error occurs, or a batch makes no progress.
3219  do {
3220  changed = false;
3221 
3222  Status = scrub_chunk(Vcb, c, &offset, &changed);
3223  if (!NT_SUCCESS(Status)) {
3224  ERR("scrub_chunk returned %08lx\n", Status);
3225  Vcb->scrub.stopping = true;
3226  Vcb->scrub.error = Status;
3227  break;
3228  }
3229 
3230  if (offset == c->offset + c->chunk_item->size || Vcb->scrub.stopping)
3231  break;
3232 
3233  KeWaitForSingleObject(&Vcb->scrub.event, Executive, KernelMode, false, NULL);
3234  } while (changed);
3235  }
3236 
3237  ExAcquireResourceExclusiveLite(&Vcb->scrub.stats_lock, true);
3238 
// chunks_left is only decremented while the run has not been stopped.
3239  if (!Vcb->scrub.stopping)
3240  Vcb->scrub.chunks_left--;
3241 
// Record the finish time when the last queued chunk has been taken off the list.
3242  if (IsListEmpty(&chunks))
3243  KeQuerySystemTime(&Vcb->scrub.finish_time);
3244 
3245  ExReleaseResource(&Vcb->scrub.stats_lock);
3246 
3247  c->reloc = false;
3248  c->list_entry_balance.Flink = NULL;
3249  }
3250 
// Add the time elapsed since the last resume to the accumulated duration.
// NOTE(review): the line that fills in `time` is missing from the listing
// (presumably a KeQuerySystemTime call).
3252  Vcb->scrub.duration.QuadPart += time.QuadPart - Vcb->scrub.resume_time.QuadPart;
3253 
3254 end:
3255  ZwClose(Vcb->scrub.thread);
3256  Vcb->scrub.thread = NULL;
3257 
3258  KeSetEvent(&Vcb->scrub.finished, 0, false);
3259 }
3260 
// NOTE(review): the signature line of this function is missing from this listing; the body uses
// `Vcb` and `processor_mode`, and the function is presumably the "start scrub" entry point - confirm.
//
// Starts a scrub by creating a system thread that runs scrub_thread with Vcb as its context.
// Returns STATUS_DEVICE_NOT_READY if the volume is locked, a balance thread exists, or a
// scrub thread already exists; otherwise the result of PsCreateSystemThread.
3262  NTSTATUS Status;
3263  OBJECT_ATTRIBUTES oa;
3264 
// The caller needs SE_MANAGE_VOLUME_PRIVILEGE.
// NOTE(review): the statement executed when the check fails is missing from the listing.
3265  if (!SeSinglePrivilegeCheck(RtlConvertLongToLuid(SE_MANAGE_VOLUME_PRIVILEGE), processor_mode))
3267 
3268  if (Vcb->locked) {
3269  WARN("cannot start scrub while locked\n");
3270  return STATUS_DEVICE_NOT_READY;
3271  }
3272 
3273  if (Vcb->balance.thread) {
3274  WARN("cannot start scrub while balance running\n");
3275  return STATUS_DEVICE_NOT_READY;
3276  }
3277 
3278  if (Vcb->scrub.thread) {
3279  WARN("scrub already running\n");
3280  return STATUS_DEVICE_NOT_READY;
3281  }
3282 
// NOTE(review): the statement executed for a read-only volume is missing from the listing.
3283  if (Vcb->readonly)
3285 
// Reset the control state; paused is false here, so the event starts out signalled.
3286  Vcb->scrub.stopping = false;
3287  Vcb->scrub.paused = false;
3288  Vcb->scrub.error = STATUS_SUCCESS;
3289  KeInitializeEvent(&Vcb->scrub.event, NotificationEvent, !Vcb->scrub.paused);
3290 
// NOTE(review): a line is missing here in the listing (presumably the initialisation of `oa`).
3292 
3293  Status = PsCreateSystemThread(&Vcb->scrub.thread, 0, &oa, NULL, NULL, scrub_thread, Vcb);
3294  if (!NT_SUCCESS(Status)) {
3295  ERR("PsCreateSystemThread returned %08lx\n", Status);
3296  return Status;
3297  }
3298 
3299  return STATUS_SUCCESS;
3300 }
3301 
// NOTE(review): the signature line and the line declaring `bqs` are missing from this listing.
// The body uses `Vcb`, `processor_mode`, `length` and `bqs` (a btrfs_query_scrub, presumably the
// caller's output buffer of `length` bytes) - confirm against the original source.
//
// Copies the current scrub status, statistics and recorded errors into *bqs while holding
// Vcb->scrub.stats_lock shared. Errors are written as a chain of variable-length
// btrfs_scrub_error entries starting at bqs->errors.
// Returns STATUS_BUFFER_TOO_SMALL if `length` does not cover the fixed part of the structure.
3304  ULONG len;
3305  NTSTATUS Status;
3306  LIST_ENTRY* le;
3307  btrfs_scrub_error* bse = NULL;
3308 
// The caller needs SE_MANAGE_VOLUME_PRIVILEGE.
// NOTE(review): the statement executed when the check fails is missing from the listing.
3309  if (!SeSinglePrivilegeCheck(RtlConvertLongToLuid(SE_MANAGE_VOLUME_PRIVILEGE), processor_mode))
3311 
3312  if (length < offsetof(btrfs_query_scrub, errors))
3313  return STATUS_BUFFER_TOO_SMALL;
3314 
3315  ExAcquireResourceSharedLite(&Vcb->scrub.stats_lock, true);
3316 
// Reported as running or paused only while a thread exists and chunks remain; otherwise stopped.
3317  if (Vcb->scrub.thread && Vcb->scrub.chunks_left > 0)
3318  bqs->status = Vcb->scrub.paused ? BTRFS_SCRUB_PAUSED : BTRFS_SCRUB_RUNNING;
3319  else
3320  bqs->status = BTRFS_SCRUB_STOPPED;
3321 
3322  bqs->start_time.QuadPart = Vcb->scrub.start_time.QuadPart;
3323  bqs->finish_time.QuadPart = Vcb->scrub.finish_time.QuadPart;
3324  bqs->chunks_left = Vcb->scrub.chunks_left;
3325  bqs->total_chunks = Vcb->scrub.total_chunks;
3326  bqs->data_scrubbed = Vcb->scrub.data_scrubbed;
3327 
3328  bqs->duration = Vcb->scrub.duration.QuadPart;
3329 
// While running, the stored duration excludes the time since the last resume, so add it here.
// NOTE(review): the lines declaring and filling `time` are missing from the listing.
3330  if (bqs->status == BTRFS_SCRUB_RUNNING) {
3332 
3334  bqs->duration += time.QuadPart - Vcb->scrub.resume_time.QuadPart;
3335  }
3336 
3337  bqs->error = Vcb->scrub.error;
3338 
3339  bqs->num_errors = Vcb->scrub.num_errors;
3340 
// Bytes available for error entries after the fixed part of the structure.
3341  len = length - offsetof(btrfs_query_scrub, errors);
3342 
// NOTE(review): the line defining `err` from `le` is missing from the listing.
3343  le = Vcb->scrub.errors.Flink;
3344  while (le != &Vcb->scrub.errors) {
3346  ULONG errlen;
3347 
// Entry size depends on its kind: metadata entries end after the firstitem KEY,
// data entries end after filename_length bytes of filename.
3348  if (err->is_metadata)
3349  errlen = offsetof(btrfs_scrub_error, metadata.firstitem) + sizeof(KEY);
3350  else
3351  errlen = offsetof(btrfs_scrub_error, data.filename) + err->data.filename_length;
3352 
// Stop copying when the next entry no longer fits in the remaining space.
// NOTE(review): a line is missing here in the listing (presumably the Status assignment).
3353  if (len < errlen) {
3355  goto end;
3356  }
3357 
3358  if (!bse)
3359  bse = &bqs->errors;
3360  else {
3361  ULONG lastlen;
3362 
// Link the previous entry to this one: next_entry holds the previous entry's byte length,
// and the new entry is placed directly after it.
3363  if (bse->is_metadata)
3364  lastlen = offsetof(btrfs_scrub_error, metadata.firstitem) + sizeof(KEY);
3365  else
3366  lastlen = offsetof(btrfs_scrub_error, data.filename) + bse->data.filename_length;
3367 
3368  bse->next_entry = lastlen;
3369  bse = (btrfs_scrub_error*)(((uint8_t*)bse) + lastlen);
3370  }
3371 
// next_entry stays 0 on the last entry written.
3372  bse->next_entry = 0;
3373  bse->address = err->address;
3374  bse->device = err->device;
3375  bse->recovered = err->recovered;
3376  bse->is_metadata = err->is_metadata;
3377  bse->parity = err->parity;
3378 
3379  if (err->is_metadata) {
3380  bse->metadata.root = err->metadata.root;
3381  bse->metadata.level = err->metadata.level;
3382  bse->metadata.firstitem = err->metadata.firstitem;
3383  } else {
3384  bse->data.subvol = err->data.subvol;
3385  bse->data.offset = err->data.offset;
3386  bse->data.filename_length = err->data.filename_length;
3387  RtlCopyMemory(bse->data.filename, err->data.filename, err->data.filename_length);
3388  }
3389 
3390  len -= errlen;
3391  le = le->Flink;
3392  }
3393 
// NOTE(review): a line is missing here in the listing (presumably the final Status assignment).
3395 
3396 end:
3397  ExReleaseResourceLite(&Vcb->scrub.stats_lock);
3398 
3399  return Status;
3400 }
3401 
3404 
// NOTE(review): the signature line and the declaration of `time` are missing from this listing;
// the body uses `Vcb` and `processor_mode`, and the function is presumably the "pause scrub"
// entry point - confirm against the original source.
//
// Pauses a scrub: sets Vcb->scrub.paused and clears Vcb->scrub.event, which scrub_thread waits
// on before each batch. Returns STATUS_DEVICE_NOT_READY if no scrub thread exists or the scrub
// is already paused.
// The caller needs SE_MANAGE_VOLUME_PRIVILEGE.
// NOTE(review): the statement executed when the check fails is missing from the listing.
3405  if (!SeSinglePrivilegeCheck(RtlConvertLongToLuid(SE_MANAGE_VOLUME_PRIVILEGE), processor_mode))
3407 
3408  if (!Vcb->scrub.thread)
3409  return STATUS_DEVICE_NOT_READY;
3410 
3411  if (Vcb->scrub.paused)
3412  return STATUS_DEVICE_NOT_READY;
3413 
3414  Vcb->scrub.paused = true;
3415  KeClearEvent(&Vcb->scrub.event);
3416 
// Add the time elapsed since the last resume to the accumulated duration.
// NOTE(review): the line that fills in `time` is missing from the listing
// (presumably a KeQuerySystemTime call).
3418  Vcb->scrub.duration.QuadPart += time.QuadPart - Vcb->scrub.resume_time.QuadPart;
3419 
3420  return STATUS_SUCCESS;
3421 }
3422 
3424  if (!SeSinglePrivilegeCheck(RtlConvertLongToLuid(SE_MANAGE_VOLUME_PRIVILEGE), processor_mode))