ReactOS  0.4.15-dev-5488-ge316d61
scrub.c
Go to the documentation of this file.
1 /* Copyright (c) Mark Harmstone 2017
2  *
3  * This file is part of WinBtrfs.
4  *
5  * WinBtrfs is free software: you can redistribute it and/or modify
6  * it under the terms of the GNU Lesser General Public Licence as published by
7  * the Free Software Foundation, either version 3 of the Licence, or
8  * (at your option) any later version.
9  *
10  * WinBtrfs is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  * GNU Lesser General Public Licence for more details.
14  *
15  * You should have received a copy of the GNU Lesser General Public Licence
16  * along with WinBtrfs. If not, see <http://www.gnu.org/licenses/>. */
17 
18 #include "btrfs_drv.h"
19 
20 #define SCRUB_UNIT 0x100000 // 1 MB
21 
22 struct _scrub_context;
23 
24 typedef struct {
31  bool csum_error;
32  void* bad_csums;
34 
35 typedef struct _scrub_context {
40 
41 typedef struct {
45 } path_part;
46 
48  LIST_ENTRY *le, parts;
49  root* r = NULL;
50  KEY searchkey;
52  uint64_t dir;
53  bool orig_subvol = true, not_in_tree = false;
57  ULONG utf16len;
58 
59  le = Vcb->roots.Flink;
60  while (le != &Vcb->roots) {
62 
63  if (r2->id == subvol) {
64  r = r2;
65  break;
66  }
67 
68  le = le->Flink;
69  }
70 
71  if (!r) {
72  ERR("could not find subvol %I64x\n", subvol);
73  return;
74  }
75 
77 
78  dir = inode;
79 
80  while (true) {
81  if (dir == r->root_item.objid) {
82  if (r == Vcb->root_fileref->fcb->subvol)
83  break;
84 
85  searchkey.obj_id = r->id;
86  searchkey.obj_type = TYPE_ROOT_BACKREF;
87  searchkey.offset = 0xffffffffffffffff;
88 
89  Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, false, NULL);
90  if (!NT_SUCCESS(Status)) {
91  ERR("find_item returned %08lx\n", Status);
92  goto end;
93  }
94 
95  if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == searchkey.obj_type) {
96  ROOT_REF* rr = (ROOT_REF*)tp.item->data;
97  path_part* pp;
98 
99  if (tp.item->size < sizeof(ROOT_REF)) {
100  ERR("(%I64x,%x,%I64x) was %u bytes, expected at least %Iu\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(ROOT_REF));
101  goto end;
102  }
103 
104  if (tp.item->size < offsetof(ROOT_REF, name[0]) + rr->n) {
105  ERR("(%I64x,%x,%I64x) was %u bytes, expected at least %Iu\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset,
106  tp.item->size, offsetof(ROOT_REF, name[0]) + rr->n);
107  goto end;
108  }
109 
111  if (!pp) {
112  ERR("out of memory\n");
113  goto end;
114  }
115 
116  pp->name.Buffer = rr->name;
117  pp->name.Length = pp->name.MaximumLength = rr->n;
118  pp->orig_subvol = false;
119 
120  InsertTailList(&parts, &pp->list_entry);
121 
122  r = NULL;
123 
124  le = Vcb->roots.Flink;
125  while (le != &Vcb->roots) {
127 
128  if (r2->id == tp.item->key.offset) {
129  r = r2;
130  break;
131  }
132 
133  le = le->Flink;
134  }
135 
136  if (!r) {
137  ERR("could not find subvol %I64x\n", tp.item->key.offset);
138  goto end;
139  }
140 
141  dir = rr->dir;
142  orig_subvol = false;
143  } else {
144  not_in_tree = true;
145  break;
146  }
147  } else {
148  searchkey.obj_id = dir;
149  searchkey.obj_type = TYPE_INODE_EXTREF;
150  searchkey.offset = 0xffffffffffffffff;
151 
152  Status = find_item(Vcb, r, &tp, &searchkey, false, NULL);
153  if (!NT_SUCCESS(Status)) {
154  ERR("find_item returned %08lx\n", Status);
155  goto end;
156  }
157 
158  if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == TYPE_INODE_REF) {
159  INODE_REF* ir = (INODE_REF*)tp.item->data;
160  path_part* pp;
161 
162  if (tp.item->size < sizeof(INODE_REF)) {
163  ERR("(%I64x,%x,%I64x) was %u bytes, expected at least %Iu\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(INODE_REF));
164  goto end;
165  }
166 
167  if (tp.item->size < offsetof(INODE_REF, name[0]) + ir->n) {
168  ERR("(%I64x,%x,%I64x) was %u bytes, expected at least %Iu\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset,
169  tp.item->size, offsetof(INODE_REF, name[0]) + ir->n);
170  goto end;
171  }
172 
174  if (!pp) {
175  ERR("out of memory\n");
176  goto end;
177  }
178 
179  pp->name.Buffer = ir->name;
180  pp->name.Length = pp->name.MaximumLength = ir->n;
181  pp->orig_subvol = orig_subvol;
182 
183  InsertTailList(&parts, &pp->list_entry);
184 
185  if (dir == tp.item->key.offset)
186  break;
187 
188  dir = tp.item->key.offset;
189  } else if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == TYPE_INODE_EXTREF) {
191  path_part* pp;
192 
193  if (tp.item->size < sizeof(INODE_EXTREF)) {
194  ERR("(%I64x,%x,%I64x) was %u bytes, expected at least %Iu\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset,
195  tp.item->size, sizeof(INODE_EXTREF));
196  goto end;
197  }
198 
199  if (tp.item->size < offsetof(INODE_EXTREF, name[0]) + ier->n) {
200  ERR("(%I64x,%x,%I64x) was %u bytes, expected at least %Iu\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset,
201  tp.item->size, offsetof(INODE_EXTREF, name[0]) + ier->n);
202  goto end;
203  }
204 
206  if (!pp) {
207  ERR("out of memory\n");
208  goto end;
209  }
210 
211  pp->name.Buffer = ier->name;
212  pp->name.Length = pp->name.MaximumLength = ier->n;
213  pp->orig_subvol = orig_subvol;
214 
215  InsertTailList(&parts, &pp->list_entry);
216 
217  if (dir == ier->dir)
218  break;
219 
220  dir = ier->dir;
221  } else {
222  ERR("could not find INODE_REF for inode %I64x in subvol %I64x\n", dir, r->id);
223  goto end;
224  }
225  }
226  }
227 
228  fn.MaximumLength = 0;
229 
230  if (not_in_tree) {
231  le = parts.Blink;
232  while (le != &parts) {
234  LIST_ENTRY* le2 = le->Blink;
235 
236  if (pp->orig_subvol)
237  break;
238 
240  ExFreePool(pp);
241 
242  le = le2;
243  }
244  }
245 
246  le = parts.Flink;
247  while (le != &parts) {
249 
250  fn.MaximumLength += pp->name.Length + 1;
251 
252  le = le->Flink;
253  }
254 
255  fn.Buffer = ExAllocatePoolWithTag(PagedPool, fn.MaximumLength, ALLOC_TAG);
256  if (!fn.Buffer) {
257  ERR("out of memory\n");
258  goto end;
259  }
260 
261  fn.Length = 0;
262 
263  le = parts.Blink;
264  while (le != &parts) {
266 
267  fn.Buffer[fn.Length] = '\\';
268  fn.Length++;
269 
270  RtlCopyMemory(&fn.Buffer[fn.Length], pp->name.Buffer, pp->name.Length);
271  fn.Length += pp->name.Length;
272 
273  le = le->Blink;
274  }
275 
276  if (not_in_tree)
277  ERR("subvol %I64x, %.*s, offset %I64x\n", subvol, fn.Length, fn.Buffer, offset);
278  else
279  ERR("%.*s, offset %I64x\n", fn.Length, fn.Buffer, offset);
280 
281  Status = utf8_to_utf16(NULL, 0, &utf16len, fn.Buffer, fn.Length);
282  if (!NT_SUCCESS(Status)) {
283  ERR("utf8_to_utf16 1 returned %08lx\n", Status);
284  ExFreePool(fn.Buffer);
285  goto end;
286  }
287 
289  if (!err) {
290  ERR("out of memory\n");
291  ExFreePool(fn.Buffer);
292  goto end;
293  }
294 
295  err->address = addr;
296  err->device = devid;
297  err->recovered = false;
298  err->is_metadata = false;
299  err->parity = false;
300 
301  err->data.subvol = not_in_tree ? subvol : 0;
302  err->data.offset = offset;
303  err->data.filename_length = (uint16_t)utf16len;
304 
305  Status = utf8_to_utf16(err->data.filename, utf16len, &utf16len, fn.Buffer, fn.Length);
306  if (!NT_SUCCESS(Status)) {
307  ERR("utf8_to_utf16 2 returned %08lx\n", Status);
308  ExFreePool(fn.Buffer);
309  ExFreePool(err);
310  goto end;
311  }
312 
313  ExAcquireResourceExclusiveLite(&Vcb->scrub.stats_lock, true);
314 
315  Vcb->scrub.num_errors++;
316  InsertTailList(&Vcb->scrub.errors, &err->list_entry);
317 
318  ExReleaseResourceLite(&Vcb->scrub.stats_lock);
319 
320  ExFreePool(fn.Buffer);
321 
322 end:
323  while (!IsListEmpty(&parts)) {
325 
326  ExFreePool(pp);
327  }
328 }
329 
331  tree_header* tree;
333  leaf_node* ln;
334  ULONG i;
335 
336  tree = ExAllocatePoolWithTag(PagedPool, Vcb->superblock.node_size, ALLOC_TAG);
337  if (!tree) {
338  ERR("out of memory\n");
339  return;
340  }
341 
342  Status = read_data(Vcb, treeaddr, Vcb->superblock.node_size, NULL, true, (uint8_t*)tree, NULL, NULL, NULL, 0, false, NormalPagePriority);
343  if (!NT_SUCCESS(Status)) {
344  ERR("read_data returned %08lx\n", Status);
345  goto end;
346  }
347 
348  if (tree->level != 0) {
349  ERR("tree level was %x, expected 0\n", tree->level);
350  goto end;
351  }
352 
353  ln = (leaf_node*)&tree[1];
354 
355  for (i = 0; i < tree->num_items; i++) {
356  if (ln[i].key.obj_type == TYPE_EXTENT_DATA && ln[i].size >= sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2)) {
357  EXTENT_DATA* ed = (EXTENT_DATA*)((uint8_t*)tree + sizeof(tree_header) + ln[i].offset);
358  EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ed->data;
359 
360  if (ed->type == EXTENT_TYPE_REGULAR && ed2->size != 0 && ed2->address == addr)
361  log_file_checksum_error(Vcb, addr, devid, tree->tree_id, ln[i].key.obj_id, ln[i].key.offset + addr - extent);
362  }
363  }
364 
365 end:
366  ExFreePool(tree);
367 }
368 
370  scrub_error* err;
371 
373  if (!err) {
374  ERR("out of memory\n");
375  return;
376  }
377 
378  err->address = addr;
379  err->device = devid;
380  err->recovered = false;
381  err->is_metadata = true;
382  err->parity = false;
383 
384  err->metadata.root = root;
385  err->metadata.level = level;
386 
387  if (firstitem) {
388  ERR("root %I64x, level %u, first item (%I64x,%x,%I64x)\n", root, level, firstitem->obj_id,
389  firstitem->obj_type, firstitem->offset);
390 
391  err->metadata.firstitem = *firstitem;
392  } else {
393  ERR("root %I64x, level %u\n", root, level);
394 
395  RtlZeroMemory(&err->metadata.firstitem, sizeof(KEY));
396  }
397 
398  ExAcquireResourceExclusiveLite(&Vcb->scrub.stats_lock, true);
399 
400  Vcb->scrub.num_errors++;
401  InsertTailList(&Vcb->scrub.errors, &err->list_entry);
402 
403  ExReleaseResourceLite(&Vcb->scrub.stats_lock);
404 }
405 
407  tree_header* tree;
409  internal_node* in;
410  ULONG i;
411 
412  tree = ExAllocatePoolWithTag(PagedPool, Vcb->superblock.node_size, ALLOC_TAG);
413  if (!tree) {
414  ERR("out of memory\n");
415  return;
416  }
417 
418  Status = read_data(Vcb, offset, Vcb->superblock.node_size, NULL, true, (uint8_t*)tree, NULL, NULL, NULL, 0, false, NormalPagePriority);
419  if (!NT_SUCCESS(Status)) {
420  ERR("read_data returned %08lx\n", Status);
421  goto end;
422  }
423 
424  if (tree->level == 0) {
425  ERR("tree level was 0\n");
426  goto end;
427  }
428 
429  in = (internal_node*)&tree[1];
430 
431  for (i = 0; i < tree->num_items; i++) {
432  if (in[i].address == address) {
433  log_tree_checksum_error(Vcb, address, devid, tree->tree_id, tree->level - 1, &in[i].key);
434  break;
435  }
436  }
437 
438 end:
439  ExFreePool(tree);
440 }
441 
443  KEY searchkey;
446  EXTENT_ITEM* ei;
447  EXTENT_ITEM2* ei2 = NULL;
448  uint8_t* ptr;
449  ULONG len;
450  uint64_t rc;
451 
452  // FIXME - still log even if rest of this function fails
453 
454  searchkey.obj_id = address;
455  searchkey.obj_type = TYPE_METADATA_ITEM;
456  searchkey.offset = 0xffffffffffffffff;
457 
458  Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, false, NULL);
459  if (!NT_SUCCESS(Status)) {
460  ERR("find_item returned %08lx\n", Status);
461  return;
462  }
463 
465  tp.item->key.obj_id >= address + Vcb->superblock.sector_size ||
467  (tp.item->key.obj_type == TYPE_METADATA_ITEM && tp.item->key.obj_id + Vcb->superblock.node_size <= address)
468  )
469  return;
470 
471  if (tp.item->size < sizeof(EXTENT_ITEM)) {
472  ERR("(%I64x,%x,%I64x) was %u bytes, expected at least %Iu\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_ITEM));
473  return;
474  }
475 
476  ei = (EXTENT_ITEM*)tp.item->data;
477  ptr = (uint8_t*)&ei[1];
478  len = tp.item->size - sizeof(EXTENT_ITEM);
479 
481  if (tp.item->size < sizeof(EXTENT_ITEM) + sizeof(EXTENT_ITEM2)) {
482  ERR("(%I64x,%x,%I64x) was %u bytes, expected at least %Iu\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset,
483  tp.item->size, sizeof(EXTENT_ITEM) + sizeof(EXTENT_ITEM2));
484  return;
485  }
486 
487  ei2 = (EXTENT_ITEM2*)ptr;
488 
489  ptr += sizeof(EXTENT_ITEM2);
490  len -= sizeof(EXTENT_ITEM2);
491  }
492 
493  rc = 0;
494 
495  while (len > 0) {
496  uint8_t type = *ptr;
497 
498  ptr++;
499  len--;
500 
501  if (type == TYPE_TREE_BLOCK_REF) {
502  TREE_BLOCK_REF* tbr;
503 
504  if (len < sizeof(TREE_BLOCK_REF)) {
505  ERR("TREE_BLOCK_REF takes up %Iu bytes, but only %lu remaining\n", sizeof(TREE_BLOCK_REF), len);
506  break;
507  }
508 
509  tbr = (TREE_BLOCK_REF*)ptr;
510 
511  log_tree_checksum_error(Vcb, address, devid, tbr->offset, ei2 ? ei2->level : (uint8_t)tp.item->key.offset, ei2 ? &ei2->firstitem : NULL);
512 
513  rc++;
514 
515  ptr += sizeof(TREE_BLOCK_REF);
516  len -= sizeof(TREE_BLOCK_REF);
517  } else if (type == TYPE_EXTENT_DATA_REF) {
518  EXTENT_DATA_REF* edr;
519 
520  if (len < sizeof(EXTENT_DATA_REF)) {
521  ERR("EXTENT_DATA_REF takes up %Iu bytes, but only %lu remaining\n", sizeof(EXTENT_DATA_REF), len);
522  break;
523  }
524 
525  edr = (EXTENT_DATA_REF*)ptr;
526 
527  log_file_checksum_error(Vcb, address, devid, edr->root, edr->objid, edr->offset + address - tp.item->key.obj_id);
528 
529  rc += edr->count;
530 
531  ptr += sizeof(EXTENT_DATA_REF);
532  len -= sizeof(EXTENT_DATA_REF);
533  } else if (type == TYPE_SHARED_BLOCK_REF) {
534  SHARED_BLOCK_REF* sbr;
535 
536  if (len < sizeof(SHARED_BLOCK_REF)) {
537  ERR("SHARED_BLOCK_REF takes up %Iu bytes, but only %lu remaining\n", sizeof(SHARED_BLOCK_REF), len);
538  break;
539  }
540 
541  sbr = (SHARED_BLOCK_REF*)ptr;
542 
544 
545  rc++;
546 
547  ptr += sizeof(SHARED_BLOCK_REF);
548  len -= sizeof(SHARED_BLOCK_REF);
549  } else if (type == TYPE_SHARED_DATA_REF) {
550  SHARED_DATA_REF* sdr;
551 
552  if (len < sizeof(SHARED_DATA_REF)) {
553  ERR("SHARED_DATA_REF takes up %Iu bytes, but only %lu remaining\n", sizeof(SHARED_DATA_REF), len);
554  break;
555  }
556 
557  sdr = (SHARED_DATA_REF*)ptr;
558 
560 
561  rc += sdr->count;
562 
563  ptr += sizeof(SHARED_DATA_REF);
564  len -= sizeof(SHARED_DATA_REF);
565  } else {
566  ERR("unknown extent type %x\n", type);
567  break;
568  }
569  }
570 
571  if (rc < ei->refcount) {
572  do {
573  traverse_ptr next_tp;
574 
575  if (find_next_item(Vcb, &tp, &next_tp, false, NULL))
576  tp = next_tp;
577  else
578  break;
579 
580  if (tp.item->key.obj_id == address) {
582  log_tree_checksum_error(Vcb, address, devid, tp.item->key.offset, ei2 ? ei2->level : (uint8_t)tp.item->key.offset, ei2 ? &ei2->firstitem : NULL);
583  else if (tp.item->key.obj_type == TYPE_EXTENT_DATA_REF) {
584  EXTENT_DATA_REF* edr;
585 
586  if (tp.item->size < sizeof(EXTENT_DATA_REF)) {
587  ERR("(%I64x,%x,%I64x) was %u bytes, expected %Iu\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset,
588  tp.item->size, sizeof(EXTENT_DATA_REF));
589  break;
590  }
591 
592  edr = (EXTENT_DATA_REF*)tp.item->data;
593 
594  log_file_checksum_error(Vcb, address, devid, edr->root, edr->objid, edr->offset + address - tp.item->key.obj_id);
595  } else if (tp.item->key.obj_type == TYPE_SHARED_BLOCK_REF)
597  else if (tp.item->key.obj_type == TYPE_SHARED_DATA_REF)
599  } else
600  break;
601  } while (true);
602  }
603 }
604 
// Records a checksum or parity error found at logical address `addr` on the
// device with id `devid`.
//
//   metadata    - true for a metadata checksum error, false for a data one
//   recoverable - true if the error is being recovered from, false otherwise
//   parity      - true if the error is a parity error (only consulted on the
//                 recoverable path)
//
// Recoverable errors are reported through ERR, described in a scrub_error
// record (recovered = true, type-specific details zeroed) and appended to
// Vcb->scrub.errors, with Vcb->scrub.num_errors incremented, all while holding
// Vcb->scrub.stats_lock exclusively. Unrecoverable errors are reported through
// ERR here.
//
// NOTE(review): listing lines 618 and 647 are absent from this rendering of the
// file. Presumably 618 allocates `err` and 647 passes the unrecoverable error on
// for detailed logging - confirm against the original scrub.c.
605 static void log_error(device_extension* Vcb, uint64_t addr, uint64_t devid, bool metadata, bool recoverable, bool parity) {
606  if (recoverable) {
607  scrub_error* err;
608 
     // pick the log message matching the kind of error being recovered
609  if (parity) {
610  ERR("recovering from parity error at %I64x on device %I64x\n", addr, devid);
611  } else {
612  if (metadata)
613  ERR("recovering from metadata checksum error at %I64x on device %I64x\n", addr, devid);
614  else
615  ERR("recovering from data checksum error at %I64x on device %I64x\n", addr, devid);
616  }
617 
     // without a record there is nothing to queue; the ERR above is all that is kept
619  if (!err) {
620  ERR("out of memory\n");
621  return;
622  }
623 
624  err->address = addr;
625  err->device = devid;
626  err->recovered = true;
627  err->is_metadata = metadata;
628  err->parity = parity;
629 
     // a recovered error carries no further detail: clear the member matching its type
630  if (metadata)
631  RtlZeroMemory(&err->metadata, sizeof(err->metadata));
632  else
633  RtlZeroMemory(&err->data, sizeof(err->data));
634 
     // the error count and error list are updated together under stats_lock
635  ExAcquireResourceExclusiveLite(&Vcb->scrub.stats_lock, true);
636 
637  Vcb->scrub.num_errors++;
638  InsertTailList(&Vcb->scrub.errors, &err->list_entry);
639 
640  ExReleaseResourceLite(&Vcb->scrub.stats_lock);
641  } else {
642  if (metadata)
643  ERR("unrecoverable metadata checksum error at %I64x\n", addr);
644  else
645  ERR("unrecoverable data checksum error at %I64x\n", addr);
646 
648  }
649 }
650 
// I/O completion routine for a stripe read.
//
// `conptr` is the scrub_context_stripe the read was issued for. The routine
// atomically decrements the owning context's stripes_left counter, copies the
// IRP's final IoStatus into stripe->iosb, and signals context->Event once the
// counter reaches zero.
//
// NOTE(review): listing lines 654, 657 and 664 are absent from this rendering of
// the file. `context` is declared on one of them (presumably taken from the
// stripe) and the routine's return statement is on 664 - confirm against the
// original scrub.c.
651 _Function_class_(IO_COMPLETION_ROUTINE)
652 static NTSTATUS __stdcall scrub_read_completion(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID conptr) {
653  scrub_context_stripe* stripe = conptr;
655  ULONG left = InterlockedDecrement(&context->stripes_left); // value of the counter after this completion
656 
658 
659  stripe->iosb = Irp->IoStatus; // keep the result so the issuer can inspect it per stripe
660 
     // the completion that brings the counter to zero signals the event
661  if (left == 0)
662  KeSetEvent(&context->Event, 0, false);
663 
665 }
666 
669  bool csum_error = false;
670  ULONG i;
671  CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&c->chunk_item[1];
672  uint16_t present_devices = 0;
673 
674  if (csum) {
675  ULONG good_stripe = 0xffffffff;
676 
677  for (i = 0; i < c->chunk_item->num_stripes; i++) {
678  if (c->devices[i]->devobj) {
679  present_devices++;
680 
681  // if first stripe is okay, we only need to check that the others are identical to it
682  if (good_stripe != 0xffffffff) {
683  if (RtlCompareMemory(context->stripes[i].buf, context->stripes[good_stripe].buf,
684  context->stripes[good_stripe].length) != context->stripes[i].length) {
685  context->stripes[i].csum_error = true;
686  csum_error = true;
688  }
689  } else {
690  Status = check_csum(Vcb, context->stripes[i].buf, context->stripes[i].length >> Vcb->sector_shift, csum);
691  if (Status == STATUS_CRC_ERROR) {
692  context->stripes[i].csum_error = true;
693  csum_error = true;
695  } else if (!NT_SUCCESS(Status)) {
696  ERR("check_csum returned %08lx\n", Status);
697  return Status;
698  } else
699  good_stripe = i;
700  }
701  }
702  }
703  } else {
704  ULONG good_stripe = 0xffffffff;
705 
// Metadata path (csum == NULL): verify every present stripe. The first stripe
// whose tree headers all pass check_tree_checksum and carry the expected address
// becomes good_stripe; later stripes only need to be byte-identical to it.
// Stripes that fail are flagged with csum_error.
//
// Fix: count present devices here as the data path does. present_devices gates
// the "a good mirror exists, repair from it" path further down; without the
// increment it stayed 0 for metadata, so that path (which has explicit metadata
// handling) was unreachable and metadata errors were treated as having no mirror.
706  for (i = 0; i < c->chunk_item->num_stripes; i++) {
707  ULONG j;
708 
709  if (c->devices[i]->devobj) {
     present_devices++;

710  // if first stripe is okay, we only need to check that the others are identical to it
711  if (good_stripe != 0xffffffff) {
     // RtlCompareMemory returns the number of matching bytes, so anything short of the full length is a mismatch
712  if (RtlCompareMemory(context->stripes[i].buf, context->stripes[good_stripe].buf,
713  context->stripes[good_stripe].length) != context->stripes[i].length) {
714  context->stripes[i].csum_error = true;
715  csum_error = true;
717  }
718  } else {
     // no known-good stripe yet: check each node's checksum and self-reported address
719  for (j = 0; j < context->stripes[i].length / Vcb->superblock.node_size; j++) {
720  tree_header* th = (tree_header*)&context->stripes[i].buf[j * Vcb->superblock.node_size];
721 
722  if (!check_tree_checksum(Vcb, th) || th->address != offset + UInt32x32To64(j, Vcb->superblock.node_size)) {
723  context->stripes[i].csum_error = true;
724  csum_error = true;
726  }
727  }
728 
729  if (!context->stripes[i].csum_error)
730  good_stripe = i;
731  }
732  }
733  }
734  }
735 
736  if (!csum_error)
737  return STATUS_SUCCESS;
738 
739  // handle checksum error
740 
741  for (i = 0; i < c->chunk_item->num_stripes; i++) {
742  if (context->stripes[i].csum_error) {
743  if (csum) {
744  context->stripes[i].bad_csums = ExAllocatePoolWithTag(PagedPool, (context->stripes[i].length * Vcb->csum_size) >> Vcb->sector_shift, ALLOC_TAG);
745  if (!context->stripes[i].bad_csums) {
746  ERR("out of memory\n");
748  }
749 
750  do_calc_job(Vcb, context->stripes[i].buf, context->stripes[i].length >> Vcb->sector_shift, context->stripes[i].bad_csums);
751  } else {
752  ULONG j;
753 
754  context->stripes[i].bad_csums = ExAllocatePoolWithTag(PagedPool, (context->stripes[i].length * Vcb->csum_size) >> Vcb->sector_shift, ALLOC_TAG);
755  if (!context->stripes[i].bad_csums) {
756  ERR("out of memory\n");
758  }
759 
760  for (j = 0; j < context->stripes[i].length / Vcb->superblock.node_size; j++) {
761  tree_header* th = (tree_header*)&context->stripes[i].buf[j * Vcb->superblock.node_size];
762 
763  get_tree_checksum(Vcb, th, (uint8_t*)context->stripes[i].bad_csums + (Vcb->csum_size * j));
764  }
765  }
766  }
767  }
768 
769  if (present_devices > 1) {
770  ULONG good_stripe = 0xffffffff;
771 
772  for (i = 0; i < c->chunk_item->num_stripes; i++) {
773  if (c->devices[i]->devobj && !context->stripes[i].csum_error) {
774  good_stripe = i;
775  break;
776  }
777  }
778 
779  if (good_stripe != 0xffffffff) {
780  // log
781 
// A good mirror exists: for every stripe flagged with csum_error, report each
// bad sector (data) or bad tree node (metadata) as a recoverable error.
// bad_csums holds the checksums computed from what was actually read, one
// csum_size entry per sector or node.
782  for (i = 0; i < c->chunk_item->num_stripes; i++) {
783  if (context->stripes[i].csum_error) {
784  ULONG j;
785 
786  if (csum) {
787  for (j = 0; j < context->stripes[i].length >> Vcb->sector_shift; j++) {
     // Fix: the expected checksum of sector j lives at byte offset j * csum_size.
     // The previous (j + csum_size) compared against the wrong entry and could
     // read beyond the end of the csum array.
788  if (RtlCompareMemory((uint8_t*)context->stripes[i].bad_csums + (j * Vcb->csum_size), (uint8_t*)csum + (j * Vcb->csum_size), Vcb->csum_size) != Vcb->csum_size) {
789  uint64_t addr = offset + ((uint64_t)j << Vcb->sector_shift);
790 
791  log_error(Vcb, addr, c->devices[i]->devitem.dev_id, false, true, false);
793  }
794  }
795  } else {
796  for (j = 0; j < context->stripes[i].length / Vcb->superblock.node_size; j++) {
797  tree_header* th = (tree_header*)&context->stripes[i].buf[j * Vcb->superblock.node_size];
798  uint64_t addr = offset + UInt32x32To64(j, Vcb->superblock.node_size);
799 
     // a node is bad if its computed checksum differs from the one at the start of its header, or its address is wrong
800  if (RtlCompareMemory((uint8_t*)context->stripes[i].bad_csums + (j * Vcb->csum_size), th, Vcb->csum_size) != Vcb->csum_size || th->address != addr) {
801  log_error(Vcb, addr, c->devices[i]->devitem.dev_id, true, true, false);
803  }
804  }
805  }
806  }
807  }
808 
809  // write good data over bad
810 
811  for (i = 0; i < c->chunk_item->num_stripes; i++) {
812  if (context->stripes[i].csum_error && !c->devices[i]->readonly) {
813  Status = write_data_phys(c->devices[i]->devobj, c->devices[i]->fileobj, cis[i].offset + offset - c->offset,
814  context->stripes[good_stripe].buf, context->stripes[i].length);
815 
816  if (!NT_SUCCESS(Status)) {
817  ERR("write_data_phys returned %08lx\n", Status);
819  return Status;
820  }
821  }
822  }
823 
824  return STATUS_SUCCESS;
825  }
826 
827  // if csum errors on all stripes, check sector by sector
828 
829  for (i = 0; i < c->chunk_item->num_stripes; i++) {
830  if (c->devices[i]->devobj) {
831  if (csum) {
832  for (ULONG j = 0; j < context->stripes[i].length >> Vcb->sector_shift; j++) {
833  if (RtlCompareMemory((uint8_t*)context->stripes[i].bad_csums + (j * Vcb->csum_size), (uint8_t*)csum + (j * Vcb->csum_size), Vcb->csum_size) != Vcb->csum_size) {
834  ULONG k;
835  uint64_t addr = offset + ((uint64_t)j << Vcb->sector_shift);
836  bool recovered = false;
837 
838  for (k = 0; k < c->chunk_item->num_stripes; k++) {
839  if (i != k && c->devices[k]->devobj &&
840  RtlCompareMemory((uint8_t*)context->stripes[k].bad_csums + (j * Vcb->csum_size),
841  (uint8_t*)csum + (j * Vcb->csum_size), Vcb->csum_size) == Vcb->csum_size) {
842  log_error(Vcb, addr, c->devices[i]->devitem.dev_id, false, true, false);
844 
845  RtlCopyMemory(context->stripes[i].buf + (j << Vcb->sector_shift),
846  context->stripes[k].buf + (j << Vcb->sector_shift), Vcb->superblock.sector_size);
847 
848  recovered = true;
849  break;
850  }
851  }
852 
853  if (!recovered) {
854  log_error(Vcb, addr, c->devices[i]->devitem.dev_id, false, false, false);
856  }
857  }
858  }
859  } else {
860  for (ULONG j = 0; j < context->stripes[i].length / Vcb->superblock.node_size; j++) {
861  tree_header* th = (tree_header*)&context->stripes[i].buf[j * Vcb->superblock.node_size];
862  uint64_t addr = offset + UInt32x32To64(j, Vcb->superblock.node_size);
863 
864  if (RtlCompareMemory((uint8_t*)context->stripes[i].bad_csums + (j * Vcb->csum_size), th, Vcb->csum_size) != Vcb->csum_size || th->address != addr) {
865  ULONG k;
866  bool recovered = false;
867 
868  for (k = 0; k < c->chunk_item->num_stripes; k++) {
869  if (i != k && c->devices[k]->devobj) {
870  tree_header* th2 = (tree_header*)&context->stripes[k].buf[j * Vcb->superblock.node_size];
871 
872  if (RtlCompareMemory((uint8_t*)context->stripes[k].bad_csums + (j * Vcb->csum_size), th2, Vcb->csum_size) == Vcb->csum_size && th2->address == addr) {
873  log_error(Vcb, addr, c->devices[i]->devitem.dev_id, true, true, false);
875 
876  RtlCopyMemory(th, th2, Vcb->superblock.node_size);
877 
878  recovered = true;
879  break;
880  }
881  }
882  }
883 
884  if (!recovered) {
885  log_error(Vcb, addr, c->devices[i]->devitem.dev_id, true, false, false);
887  }
888  }
889  }
890  }
891  }
892  }
893 
894  // write good data over bad
895 
896  for (i = 0; i < c->chunk_item->num_stripes; i++) {
897  if (c->devices[i]->devobj && !c->devices[i]->readonly) {
898  Status = write_data_phys(c->devices[i]->devobj, c->devices[i]->fileobj, cis[i].offset + offset - c->offset,
899  context->stripes[i].buf, context->stripes[i].length);
900  if (!NT_SUCCESS(Status)) {
901  ERR("write_data_phys returned %08lx\n", Status);
903  return Status;
904  }
905  }
906  }
907 
908  return STATUS_SUCCESS;
909  }
910 
// No mirror to repair from: report each bad sector (data) or bad tree node
// (metadata) on the failing stripes as an unrecoverable error.
//
// Fixes:
//  - only stripes flagged with csum_error are examined; bad_csums is populated
//    earlier in this function for those stripes only, so reading it for any
//    other stripe is invalid.
//  - the expected checksum of sector j is at byte offset j * csum_size, not
//    (j + csum_size), which compared against the wrong entry and could read
//    beyond the end of the csum array.
911  for (i = 0; i < c->chunk_item->num_stripes; i++) {
912  if (c->devices[i]->devobj && context->stripes[i].csum_error) {
913  ULONG j;
914 
915  if (csum) {
916  for (j = 0; j < context->stripes[i].length >> Vcb->sector_shift; j++) {
917  if (RtlCompareMemory((uint8_t*)context->stripes[i].bad_csums + (j * Vcb->csum_size), (uint8_t*)csum + (j * Vcb->csum_size), Vcb->csum_size) != Vcb->csum_size) {
918  uint64_t addr = offset + ((uint64_t)j << Vcb->sector_shift);
919 
920  log_error(Vcb, addr, c->devices[i]->devitem.dev_id, false, false, false);
921  }
922  }
923  } else {
924  for (j = 0; j < context->stripes[i].length / Vcb->superblock.node_size; j++) {
925  tree_header* th = (tree_header*)&context->stripes[i].buf[j * Vcb->superblock.node_size];
926  uint64_t addr = offset + UInt32x32To64(j, Vcb->superblock.node_size);
927 
     // a node is bad if its computed checksum differs from the one at the start of its header, or its address is wrong
928  if (RtlCompareMemory((uint8_t*)context->stripes[i].bad_csums + (j * Vcb->csum_size), th, Vcb->csum_size) != Vcb->csum_size || th->address != addr)
929  log_error(Vcb, addr, c->devices[i]->devitem.dev_id, true, false, false);
930  }
931  }
932  }
933  }
934 
935  return STATUS_SUCCESS;
936 }
937 
939  ULONG j;
941  uint32_t pos, *stripeoff;
942 
943  pos = 0;
944  stripeoff = ExAllocatePoolWithTag(NonPagedPool, sizeof(uint32_t) * c->chunk_item->num_stripes, ALLOC_TAG);
945  if (!stripeoff) {
946  ERR("out of memory\n");
948  }
949 
950  RtlZeroMemory(stripeoff, sizeof(uint32_t) * c->chunk_item->num_stripes);
951 
952  stripe = startoffstripe;
953  while (pos < length) {
954  uint32_t readlen;
955 
956  if (pos == 0)
957  readlen = (uint32_t)min(context->stripes[stripe].length, c->chunk_item->stripe_length - (context->stripes[stripe].start % c->chunk_item->stripe_length));
958  else
959  readlen = min(length - pos, (uint32_t)c->chunk_item->stripe_length);
960 
961  if (csum) {
962  for (j = 0; j < readlen; j += Vcb->superblock.sector_size) {
963  if (!check_sector_csum(Vcb, context->stripes[stripe].buf + stripeoff[stripe], (uint8_t*)csum + ((pos * Vcb->csum_size) >> Vcb->sector_shift))) {
964  uint64_t addr = offset + pos;
965 
966  log_error(Vcb, addr, c->devices[stripe]->devitem.dev_id, false, false, false);
968  }
969 
970  pos += Vcb->superblock.sector_size;
971  stripeoff[stripe] += Vcb->superblock.sector_size;
972  }
973  } else {
974  for (j = 0; j < readlen; j += Vcb->superblock.node_size) {
975  tree_header* th = (tree_header*)(context->stripes[stripe].buf + stripeoff[stripe]);
976  uint64_t addr = offset + pos;
977 
978  if (!check_tree_checksum(Vcb, th) || th->address != addr) {
979  log_error(Vcb, addr, c->devices[stripe]->devitem.dev_id, true, false, false);
981  }
982 
983  pos += Vcb->superblock.node_size;
984  stripeoff[stripe] += Vcb->superblock.node_size;
985  }
986  }
987 
988  stripe = (stripe + 1) % c->chunk_item->num_stripes;
989  }
990 
991  ExFreePool(stripeoff);
992 
993  return STATUS_SUCCESS;
994 }
995 
997  ULONG j;
998  uint16_t stripe, sub_stripes = max(c->chunk_item->sub_stripes, 1);
999  uint32_t pos, *stripeoff;
1000  bool csum_error = false;
1001  NTSTATUS Status;
1002 
1003  pos = 0;
1004  stripeoff = ExAllocatePoolWithTag(NonPagedPool, sizeof(uint32_t) * c->chunk_item->num_stripes / sub_stripes, ALLOC_TAG);
1005  if (!stripeoff) {
1006  ERR("out of memory\n");
1008  }
1009 
1010  RtlZeroMemory(stripeoff, sizeof(uint32_t) * c->chunk_item->num_stripes / sub_stripes);
1011 
1012  stripe = startoffstripe;
1013  while (pos < length) {
1014  uint32_t readlen;
1015 
1016  if (pos == 0)
1017  readlen = (uint32_t)min(context->stripes[stripe * sub_stripes].length,
1018  c->chunk_item->stripe_length - (context->stripes[stripe * sub_stripes].start % c->chunk_item->stripe_length));
1019  else
1020  readlen = min(length - pos, (uint32_t)c->chunk_item->stripe_length);
1021 
1022  if (csum) {
1023  ULONG good_stripe = 0xffffffff;
1024  uint16_t k;
1025 
1026  for (k = 0; k < sub_stripes; k++) {
1027  if (c->devices[(stripe * sub_stripes) + k]->devobj) {
1028  // if first stripe is okay, we only need to check that the others are identical to it
1029  if (good_stripe != 0xffffffff) {
1030  if (RtlCompareMemory(context->stripes[(stripe * sub_stripes) + k].buf + stripeoff[stripe],
1031  context->stripes[(stripe * sub_stripes) + good_stripe].buf + stripeoff[stripe],
1032  readlen) != readlen) {
1033  context->stripes[(stripe * sub_stripes) + k].csum_error = true;
1034  csum_error = true;
1035  log_device_error(Vcb, c->devices[(stripe * sub_stripes) + k], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
1036  }
1037  } else {
1038  for (j = 0; j < readlen; j += Vcb->superblock.sector_size) {
1039  if (!check_sector_csum(Vcb, context->stripes[(stripe * sub_stripes) + k].buf + stripeoff[stripe] + j,
1040  (uint8_t*)csum + (((pos + j) * Vcb->csum_size) >> Vcb->sector_shift))) {
1041  csum_error = true;
1042  context->stripes[(stripe * sub_stripes) + k].csum_error = true;
1043  log_device_error(Vcb, c->devices[(stripe * sub_stripes) + k], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
1044  break;
1045  }
1046  }
1047 
1048  if (!context->stripes[(stripe * sub_stripes) + k].csum_error)
1049  good_stripe = k;
1050  }
1051  }
1052  }
1053 
1054  pos += readlen;
1055  stripeoff[stripe] += readlen;
1056  } else {
1057  ULONG good_stripe = 0xffffffff;
1058  uint16_t k;
1059 
1060  for (k = 0; k < sub_stripes; k++) {
1061  if (c->devices[(stripe * sub_stripes) + k]->devobj) {
1062  // if first stripe is okay, we only need to check that the others are identical to it
1063  if (good_stripe != 0xffffffff) {
1064  if (RtlCompareMemory(context->stripes[(stripe * sub_stripes) + k].buf + stripeoff[stripe],
1065  context->stripes[(stripe * sub_stripes) + good_stripe].buf + stripeoff[stripe],
1066  readlen) != readlen) {
1067  context->stripes[(stripe * sub_stripes) + k].csum_error = true;
1068  csum_error = true;
1069  log_device_error(Vcb, c->devices[(stripe * sub_stripes) + k], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
1070  }
1071  } else {
1072  for (j = 0; j < readlen; j += Vcb->superblock.node_size) {
1073  tree_header* th = (tree_header*)(context->stripes[(stripe * sub_stripes) + k].buf + stripeoff[stripe] + j);
1074  uint64_t addr = offset + pos + j;
1075 
1076  if (!check_tree_checksum(Vcb, th) || th->address != addr) {
1077  csum_error = true;
1078  context->stripes[(stripe * sub_stripes) + k].csum_error = true;
1079  log_device_error(Vcb, c->devices[(stripe * sub_stripes) + k], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
1080  break;
1081  }
1082  }
1083 
1084  if (!context->stripes[(stripe * sub_stripes) + k].csum_error)
1085  good_stripe = k;
1086  }
1087  }
1088  }
1089 
1090  pos += readlen;
1091  stripeoff[stripe] += readlen;
1092  }
1093 
1094  stripe = (stripe + 1) % (c->chunk_item->num_stripes / sub_stripes);
1095  }
1096 
1097  if (!csum_error) {
1099  goto end;
1100  }
1101 
1102  for (j = 0; j < c->chunk_item->num_stripes; j += sub_stripes) {
1103  ULONG goodstripe = 0xffffffff;
1104  uint16_t k;
1105  bool hasbadstripe = false;
1106 
1107  if (context->stripes[j].length == 0)
1108  continue;
1109 
1110  for (k = 0; k < sub_stripes; k++) {
1111  if (c->devices[j + k]->devobj) {
1112  if (!context->stripes[j + k].csum_error)
1113  goodstripe = k;
1114  else
1115  hasbadstripe = true;
1116  }
1117  }
1118 
1119  if (hasbadstripe) {
1120  if (goodstripe != 0xffffffff) {
1121  for (k = 0; k < sub_stripes; k++) {
1122  if (c->devices[j + k]->devobj && context->stripes[j + k].csum_error) {
1123  uint32_t so = 0;
1124  bool recovered = false;
1125 
1126  pos = 0;
1127 
1128  stripe = startoffstripe;
1129  while (pos < length) {
1130  uint32_t readlen;
1131 
1132  if (pos == 0)
1133  readlen = (uint32_t)min(context->stripes[stripe * sub_stripes].length,
1134  c->chunk_item->stripe_length - (context->stripes[stripe * sub_stripes].start % c->chunk_item->stripe_length));
1135  else
1136  readlen = min(length - pos, (uint32_t)c->chunk_item->stripe_length);
1137 
1138  if (stripe == j / sub_stripes) {
1139  if (csum) {
1140  ULONG l;
1141 
1142  for (l = 0; l < readlen; l += Vcb->superblock.sector_size) {
1143  if (RtlCompareMemory(context->stripes[j + k].buf + so,
1144  context->stripes[j + goodstripe].buf + so,
1145  Vcb->superblock.sector_size) != Vcb->superblock.sector_size) {
1146  uint64_t addr = offset + pos;
1147 
1148  log_error(Vcb, addr, c->devices[j + k]->devitem.dev_id, false, true, false);
1149 
1150  recovered = true;
1151  }
1152 
1153  pos += Vcb->superblock.sector_size;
1154  so += Vcb->superblock.sector_size;
1155  }
1156  } else {
1157  ULONG l;
1158 
1159  for (l = 0; l < readlen; l += Vcb->superblock.node_size) {
1160  if (RtlCompareMemory(context->stripes[j + k].buf + so,
1161  context->stripes[j + goodstripe].buf + so,
1162  Vcb->superblock.node_size) != Vcb->superblock.node_size) {
1163  uint64_t addr = offset + pos;
1164 
1165  log_error(Vcb, addr, c->devices[j + k]->devitem.dev_id, true, true, false);
1166 
1167  recovered = true;
1168  }
1169 
1170  pos += Vcb->superblock.node_size;
1171  so += Vcb->superblock.node_size;
1172  }
1173  }
1174  } else
1175  pos += readlen;
1176 
1177  stripe = (stripe + 1) % (c->chunk_item->num_stripes / sub_stripes);
1178  }
1179 
1180  if (recovered) {
1181  // write good data over bad
1182 
1183  if (!c->devices[j + k]->readonly) {
1184  CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&c->chunk_item[1];
1185 
1186  Status = write_data_phys(c->devices[j + k]->devobj, c->devices[j + k]->fileobj, cis[j + k].offset + offset - c->offset,
1187  context->stripes[j + goodstripe].buf, context->stripes[j + goodstripe].length);
1188 
1189  if (!NT_SUCCESS(Status)) {
1190  ERR("write_data_phys returned %08lx\n", Status);
1192  goto end;
1193  }
1194  }
1195  }
1196  }
1197  }
1198  } else {
1199  uint32_t so = 0;
1200  bool recovered = false;
1201 
1202  if (csum) {
1203  for (k = 0; k < sub_stripes; k++) {
1204  if (c->devices[j + k]->devobj) {
1205  context->stripes[j + k].bad_csums = ExAllocatePoolWithTag(PagedPool, (context->stripes[j + k].length * Vcb->csum_size) >> Vcb->sector_shift,
1206  ALLOC_TAG);
1207  if (!context->stripes[j + k].bad_csums) {
1208  ERR("out of memory\n");
1210  goto end;
1211  }
1212 
1213  do_calc_job(Vcb, context->stripes[j + k].buf, context->stripes[j + k].length >> Vcb->sector_shift, context->stripes[j + k].bad_csums);
1214  }
1215  }
1216  } else {
1217  for (k = 0; k < sub_stripes; k++) {
1218  if (c->devices[j + k]->devobj) {
1219  ULONG l;
1220 
1221  context->stripes[j + k].bad_csums = ExAllocatePoolWithTag(PagedPool, context->stripes[j + k].length * Vcb->csum_size / Vcb->superblock.node_size,
1222  ALLOC_TAG);
1223  if (!context->stripes[j + k].bad_csums) {
1224  ERR("out of memory\n");
1226  goto end;
1227  }
1228 
1229  for (l = 0; l < context->stripes[j + k].length / Vcb->superblock.node_size; l++) {
1230  tree_header* th = (tree_header*)&context->stripes[j + k].buf[l * Vcb->superblock.node_size];
1231 
1232  get_tree_checksum(Vcb, th, (uint8_t*)context->stripes[j + k].bad_csums + (Vcb->csum_size * l));
1233  }
1234  }
1235  }
1236  }
1237 
1238  pos = 0;
1239 
1240  stripe = startoffstripe;
1241  while (pos < length) {
1242  uint32_t readlen;
1243 
1244  if (pos == 0)
1245  readlen = (uint32_t)min(context->stripes[stripe * sub_stripes].length,
1246  c->chunk_item->stripe_length - (context->stripes[stripe * sub_stripes].start % c->chunk_item->stripe_length));
1247  else
1248  readlen = min(length - pos, (uint32_t)c->chunk_item->stripe_length);
1249 
1250  if (stripe == j / sub_stripes) {
1251  ULONG l;
1252 
1253  if (csum) {
1254  for (l = 0; l < readlen; l += Vcb->superblock.sector_size) {
1255  bool has_error = false;
1256 
1257  goodstripe = 0xffffffff;
1258  for (k = 0; k < sub_stripes; k++) {
1259  if (c->devices[j + k]->devobj) {
1260  if (RtlCompareMemory((uint8_t*)context->stripes[j + k].bad_csums + ((so * Vcb->csum_size) >> Vcb->sector_shift),
1261  (uint8_t*)csum + ((pos * Vcb->csum_size) >> Vcb->sector_shift),
1262  Vcb->csum_size) != Vcb->csum_size) {
1263  has_error = true;
1264  } else
1265  goodstripe = k;
1266  }
1267  }
1268 
1269  if (has_error) {
1270  if (goodstripe != 0xffffffff) {
1271  for (k = 0; k < sub_stripes; k++) {
1272  if (c->devices[j + k]->devobj &&
1273  RtlCompareMemory((uint8_t*)context->stripes[j + k].bad_csums + ((so * Vcb->csum_size) >> Vcb->sector_shift),
1274  (uint8_t*)csum + ((pos * Vcb->csum_size) >> Vcb->sector_shift),
1275  Vcb->csum_size) != Vcb->csum_size) {
1276  uint64_t addr = offset + pos;
1277 
1278  log_error(Vcb, addr, c->devices[j + k]->devitem.dev_id, false, true, false);
1279 
1280  recovered = true;
1281 
1282  RtlCopyMemory(context->stripes[j + k].buf + so, context->stripes[j + goodstripe].buf + so,
1283  Vcb->superblock.sector_size);
1284  }
1285  }
1286  } else {
1287  uint64_t addr = offset + pos;
1288 
1289  for (k = 0; k < sub_stripes; k++) {
1290  if (c->devices[j + j]->devobj) {
1291  log_error(Vcb, addr, c->devices[j + k]->devitem.dev_id, false, false, false);
1293  }
1294  }
1295  }
1296  }
1297 
1298  pos += Vcb->superblock.sector_size;
1299  so += Vcb->superblock.sector_size;
1300  }
1301  } else {
1302  for (l = 0; l < readlen; l += Vcb->superblock.node_size) {
1303  for (k = 0; k < sub_stripes; k++) {
1304  if (c->devices[j + k]->devobj) {
1305  tree_header* th = (tree_header*)&context->stripes[j + k].buf[so];
1306  uint64_t addr = offset + pos;
1307 
1308  if (RtlCompareMemory((uint8_t*)context->stripes[j + k].bad_csums + (so * Vcb->csum_size / Vcb->superblock.node_size), th, Vcb->csum_size) != Vcb->csum_size || th->address != addr) {
1309  ULONG m;
1310 
1311  recovered = false;
1312 
1313  for (m = 0; m < sub_stripes; m++) {
1314  if (m != k) {
1315  tree_header* th2 = (tree_header*)&context->stripes[j + m].buf[so];
1316 
1317  if (RtlCompareMemory((uint8_t*)context->stripes[j + m].bad_csums + (so * Vcb->csum_size / Vcb->superblock.node_size), th2, Vcb->csum_size) == Vcb->csum_size && th2->address == addr) {
1318  log_error(Vcb, addr, c->devices[j + k]->devitem.dev_id, true, true, false);
1319 
1320  RtlCopyMemory(th, th2, Vcb->superblock.node_size);
1321 
1322  recovered = true;
1323  break;
1324  } else
1326  }
1327  }
1328 
1329  if (!recovered)
1330  log_error(Vcb, addr, c->devices[j + k]->devitem.dev_id, true, false, false);
1331  }
1332  }
1333  }
1334 
1335  pos += Vcb->superblock.node_size;
1336  so += Vcb->superblock.node_size;
1337  }
1338  }
1339  } else
1340  pos += readlen;
1341 
1342  stripe = (stripe + 1) % (c->chunk_item->num_stripes / sub_stripes);
1343  }
1344 
1345  if (recovered) {
1346  // write good data over bad
1347 
1348  for (k = 0; k < sub_stripes; k++) {
1349  if (c->devices[j + k]->devobj && !c->devices[j + k]->readonly) {
1350  CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&c->chunk_item[1];
1351 
1352  Status = write_data_phys(c->devices[j + k]->devobj, c->devices[j + k]->fileobj, cis[j + k].offset + offset - c->offset,
1353  context->stripes[j + k].buf, context->stripes[j + k].length);
1354 
1355  if (!NT_SUCCESS(Status)) {
1356  ERR("write_data_phys returned %08lx\n", Status);
1358  goto end;
1359  }
1360  }
1361  }
1362  }
1363  }
1364  }
1365  }
1366 
1368 
1369 end:
1370  ExFreePool(stripeoff);
1371 
1372  return Status;
1373 }
1374 
1376  ULONG i;
1378  CHUNK_ITEM_STRIPE* cis;
1379  NTSTATUS Status;
1380  uint16_t startoffstripe = 0, num_missing, allowed_missing;
1381 
1382  TRACE("(%p, %p, %lx, %I64x, %x, %p)\n", Vcb, c, type, offset, size, csum);
1383 
1384  context.stripes = ExAllocatePoolWithTag(NonPagedPool, sizeof(scrub_context_stripe) * c->chunk_item->num_stripes, ALLOC_TAG);
1385  if (!context.stripes) {
1386  ERR("out of memory\n");
1388  goto end;
1389  }
1390 
1391  RtlZeroMemory(context.stripes, sizeof(scrub_context_stripe) * c->chunk_item->num_stripes);
1392 
1393  context.stripes_left = 0;
1394 
1395  cis = (CHUNK_ITEM_STRIPE*)&c->chunk_item[1];
1396 
1397  if (type == BLOCK_FLAG_RAID0) {
1398  uint64_t startoff, endoff;
1399  uint16_t endoffstripe;
1400 
1401  get_raid0_offset(offset - c->offset, c->chunk_item->stripe_length, c->chunk_item->num_stripes, &startoff, &startoffstripe);
1402  get_raid0_offset(offset + size - c->offset - 1, c->chunk_item->stripe_length, c->chunk_item->num_stripes, &endoff, &endoffstripe);
1403 
1404  for (i = 0; i < c->chunk_item->num_stripes; i++) {
1405  if (startoffstripe > i)
1406  context.stripes[i].start = startoff - (startoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length;
1407  else if (startoffstripe == i)
1408  context.stripes[i].start = startoff;
1409  else
1410  context.stripes[i].start = startoff - (startoff % c->chunk_item->stripe_length);
1411 
1412  if (endoffstripe > i)
1413  context.stripes[i].length = (uint32_t)(endoff - (endoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length - context.stripes[i].start);
1414  else if (endoffstripe == i)
1415  context.stripes[i].length = (uint32_t)(endoff + 1 - context.stripes[i].start);
1416  else
1417  context.stripes[i].length = (uint32_t)(endoff - (endoff % c->chunk_item->stripe_length) - context.stripes[i].start);
1418  }
1419 
1420  allowed_missing = 0;
1421  } else if (type == BLOCK_FLAG_RAID10) {
1422  uint64_t startoff, endoff;
1423  uint16_t endoffstripe, j, sub_stripes = max(c->chunk_item->sub_stripes, 1);
1424 
1425  get_raid0_offset(offset - c->offset, c->chunk_item->stripe_length, c->chunk_item->num_stripes / sub_stripes, &startoff, &startoffstripe);
1426  get_raid0_offset(offset + size - c->offset - 1, c->chunk_item->stripe_length, c->chunk_item->num_stripes / sub_stripes, &endoff, &endoffstripe);
1427 
1428  if ((c->chunk_item->num_stripes % sub_stripes) != 0) {
1429  ERR("chunk %I64x: num_stripes %x was not a multiple of sub_stripes %x!\n", c->offset, c->chunk_item->num_stripes, sub_stripes);
1431  goto end;
1432  }
1433 
1434  startoffstripe *= sub_stripes;
1435  endoffstripe *= sub_stripes;
1436 
1437  for (i = 0; i < c->chunk_item->num_stripes; i += sub_stripes) {
1438  if (startoffstripe > i)
1439  context.stripes[i].start = startoff - (startoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length;
1440  else if (startoffstripe == i)
1441  context.stripes[i].start = startoff;
1442  else
1443  context.stripes[i].start = startoff - (startoff % c->chunk_item->stripe_length);
1444 
1445  if (endoffstripe > i)
1446  context.stripes[i].length = (uint32_t)(endoff - (endoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length - context.stripes[i].start);
1447  else if (endoffstripe == i)
1448  context.stripes[i].length = (uint32_t)(endoff + 1 - context.stripes[i].start);
1449  else
1450  context.stripes[i].length = (uint32_t)(endoff - (endoff % c->chunk_item->stripe_length) - context.stripes[i].start);
1451 
1452  for (j = 1; j < sub_stripes; j++) {
1453  context.stripes[i+j].start = context.stripes[i].start;
1454  context.stripes[i+j].length = context.stripes[i].length;
1455  }
1456  }
1457 
1458  startoffstripe /= sub_stripes;
1459  allowed_missing = 1;
1460  } else
1461  allowed_missing = c->chunk_item->num_stripes - 1;
1462 
1463  num_missing = 0;
1464 
1465  for (i = 0; i < c->chunk_item->num_stripes; i++) {
1467 
1468  context.stripes[i].context = (struct _scrub_context*)&context;
1469 
1470  if (type == BLOCK_FLAG_DUPLICATE) {
1471  context.stripes[i].start = offset - c->offset;
1472  context.stripes[i].length = size;
1473  } else if (type != BLOCK_FLAG_RAID0 && type != BLOCK_FLAG_RAID10) {
1474  ERR("unexpected chunk type %lx\n", type);
1476  goto end;
1477  }
1478 
1479  if (!c->devices[i]->devobj) {
1480  num_missing++;
1481 
1482  if (num_missing > allowed_missing) {
1483  ERR("too many missing devices (at least %u, maximum allowed %u)\n", num_missing, allowed_missing);
1485  goto end;
1486  }
1487  } else if (context.stripes[i].length > 0) {
1488  context.stripes[i].buf = ExAllocatePoolWithTag(NonPagedPool, context.stripes[i].length, ALLOC_TAG);
1489 
1490  if (!context.stripes[i].buf) {
1491  ERR("out of memory\n");
1493  goto end;
1494  }
1495 
1496  context.stripes[i].Irp = IoAllocateIrp(c->devices[i]->devobj->StackSize, false);
1497 
1498  if (!context.stripes[i].Irp) {
1499  ERR("IoAllocateIrp failed\n");
1501  goto end;
1502  }
1503 
1504  IrpSp = IoGetNextIrpStackLocation(context.stripes[i].Irp);
1506  IrpSp->FileObject = c->devices[i]->fileobj;
1507 
1508  if (c->devices[i]->devobj->Flags & DO_BUFFERED_IO) {
1509  context.stripes[i].Irp->AssociatedIrp.SystemBuffer = ExAllocatePoolWithTag(NonPagedPool, context.stripes[i].length, ALLOC_TAG);
1510  if (!context.stripes[i].Irp->AssociatedIrp.SystemBuffer) {
1511  ERR("out of memory\n");
1513  goto end;
1514  }
1515 
1517 
1518  context.stripes[i].Irp->UserBuffer = context.stripes[i].buf;
1519  } else if (c->devices[i]->devobj->Flags & DO_DIRECT_IO) {
1520  context.stripes[i].Irp->MdlAddress = IoAllocateMdl(context.stripes[i].buf, context.stripes[i].length, false, false, NULL);
1521  if (!context.stripes[i].Irp->MdlAddress) {
1522  ERR("IoAllocateMdl failed\n");
1524  goto end;
1525  }
1526 
1528 
1529  _SEH2_TRY {
1530  MmProbeAndLockPages(context.stripes[i].Irp->MdlAddress, KernelMode, IoWriteAccess);
1533  } _SEH2_END;
1534 
1535  if (!NT_SUCCESS(Status)) {
1536  ERR("MmProbeAndLockPages threw exception %08lx\n", Status);
1537  IoFreeMdl(context.stripes[i].Irp->MdlAddress);
1538  context.stripes[i].Irp->MdlAddress = NULL;
1539  goto end;
1540  }
1541  } else
1542  context.stripes[i].Irp->UserBuffer = context.stripes[i].buf;
1543 
1544  IrpSp->Parameters.Read.Length = context.stripes[i].length;
1545  IrpSp->Parameters.Read.ByteOffset.QuadPart = context.stripes[i].start + cis[i].offset;
1546 
1547  context.stripes[i].Irp->UserIosb = &context.stripes[i].iosb;
1548 
1549  IoSetCompletionRoutine(context.stripes[i].Irp, scrub_read_completion, &context.stripes[i], true, true, true);
1550 
1551  context.stripes_left++;
1552 
1553  Vcb->scrub.data_scrubbed += context.stripes[i].length;
1554  }
1555  }
1556 
1557  if (context.stripes_left == 0) {
1558  ERR("error - not reading any stripes\n");
1560  goto end;
1561  }
1562 
1564 
1565  for (i = 0; i < c->chunk_item->num_stripes; i++) {
1566  if (c->devices[i]->devobj && context.stripes[i].length > 0)
1567  IoCallDriver(c->devices[i]->devobj, context.stripes[i].Irp);
1568  }
1569 
1571 
1572  // return an error if any of the stripes returned an error
1573  for (i = 0; i < c->chunk_item->num_stripes; i++) {
1574  if (!NT_SUCCESS(context.stripes[i].iosb.Status)) {
1575  Status = context.stripes[i].iosb.Status;
1577  goto end;
1578  }
1579  }
1580 
1581  if (type == BLOCK_FLAG_DUPLICATE) {
1583  if (!NT_SUCCESS(Status)) {
1584  ERR("scrub_extent_dup returned %08lx\n", Status);
1585  goto end;
1586  }
1587  } else if (type == BLOCK_FLAG_RAID0) {
1588  Status = scrub_extent_raid0(Vcb, c, offset, size, startoffstripe, csum, &context);
1589  if (!NT_SUCCESS(Status)) {
1590  ERR("scrub_extent_raid0 returned %08lx\n", Status);
1591  goto end;
1592  }
1593  } else if (type == BLOCK_FLAG_RAID10) {
1594  Status = scrub_extent_raid10(Vcb, c, offset, size, startoffstripe, csum, &context);
1595  if (!NT_SUCCESS(Status)) {
1596  ERR("scrub_extent_raid10 returned %08lx\n", Status);
1597  goto end;
1598  }
1599  }
1600 
1601 end:
1602  if (context.stripes) {
1603  for (i = 0; i < c->chunk_item->num_stripes; i++) {
1604  if (context.stripes[i].Irp) {
1605  if (c->devices[i]->devobj->Flags & DO_DIRECT_IO && context.stripes[i].Irp->MdlAddress) {
1606  MmUnlockPages(context.stripes[i].Irp->MdlAddress);
1607  IoFreeMdl(context.stripes[i].Irp->MdlAddress);
1608  }
1609  IoFreeIrp(context.stripes[i].Irp);
1610  }
1611 
1612  if (context.stripes[i].buf)
1613  ExFreePool(context.stripes[i].buf);
1614 
1615  if (context.stripes[i].bad_csums)
1616  ExFreePool(context.stripes[i].bad_csums);
1617  }
1618 
1619  ExFreePool(context.stripes);
1620  }
1621 
1622  return Status;
1623 }
1624 
1626  NTSTATUS Status;
1627  ULONG runlength, index;
1628 
1629  runlength = RtlFindFirstRunClear(bmp, &index);
1630 
1631  while (runlength != 0) {
1632  if (index >= bmplen)
1633  break;
1634 
1635  if (index + runlength >= bmplen) {
1636  runlength = bmplen - index;
1637 
1638  if (runlength == 0)
1639  break;
1640  }
1641 
1642  do {
1643  ULONG rl;
1644 
1645  if (runlength << Vcb->sector_shift > SCRUB_UNIT)
1646  rl = SCRUB_UNIT >> Vcb->sector_shift;
1647  else
1648  rl = runlength;
1649 
1650  Status = scrub_extent(Vcb, c, type, offset + ((uint64_t)index << Vcb->sector_shift),
1651  rl << Vcb->sector_shift, (uint8_t*)csum + (index * Vcb->csum_size));
1652  if (!NT_SUCCESS(Status)) {
1653  ERR("scrub_data_extent_dup returned %08lx\n", Status);
1654  return Status;
1655  }
1656 
1657  runlength -= rl;
1658  index += rl;
1659  } while (runlength > 0);
1660 
1661  runlength = RtlFindNextForwardRunClear(bmp, index, &index);
1662  }
1663 
1664  return STATUS_SUCCESS;
1665 }
1666 
1667 typedef struct {
1670  void* context;
1673  bool rewrite, missing;
1677 
1678 typedef struct {
1685  void* csum;
1689 
/* I/O completion routine (see the _Function_class_ annotation) for the
 * "raid56" scrub reads.
 *
 * Copies the completed IRP's I/O status block into the stripe's iosb and
 * decrements the context's stripes_left counter; when that counter reaches
 * zero it sets the context's Event.
 *
 * NOTE(review): the declarations of `stripe` and `context` (listing lines
 * 1692-1693), listing line 1696, and the return statement (listing line 1703)
 * are missing from this extract. Presumably both pointers are derived from
 * conptr - confirm against the full file. */
1690 _Function_class_(IO_COMPLETION_ROUTINE)
1691 static NTSTATUS __stdcall scrub_read_completion_raid56(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID conptr) {
// Interlocked (atomic) decrement of the outstanding-stripe counter; `left` is the value after the decrement.
1694  LONG left = InterlockedDecrement(&context->stripes_left);
1695 
1697 
// Copy the IRP's I/O status block into this stripe's iosb.
1698  stripe->iosb = Irp->IoStatus;
1699 
// Counter hit zero: set the context's event.
1700  if (left == 0)
1701  KeSetEvent(&context->Event, 0, false);
1702 
1704 }
1705 
1707  uint64_t num, uint16_t missing_devices) {
1708  ULONG sectors_per_stripe = (ULONG)(c->chunk_item->stripe_length >> Vcb->sector_shift), off;
1709  uint16_t stripe, parity = (bit_start + num + c->chunk_item->num_stripes - 1) % c->chunk_item->num_stripes;
1710  uint64_t stripeoff;
1711 
1712  stripe = (parity + 1) % c->chunk_item->num_stripes;
1713  off = (ULONG)(bit_start + num - stripe_start) * sectors_per_stripe * (c->chunk_item->num_stripes - 1);
1714  stripeoff = num * sectors_per_stripe;
1715 
1716  if (missing_devices == 0)
1717  RtlCopyMemory(context->parity_scratch, &context->stripes[parity].buf[num * c->chunk_item->stripe_length], (ULONG)c->chunk_item->stripe_length);
1718 
1719  while (stripe != parity) {
1720  RtlClearAllBits(&context->stripes[stripe].error);
1721 
1722  for (ULONG i = 0; i < sectors_per_stripe; i++) {
1723  if (c->devices[stripe]->devobj && RtlCheckBit(&context->alloc, off)) {
1724  if (RtlCheckBit(&context->is_tree, off)) {
1725  tree_header* th = (tree_header*)&context->stripes[stripe].buf[stripeoff << Vcb->sector_shift];
1726  uint64_t addr = c->offset + (stripe_start * (c->chunk_item->num_stripes - 1) * c->chunk_item->stripe_length) + (off << Vcb->sector_shift);
1727 
1728  if (!check_tree_checksum(Vcb, th) || th->address != addr) {
1729  RtlSetBits(&context->stripes[stripe].error, i, Vcb->superblock.node_size >> Vcb->sector_shift);
1731 
1732  if (missing_devices > 0)
1733  log_error(Vcb, addr, c->devices[stripe]->devitem.dev_id, true, false, false);
1734  }
1735 
1736  off += Vcb->superblock.node_size >> Vcb->sector_shift;
1737  stripeoff += Vcb->superblock.node_size >> Vcb->sector_shift;
1738  i += (Vcb->superblock.node_size >> Vcb->sector_shift) - 1;
1739 
1740  continue;
1741  } else if (RtlCheckBit(&context->has_csum, off)) {
1742  if (!check_sector_csum(Vcb, context->stripes[stripe].buf + (stripeoff << Vcb->sector_shift), (uint8_t*)context->csum + (Vcb->csum_size * off))) {
1743  RtlSetBit(&context->stripes[stripe].error, i);
1745 
1746  if (missing_devices > 0) {
1747  uint64_t addr = c->offset + (stripe_start * (c->chunk_item->num_stripes - 1) * c->chunk_item->stripe_length) + (off << Vcb->sector_shift);
1748 
1749  log_error(Vcb, addr, c->devices[stripe]->devitem.dev_id, false, false, false);
1750  }
1751  }
1752  }
1753  }
1754 
1755  off++;
1756  stripeoff++;
1757  }
1758 
1759  if (missing_devices == 0)
1760  do_xor(context->parity_scratch, &context->stripes[stripe].buf[num * c->chunk_item->stripe_length], (ULONG)c->chunk_item->stripe_length);
1761 
1762  stripe = (stripe + 1) % c->chunk_item->num_stripes;
1763  stripeoff = num * sectors_per_stripe;
1764  }
1765 
1766  // check parity
1767 
1768  if (missing_devices == 0) {
1769  RtlClearAllBits(&context->stripes[parity].error);
1770 
1771  for (ULONG i = 0; i < sectors_per_stripe; i++) {
1772  ULONG o, j;
1773 
1774  o = i << Vcb->sector_shift;
1775  for (j = 0; j < Vcb->superblock.sector_size; j++) { // FIXME - use SSE
1776  if (context->parity_scratch[o] != 0) {
1777  RtlSetBit(&context->stripes[parity].error, i);
1778  break;
1779  }
1780  o++;
1781  }
1782  }
1783  }
1784 
1785  // log and fix errors
1786 
1787  if (missing_devices > 0)
1788  return;
1789 
1790  for (ULONG i = 0; i < sectors_per_stripe; i++) {
1791  ULONG num_errors = 0, bad_off = 0;
1792  uint64_t bad_stripe = 0;
1793  bool alloc = false;
1794 
1795  stripe = (parity + 1) % c->chunk_item->num_stripes;
1796  off = (ULONG)((bit_start + num - stripe_start) * sectors_per_stripe * (c->chunk_item->num_stripes - 1)) + i;
1797 
1798  while (stripe != parity) {
1799  if (RtlCheckBit(&context->alloc, off)) {
1800  alloc = true;
1801 
1802  if (RtlCheckBit(&context->stripes[stripe].error, i)) {
1803  bad_stripe = stripe;
1804  bad_off = off;
1805  num_errors++;
1806  }
1807  }
1808 
1809  off += sectors_per_stripe;
1810  stripe = (stripe + 1) % c->chunk_item->num_stripes;
1811  }
1812 
1813  if (!alloc)
1814  continue;
1815 
1816  if (num_errors == 0 && !RtlCheckBit(&context->stripes[parity].error, i)) // everything fine
1817  continue;
1818 
1819  if (num_errors == 0 && RtlCheckBit(&context->stripes[parity].error, i)) { // parity error
1820  uint64_t addr;
1821 
1822  do_xor(&context->stripes[parity].buf[(num * c->chunk_item->stripe_length) + (i << Vcb->sector_shift)],
1823  &context->parity_scratch[i << Vcb->sector_shift],
1824  Vcb->superblock.sector_size);
1825 
1826  bad_off = (ULONG)((bit_start + num - stripe_start) * sectors_per_stripe * (c->chunk_item->num_stripes - 1)) + i;
1827  addr = c->offset + (stripe_start * (c->chunk_item->num_stripes - 1) * c->chunk_item->stripe_length) + (bad_off << Vcb->sector_shift);
1828 
1829  context->stripes[parity].rewrite = true;
1830 
1831  log_error(Vcb, addr, c->devices[parity]->devitem.dev_id, false, true, true);
1833  } else if (num_errors == 1) {
1834  uint64_t addr = c->offset + (stripe_start * (c->chunk_item->num_stripes - 1) * c->chunk_item->stripe_length) + (bad_off << Vcb->sector_shift);
1835 
1836  if (RtlCheckBit(&context->is_tree, bad_off)) {
1837  tree_header* th;
1838 
1839  do_xor(&context->parity_scratch[i << Vcb->sector_shift],
1840  &context->stripes[bad_stripe].buf[(num * c->chunk_item->stripe_length) + (i << Vcb->sector_shift)],
1841  Vcb->superblock.node_size);
1842 
1843  th = (tree_header*)&context->parity_scratch[i << Vcb->sector_shift];
1844 
1845  if (check_tree_checksum(Vcb, th) && th->address == addr) {
1846  RtlCopyMemory(&context->stripes[bad_stripe].buf[(num * c->chunk_item->stripe_length) + (i << Vcb->sector_shift)],
1847  &context->parity_scratch[i << Vcb->sector_shift], Vcb->superblock.node_size);
1848 
1849  context->stripes[bad_stripe].rewrite = true;
1850 
1851  RtlClearBits(&context->stripes[bad_stripe].error, i + 1, (Vcb->superblock.node_size >> Vcb->sector_shift) - 1);
1852 
1853  log_error(Vcb, addr, c->devices[bad_stripe]->devitem.dev_id, true, true, false);
1854  } else
1855  log_error(Vcb, addr, c->devices[bad_stripe]->devitem.dev_id, true, false, false);
1856  } else {
1858 
1859  do_xor(&context->parity_scratch[i << Vcb->sector_shift],
1860  &context->stripes[bad_stripe].buf[(num * c->chunk_item->stripe_length) + (i << Vcb->sector_shift)],
1861  Vcb->superblock.sector_size);
1862 
1863  get_sector_csum(Vcb, &context->parity_scratch[i << Vcb->sector_shift], hash);
1864 
1865  if (RtlCompareMemory(hash, (uint8_t*)context->csum + (Vcb->csum_size * bad_off), Vcb->csum_size) == Vcb->csum_size) {
1866  RtlCopyMemory(&context->stripes[bad_stripe].buf[(num * c->chunk_item->stripe_length) + (i << Vcb->sector_shift)],
1867  &context->parity_scratch[i << Vcb->sector_shift], Vcb->superblock.sector_size);
1868 
1869  context->stripes[bad_stripe].rewrite = true;
1870 
1871  log_error(Vcb, addr, c->devices[bad_stripe]->devitem.dev_id, false, true, false);
1872  } else
1873  log_error(Vcb, addr, c->devices[bad_stripe]->devitem.dev_id, false, false, false);
1874  }
1875  } else {
1876  stripe = (parity + 1) % c->chunk_item->num_stripes;
1877  off = (ULONG)((bit_start + num - stripe_start) * sectors_per_stripe * (c->chunk_item->num_stripes - 1)) + i;
1878 
1879  while (stripe != parity) {
1880  if (RtlCheckBit(&context->alloc, off)) {
1881  if (RtlCheckBit(&context->stripes[stripe].error, i)) {
1882  uint64_t addr = c->offset + (stripe_start * (c->chunk_item->num_stripes - 1) * c->chunk_item->stripe_length) + (off << Vcb->sector_shift);
1883 
1884  log_error(Vcb, addr, c->devices[stripe]->devitem.dev_id, RtlCheckBit(&context->is_tree, off), false, false);
1885  }
1886  }
1887 
1888  off += sectors_per_stripe;
1889  stripe = (stripe + 1) % c->chunk_item->num_stripes;
1890  }
1891  }
1892  }
1893 }
1894 
1896  uint64_t num, uint16_t missing_devices) {
1897  ULONG sectors_per_stripe = (ULONG)(c->chunk_item->stripe_length >> Vcb->sector_shift), off;
1898  uint16_t stripe, parity1 = (bit_start + num + c->chunk_item->num_stripes - 2) % c->chunk_item->num_stripes;
1899  uint16_t parity2 = (parity1 + 1) % c->chunk_item->num_stripes;
1900  uint64_t stripeoff;
1901 
1902  stripe = (parity1 + 2) % c->chunk_item->num_stripes;
1903  off = (ULONG)(bit_start + num - stripe_start) * sectors_per_stripe * (c->chunk_item->num_stripes - 2);
1904  stripeoff = num * sectors_per_stripe;
1905 
1906  if (c->devices[parity1]->devobj)
1907  RtlCopyMemory(context->parity_scratch, &context->stripes[parity1].buf[num * c->chunk_item->stripe_length], (ULONG)c->chunk_item->stripe_length);
1908 
1909  if (c->devices[parity2]->devobj)
1910  RtlZeroMemory(context->parity_scratch2, (ULONG)c->chunk_item->stripe_length);
1911 
1912  while (stripe != parity1) {
1913  RtlClearAllBits(&context->stripes[stripe].error);
1914 
1915  for (ULONG i = 0; i < sectors_per_stripe; i++) {
1916  if (c->devices[stripe]->devobj && RtlCheckBit(&context->alloc, off)) {
1917  if (RtlCheckBit(&context->is_tree, off)) {
1918  tree_header* th = (tree_header*)&context->stripes[stripe].buf[stripeoff << Vcb->sector_shift];
1919  uint64_t addr = c->offset + (stripe_start * (c->chunk_item->num_stripes - 2) * c->chunk_item->stripe_length) + (off << Vcb->sector_shift);
1920 
1921  if (!check_tree_checksum(Vcb, th) || th->address != addr) {
1922  RtlSetBits(&context->stripes[stripe].error, i, Vcb->superblock.node_size >> Vcb->sector_shift);
1924 
1925  if (missing_devices == 2)
1926  log_error(Vcb, addr, c->devices[stripe]->devitem.dev_id, true, false, false);
1927  }
1928 
1929  off += Vcb->superblock.node_size >> Vcb->sector_shift;
1930  stripeoff += Vcb->superblock.node_size >> Vcb->sector_shift;
1931  i += (Vcb->superblock.node_size >> Vcb->sector_shift) - 1;
1932 
1933  continue;
1934  } else if (RtlCheckBit(&context->has_csum, off)) {
1936 
1937  get_sector_csum(Vcb, context->stripes[stripe].buf + (stripeoff << Vcb->sector_shift), hash);
1938 
1939  if (RtlCompareMemory(hash, (uint8_t*)context->csum + (Vcb->csum_size * off), Vcb->csum_size) != Vcb->csum_size) {
1940  uint64_t addr = c->offset + (stripe_start * (c->chunk_item->num_stripes - 2) * c->chunk_item->stripe_length) + (off << Vcb->sector_shift);
1941 
1942  RtlSetBit(&context->stripes[stripe].error, i);
1944 
1945  if (missing_devices == 2)
1946  log_error(Vcb, addr, c->devices[stripe]->devitem.dev_id, false, false, false);
1947  }
1948  }
1949  }
1950 
1951  off++;
1952  stripeoff++;
1953  }
1954 
1955  if (c->devices[parity1]->devobj)
1956  do_xor(context->parity_scratch, &context->stripes[stripe].buf[num * c->chunk_item->stripe_length], (uint32_t)c->chunk_item->stripe_length);
1957 
1958  stripe = (stripe + 1) % c->chunk_item->num_stripes;
1959  stripeoff = num * sectors_per_stripe;
1960  }
1961 
1962  RtlClearAllBits(&context->stripes[parity1].error);
1963 
1964  if (missing_devices == 0 || (missing_devices == 1 && !c->devices[parity2]->devobj)) {
1965  // check parity 1
1966 
1967  for (ULONG i = 0; i < sectors_per_stripe; i++) {
1968  ULONG o, j;
1969 
1970  o = i << Vcb->sector_shift;
1971  for (j = 0; j < Vcb->superblock.sector_size; j++) { // FIXME - use SSE
1972  if (context->parity_scratch[o] != 0) {
1973  RtlSetBit(&context->stripes[parity1].error, i);
1974  break;
1975  }
1976  o++;
1977  }
1978  }
1979  }
1980 
1981  RtlClearAllBits(&context->stripes[parity2].error);
1982 
1983  if (missing_devices == 0 || (missing_devices == 1 && !c->devices[parity1]->devobj)) {
1984  // check parity 2
1985 
1986  stripe = parity1 == 0 ? (c->chunk_item->num_stripes - 1) : (parity1 - 1);
1987 
1988  while (stripe != parity2) {
1989  galois_double(context->parity_scratch2, (uint32_t)c->chunk_item->stripe_length);
1990  do_xor(context->parity_scratch2, &context->stripes[stripe].buf[num * c->chunk_item->stripe_length], (uint32_t)c->chunk_item->stripe_length);
1991 
1992  stripe = stripe == 0 ? (c->chunk_item->num_stripes - 1) : (stripe - 1);
1993  }
1994 
1995  for (ULONG i = 0; i < sectors_per_stripe; i++) {
1996  if (RtlCompareMemory(&context->stripes[parity2].buf[(num * c->chunk_item->stripe_length) + (i << Vcb->sector_shift)],
1997  &context->parity_scratch2[i << Vcb->sector_shift], Vcb->superblock.sector_size) != Vcb->superblock.sector_size)
1998  RtlSetBit(&context->stripes[parity2].error, i);
1999  }
2000  }
2001 
2002  if (missing_devices == 2)
2003  return;
2004 
2005  // log and fix errors
2006 
2007  for (ULONG i = 0; i < sectors_per_stripe; i++) {
2008  ULONG num_errors = 0;
2009  uint64_t bad_stripe1 = 0, bad_stripe2 = 0;
2010  ULONG bad_off1 = 0, bad_off2 = 0;
2011  bool alloc = false;
2012 
2013  stripe = (parity1 + 2) % c->chunk_item->num_stripes;
2014  off = (ULONG)((bit_start + num - stripe_start) * sectors_per_stripe * (c->chunk_item->num_stripes - 2)) + i;
2015 
2016  while (stripe != parity1) {
2017  if (RtlCheckBit(&context->alloc, off)) {
2018  alloc = true;
2019 
2020  if (!c->devices[stripe]->devobj || RtlCheckBit(&context->stripes[stripe].error, i)) {
2021  if (num_errors == 0) {
2022  bad_stripe1 = stripe;
2023  bad_off1 = off;
2024  } else if (num_errors == 1) {
2025  bad_stripe2 = stripe;
2026  bad_off2 = off;
2027  }
2028  num_errors++;
2029  }
2030  }
2031 
2032  off += sectors_per_stripe;
2033  stripe = (stripe + 1) % c->chunk_item->num_stripes;
2034  }
2035 
2036  if (!alloc)
2037  continue;
2038 
2039  if (num_errors == 0 && !RtlCheckBit(&context->stripes[parity1].error, i) && !RtlCheckBit(&context->stripes[parity2].error, i)) // everything fine
2040  continue;
2041 
2042  if (num_errors == 0) { // parity error
2043  uint64_t addr;
2044 
2045  if (RtlCheckBit(&context->stripes[parity1].error, i)) {
2046  do_xor(&context->stripes[parity1].buf[(num * c->chunk_item->stripe_length) + (i << Vcb->sector_shift)],
2047  &context->parity_scratch[i << Vcb->sector_shift],
2048  Vcb->superblock.sector_size);
2049 
2050  bad_off1 = (ULONG)((bit_start + num - stripe_start) * sectors_per_stripe * (c->chunk_item->num_stripes - 2)) + i;
2051  addr = c->offset + (stripe_start * (c->chunk_item->num_stripes - 2) * c->chunk_item->stripe_length) + (bad_off1 << Vcb->sector_shift);
2052 
2053  context->stripes[parity1].rewrite = true;
2054 
2055  log_error(Vcb, addr, c->devices[parity1]->devitem.dev_id, false, true, true);
2057  }
2058 
2059  if (RtlCheckBit(&context->stripes[parity2].error, i)) {
2060  RtlCopyMemory(&context->stripes[parity2].buf[(num * c->chunk_item->stripe_length) + (i << Vcb->sector_shift)],
2061  &context->parity_scratch2[i << Vcb->sector_shift],
2062  Vcb->superblock.sector_size);
2063 
2064  bad_off1 = (ULONG)((bit_start + num - stripe_start) * sectors_per_stripe * (c->chunk_item->num_stripes - 2)) + i;
2065  addr = c->offset + (stripe_start * (c->chunk_item->num_stripes - 2) * c->chunk_item->stripe_length) + (bad_off1 << Vcb->sector_shift);
2066 
2067  context->stripes[parity2].rewrite = true;
2068 
2069  log_error(Vcb, addr, c->devices[parity2]->devitem.dev_id, false, true, true);
2071  }
2072  } else if (num_errors == 1) {
2073  uint32_t len;
2074  uint64_t addr = c->offset + (stripe_start * (c->chunk_item->num_stripes - 2) * c->chunk_item->stripe_length) + (bad_off1 << Vcb->sector_shift);
2075  uint8_t* scratch;
2076 
2077  len = RtlCheckBit(&context->is_tree, bad_off1) ? Vcb->superblock.node_size : Vcb->superblock.sector_size;
2078 
2080  if (!scratch) {
2081  ERR("out of memory\n");
2082  return;
2083  }
2084 
2085  RtlZeroMemory(scratch, len);
2086 
2087  do_xor(&context->parity_scratch[i << Vcb->sector_shift],
2088  &context->stripes[bad_stripe1].buf[(num * c->chunk_item->stripe_length) + (i << Vcb->sector_shift)], len);
2089 
2090  stripe = parity1 == 0 ? (c->chunk_item->num_stripes - 1) : (parity1 - 1);
2091 
2092  if (c->devices[parity2]->devobj) {
2093  uint16_t stripe_num, bad_stripe_num = 0;
2094 
2095  stripe_num = c->chunk_item->num_stripes - 3;
2096  while (stripe != parity2) {
2097  galois_double(scratch, len);
2098 
2099  if (stripe != bad_stripe1)
2100  do_xor(scratch, &context->stripes[stripe].buf[(num * c->chunk_item->stripe_length) + (i << Vcb->sector_shift)], len);
2101  else
2102  bad_stripe_num = stripe_num;
2103 
2104  stripe = stripe == 0 ? (c->chunk_item->num_stripes - 1) : (stripe - 1);
2105  stripe_num--;
2106  }
2107 
2108  do_xor(scratch, &context->stripes[parity2].buf[(num * c->chunk_item->stripe_length) + (i << Vcb->sector_shift)], len);
2109 
2110  if (bad_stripe_num != 0)
2111  galois_divpower(scratch, (uint8_t)bad_stripe_num, len);
2112  }
2113 
2114  if (RtlCheckBit(&context->is_tree, bad_off1)) {
2115  uint8_t hash1[MAX_HASH_SIZE];
2116  uint8_t hash2[MAX_HASH_SIZE];
2117  tree_header *th1 = NULL, *th2 = NULL;
2118 
2119  if (c->devices[parity1]->devobj) {
2120  th1 = (tree_header*)&context->parity_scratch[i << Vcb->sector_shift];
2121  get_tree_checksum(Vcb, th1, hash1);
2122  }
2123 
2124  if (c->devices[parity2]->devobj) {
2125  th2 = (tree_header*)scratch;
2126  get_tree_checksum(Vcb, th2, hash2);
2127  }
2128 
2129  if ((c->devices[parity1]->devobj && RtlCompareMemory(hash1, th1, Vcb->csum_size) == Vcb->csum_size && th1->address == addr) ||
2130  (c->devices[parity2]->devobj && RtlCompareMemory(hash2, th2, Vcb->csum_size) == Vcb->csum_size && th2->address == addr)) {
2131  if (!c->devices[parity1]->devobj || RtlCompareMemory(hash1, th1, Vcb->csum_size) != Vcb->csum_size || th1->address != addr) {
2132  RtlCopyMemory(&context->stripes[bad_stripe1].buf[(num * c->chunk_item->stripe_length) + (i << Vcb->sector_shift)],
2133  scratch, Vcb->superblock.node_size);
2134 
2135  if (c->devices[parity1]->devobj) {
2136  // fix parity 1
2137 
2138  stripe = (parity1 + 2) % c->chunk_item->num_stripes;
2139 
2140  RtlCopyMemory(&context->stripes[parity1].buf[(num * c->chunk_item->stripe_length) + (i << Vcb->sector_shift)],
2141  &context->stripes[stripe].buf[(num * c->chunk_item->stripe_length) + (i << Vcb->sector_shift)],
2142  Vcb->superblock.node_size);
2143 
2144  stripe = (stripe + 1) % c->chunk_item->num_stripes;
2145 
2146  while (stripe != parity1) {
2147  do_xor(&context->stripes[parity1].buf[(num * c->chunk_item->stripe_length) + (i << Vcb->sector_shift)],
2148  &context->stripes[stripe].buf[(num * c->chunk_item->stripe_length) + (i << Vcb->sector_shift)],
2149  Vcb->superblock.node_size);
2150 
2151  stripe = (stripe + 1) % c->chunk_item->num_stripes;
2152  }
2153 
2154  context->stripes[parity1].rewrite = true;
2155 
2156  log_error(Vcb, addr, c->devices[parity1]->devitem.dev_id, false, true, true);
2158  }
2159  } else {
2160  RtlCopyMemory(&context->stripes[bad_stripe1].buf[(num * c->chunk_item->stripe_length) + (i << Vcb->sector_shift)],
2161  &context->parity_scratch[i << Vcb->sector_shift], Vcb->superblock.node_size);
2162 
2163  if (!c->devices[parity2]->devobj || RtlCompareMemory(hash2, th2, Vcb->csum_size) != Vcb->csum_size || th2->address != addr) {
2164  // fix parity 2
2165  stripe = parity1 == 0 ? (c->chunk_item->num_stripes - 1) : (parity1 - 1);
2166 
2167  if (c->devices[parity2]->devobj) {
2168  RtlCopyMemory(&context->stripes[parity2].buf[(num * c->chunk_item->stripe_length) + (i << Vcb->sector_shift)],
2169  &context->stripes[stripe].buf[(num * c->chunk_item->stripe_length) + (i << Vcb->sector_shift)],
2170  Vcb->superblock.node_size);
2171 
2172  stripe = stripe == 0 ? (c->chunk_item->num_stripes - 1) : (stripe - 1);
2173 
2174  while (stripe != parity2) {
2175  galois_double(&context->stripes[parity2].buf[(num * c->chunk_item->stripe_length) + (i << Vcb->sector_shift)], Vcb->superblock.node_size);
2176 
2177  do_xor(&context->stripes[parity2].buf[(num * c->chunk_item->stripe_length) + (i << Vcb->sector_shift)],
2178  &context->stripes[stripe].buf[(num * c->chunk_item->stripe_length) + (i << Vcb->sector_shift)],
2179  Vcb->superblock.node_size);
2180 
2181  stripe = stripe == 0 ? (c->chunk_item->num_stripes - 1) : (stripe - 1);
2182  }
2183 
2184  context->stripes[parity2].rewrite = true;
2185 
2186  log_error(Vcb, addr, c->devices[parity2]->devitem.dev_id, false, true, true);
2188  }
2189  }
2190  }
2191 
2192  context->stripes[bad_stripe1].rewrite = true;
2193 
2194  RtlClearBits(&context->stripes[bad_stripe1].error, i + 1, (Vcb->superblock.node_size >> Vcb->sector_shift) - 1);
2195 
2196  log_error(Vcb, addr, c->devices[bad_stripe1]->devitem.dev_id, true, true, false);
2197  } else
2198  log_error(Vcb, addr, c->devices[bad_stripe1]->devitem.dev_id, true, false, false);
2199  } else {
2200  uint8_t hash1[MAX_HASH_SIZE];
2201  uint8_t hash2[MAX_HASH_SIZE];
2202 
2203  if (c->devices[parity1]->devobj)
2204  get_sector_csum(Vcb, &context->parity_scratch[i << Vcb->sector_shift], hash1);
2205 
2206  if (c->devices[parity2]->devobj)
2207  get_sector_csum(Vcb, scratch, hash2);
2208 
2209  if ((c->devices[parity1]->devobj && RtlCompareMemory(hash1, (uint8_t*)context->csum + (bad_off1 * Vcb->csum_size), Vcb->csum_size) == Vcb->csum_size) ||
2210  (c->devices[parity2]->devobj && RtlCompareMemory(hash2, (uint8_t*)context->csum + (bad_off1 * Vcb->csum_size), Vcb->csum_size) == Vcb->csum_size)) {
2211  if (c->devices[parity2]->devobj && RtlCompareMemory(hash2, (uint8_t*)context->csum + (bad_off1 * Vcb->csum_size), Vcb->csum_size) == Vcb->csum_size) {
2212  RtlCopyMemory(&context->stripes[bad_stripe1].buf[(num * c->chunk_item->stripe_length) + (i << Vcb->sector_shift)],
2213  scratch, Vcb->superblock.sector_size);
2214 
2215  if (c->devices[parity1]->devobj && RtlCompareMemory(hash1, (uint8_t*)context->csum + (bad_off1 * Vcb->csum_size), Vcb->csum_size) != Vcb->csum_size) {
2216  // fix parity 1
2217 
2218  stripe = (parity1 + 2) % c->chunk_item->num_stripes;
2219 
2220  RtlCopyMemory(&context->stripes[parity1].buf[(num * c->chunk_item->stripe_length) + (i << Vcb->sector_shift)],
2221  &context->stripes[stripe].buf[(num * c->chunk_item->stripe_length) + (i << Vcb->sector_shift)],
2222  Vcb->superblock.sector_size);
2223 
2224  stripe = (stripe + 1) % c->chunk_item->num_stripes;
2225 
2226  while (stripe != parity1) {
2227  do_xor(&context->stripes[parity1].buf[(num * c->chunk_item->stripe_length) + (i << Vcb->sector_shift)],
2228  &context->stripes[stripe].buf[(num * c->chunk_item->stripe_length) + (i << Vcb->sector_shift)],
2229  Vcb->superblock.sector_size);
2230 
2231  stripe = (stripe + 1) % c->chunk_item->num_stripes;
2232  }
2233 
2234  context->stripes[parity1].rewrite = true;
2235 
2236  log_error(Vcb, addr, c->devices[parity1]->devitem.dev_id, false, true, true);
2238  }
2239  } else {
2240  RtlCopyMemory(&context->stripes[bad_stripe1].buf[(num * c->chunk_item->stripe_length) + (i << Vcb->sector_shift)],
2241  &context->parity_scratch[i << Vcb->sector_shift], Vcb->superblock.sector_size);
2242 
2243  if (c->devices[parity2]->devobj && RtlCompareMemory(hash2, (uint8_t*)context->csum + (bad_off1 * Vcb->csum_size), Vcb->csum_size) != Vcb->csum_size) {
2244  // fix parity 2
2245  stripe = parity1 == 0 ? (c->chunk_item->num_stripes - 1) : (parity1 - 1);
2246 
2247  RtlCopyMemory(&context->stripes[parity2].buf[(num * c->chunk_item->stripe_length) + (i << Vcb->sector_shift)],
2248  &context->stripes[stripe].buf[(num * c->chunk_item->stripe_length) + (i << Vcb->sector_shift)],
2249  Vcb->superblock.sector_size);
2250 
2251  stripe = stripe == 0 ? (c->chunk_item->num_stripes - 1) : (stripe - 1);
2252 
2253  while (stripe != parity2) {
2254  galois_double(&context->stripes[parity2].buf[(num * c->chunk_item->stripe_length) + (i << Vcb->sector_shift)], Vcb->superblock.sector_size);
2255 
2256  do_xor(&context->stripes[parity2].buf[(num * c->chunk_item->stripe_length) + (i << Vcb->sector_shift)],
2257  &context->stripes[stripe].buf[(num * c->chunk_item->stripe_length) + (i << Vcb->sector_shift)],
2258  Vcb->superblock.sector_size);
2259 
2260  stripe = stripe == 0 ? (c->chunk_item->num_stripes - 1) : (stripe - 1);
2261  }
2262 
2263  context->stripes[parity2].rewrite = true;
2264 
2265  log_error(Vcb, addr, c->devices[parity2]->devitem.dev_id, false, true, true);
2267  }
2268  }
2269 
2270  context->stripes[bad_stripe1].rewrite = true;
2271 
2272  log_error(Vcb, addr, c->devices[bad_stripe1]->devitem.dev_id, false, true, false);
2273  } else
2274  log_error(Vcb, addr, c->devices[bad_stripe1]->devitem.dev_id, false, false, false);
2275  }
2276 
2277  ExFreePool(scratch);
2278  } else if (num_errors == 2 && missing_devices == 0) {
2279  uint16_t x = 0, y = 0, k;
2280  uint64_t addr;
2281  uint32_t len = (RtlCheckBit(&context->is_tree, bad_off1) || RtlCheckBit(&context->is_tree, bad_off2)) ? Vcb->superblock.node_size : Vcb->superblock.sector_size;
2282  uint8_t gyx, gx, denom, a, b, *p, *q, *pxy, *qxy;
2283  uint32_t j;
2284 
2285  stripe = parity1 == 0 ? (c->chunk_item->num_stripes - 1) : (parity1 - 1);
2286 
2287  // put qxy in parity_scratch
2288  // put pxy in parity_scratch2
2289 
2290  k = c->chunk_item->num_stripes - 3;
2291  if (stripe == bad_stripe1 || stripe == bad_stripe2) {
2292  RtlZeroMemory(&context->parity_scratch[i << Vcb->sector_shift], len);
2293  RtlZeroMemory(&context->parity_scratch2[i << Vcb->sector_shift], len);
2294 
2295  if (stripe == bad_stripe1)
2296  x = k;
2297  else
2298  y = k;
2299  } else {
2300  RtlCopyMemory(&context->parity_scratch[i << Vcb->sector_shift],
2301  &context->stripes[stripe].buf[(num * c->chunk_item->stripe_length) + (i << Vcb->sector_shift)], len);
2302  RtlCopyMemory(&context->parity_scratch2[i << Vcb->sector_shift],
2303  &context->stripes[stripe].buf[(num * c->chunk_item->stripe_length) + (i << Vcb->sector_shift)], len);
2304  }
2305 
2306  stripe = stripe == 0 ? (c->chunk_item->num_stripes - 1) : (stripe - 1);
2307 
2308  k--;
2309  do {
2310  galois_double(&context->parity_scratch[i << Vcb->sector_shift], len);
2311 
2312  if (stripe != bad_stripe1 && stripe != bad_stripe2) {
2313  do_xor(&context->parity_scratch[i << Vcb->sector_shift],
2314  &context->stripes[stripe].buf[(num * c->chunk_item->stripe_length) + (i << Vcb->sector_shift)], len);
2315  do_xor(&context->parity_scratch2[i << Vcb->sector_shift],
2316  &context->stripes[stripe].buf[(num * c->chunk_item->stripe_length) + (i << Vcb->sector_shift)], len);
2317  } else if (stripe == bad_stripe1)
2318  x = k;
2319  else if (stripe == bad_stripe2)
2320  y = k;
2321 
2322  stripe = stripe == 0 ? (c->chunk_item->num_stripes - 1) : (stripe - 1);
2323  k--;
2324  } while (stripe != parity2);
2325 
2326  gyx = gpow2(y > x ? (y-x) : (255-x+y));
2327  gx = gpow2(255-x);
2328 
2329  denom = gdiv(1, gyx ^ 1);
2330  a = gmul(gyx, denom);
2331  b = gmul(gx, denom);
2332 
2333  p = &context->stripes[parity1].buf[(num * c->chunk_item->stripe_length) + (i << Vcb->sector_shift)];
2334  q = &context->stripes[parity2].buf[(num * c->chunk_item->stripe_length) + (i << Vcb->sector_shift)];
2335  pxy = &context->parity_scratch2[i << Vcb->sector_shift];
2336  qxy = &context->parity_scratch[i << Vcb->sector_shift];
2337 
2338  for (j = 0; j < len; j++) {
2339  *qxy = gmul(a, *p ^ *pxy) ^ gmul(b, *q ^ *qxy);
2340 
2341  p++;
2342  q++;
2343  pxy++;
2344  qxy++;
2345  }
2346 
2347  do_xor(&context->parity_scratch2[i << Vcb->sector_shift], &context->parity_scratch[i << Vcb->sector_shift], len);
2348  do_xor(&context->parity_scratch2[i << Vcb->sector_shift], &context->stripes[parity1].buf[(num * c->chunk_item->stripe_length) + (i << Vcb->sector_shift)], len);
2349 
2350  addr = c->offset + (stripe_start * (c->chunk_item->num_stripes - 2) * c->chunk_item->stripe_length) + (bad_off1 << Vcb->sector_shift);
2351 
2352  if (RtlCheckBit(&context->is_tree, bad_off1)) {
2353  tree_header* th = (tree_header*)&context->parity_scratch[i << Vcb->sector_shift];
2354 
2355  if (check_tree_checksum(Vcb, th) && th->address == addr) {
2356  RtlCopyMemory(&context->stripes[bad_stripe1].buf[(num * c->chunk_item->stripe_length) + (i << Vcb->sector_shift)],
2357  &context->parity_scratch[i << Vcb->sector_shift], Vcb->superblock.node_size);
2358 
2359  context->stripes[bad_stripe1].rewrite = true;
2360 
2361  RtlClearBits(&context->stripes[bad_stripe1].error, i + 1, (Vcb->superblock.node_size >> Vcb->sector_shift) - 1);
2362 
2363  log_error(Vcb, addr, c->devices[bad_stripe1]->devitem.dev_id, true, true, false);
2364  } else
2365  log_error(Vcb, addr, c->devices[bad_stripe1]->devitem.dev_id, true, false, false);
2366  } else {
2367  if (check_sector_csum(Vcb, &context->parity_scratch[i << Vcb->sector_shift], (uint8_t*)context->csum + (Vcb->csum_size * bad_off1))) {
2368  RtlCopyMemory(&context->stripes[bad_stripe1].buf[(num * c->chunk_item->stripe_length) + (i << Vcb->sector_shift)],
2369  &context->parity_scratch[i << Vcb->sector_shift], Vcb->superblock.sector_size);
2370 
2371  context->stripes[bad_stripe1].rewrite = true;
2372 
2373  log_error(Vcb, addr, c->devices[bad_stripe1]->devitem.dev_id, false, true, false);
2374  } else
2375  log_error(Vcb, addr, c->devices[bad_stripe1]->devitem.dev_id, false, false, false);
2376  }
2377 
2378  addr = c->offset + (stripe_start * (c->chunk_item->num_stripes - 2) * c->chunk_item->stripe_length) + (bad_off2 << Vcb->sector_shift);
2379 
2380  if (RtlCheckBit(&context->is_tree, bad_off2)) {
2381  tree_header* th = (tree_header*)&context->parity_scratch2[i << Vcb->sector_shift];
2382 
2383  if (check_tree_checksum(Vcb, th) && th->address == addr) {
2384  RtlCopyMemory(&context->stripes[bad_stripe2].buf[(num * c->chunk_item->stripe_length) + (i << Vcb->sector_shift)],
2385  &context->parity_scratch2[i << Vcb->sector_shift], Vcb->superblock.node_size);
2386 
2387  context->stripes[bad_stripe2].rewrite = true;
2388 
2389  RtlClearBits(&context->stripes[bad_stripe2].error, i + 1, (Vcb->superblock.node_size >> Vcb->sector_shift) - 1);
2390 
2391  log_error(Vcb, addr, c->devices[bad_stripe2]->devitem.dev_id, true, true, false);
2392  } else
2393  log_error(Vcb, addr, c->devices[bad_stripe2]->devitem.dev_id, true, false, false);
2394  } else {
2395  if (check_sector_csum(Vcb, &context->parity_scratch2[i << Vcb->sector_shift], (uint8_t*)context->csum + (Vcb->csum_size * bad_off2))) {
2396  RtlCopyMemory(&context->stripes[bad_stripe2].buf[(num * c->chunk_item->stripe_length) + (i << Vcb->sector_shift)],
2397  &context->parity_scratch2[i << Vcb->sector_shift], Vcb->superblock.sector_size);
2398 
2399  context->stripes[bad_stripe2].rewrite = true;
2400 
2401  log_error(Vcb, addr, c->devices[bad_stripe2]->devitem.dev_id, false, true, false);
2402  } else
2403  log_error(Vcb, addr, c->devices[bad_stripe2]->devitem.dev_id, false, false, false);
2404  }
2405  } else {
2406  stripe = (parity2 + 1) % c->chunk_item->num_stripes;
2407  off = (ULONG)((bit_start + num - stripe_start) * sectors_per_stripe * (c->chunk_item->num_stripes - 2)) + i;
2408 
2409  while (stripe != parity1) {
2410  if (c->devices[stripe]->devobj && RtlCheckBit(&context->alloc, off)) {
2411  if (RtlCheckBit(&context->stripes[stripe].error, i)) {
2412  uint64_t addr = c->offset + (stripe_start * (c->chunk_item->num_stripes - 2) * c->chunk_item->stripe_length) + (off << Vcb->sector_shift);
2413 
2414  log_error(Vcb, addr, c->devices[stripe]->devitem.dev_id, RtlCheckBit(&context->is_tree, off), false, false);
2415  }
2416  }
2417 
2418  off += sectors_per_stripe;
2419  stripe = (stripe + 1) % c->chunk_item->num_stripes;
2420  }
2421  }
2422  }
2423 }
2424 
// Body of scrub_chunk_raid56_stripe_run(Vcb, c, stripe_start, stripe_end) -- name and
// argument order taken from the call site and the TRACE below.
// NOTE(review): this listing has gaps in its embedded numbering (e.g. 2425, the signature;
// 2433; 2459; 2466; ...), so the signature, some declarations and several early-return /
// "Status = ..." lines are not visible here.
//
// Scrubs the full stripes stripe_start..stripe_end (inclusive) of a RAID5/RAID6 chunk:
//   1. walks the extent tree to build per-sector bitmaps (alloc, is_tree and, for data
//      chunks, has_csum) plus an array of expected data checksums;
//   2. allocates one read buffer and one error bitmap per device stripe;
//   3. locks the chunk range, then repeatedly reads up to max_read stripes from every
//      present device, hands each stripe to scrub_raid5_stripe / scrub_raid6_stripe, and
//      writes back any buffer whose "rewrite" flag was set;
//   4. releases everything and returns Status.
2426  NTSTATUS Status;
2427  KEY searchkey;
2428  traverse_ptr tp;
2429  bool b;
2430  uint64_t run_start, run_end, full_stripe_len, stripe;
2431  uint32_t max_read, num_sectors;
2432  ULONG arrlen, *allocarr, *csumarr = NULL, *treearr, num_parity_stripes = c->chunk_item->type & BLOCK_FLAG_RAID6 ? 2 : 1;
2434  uint16_t i;
      // The per-device stripe records are read from directly after the CHUNK_ITEM header.
2435  CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&c->chunk_item[1];
2436 
2437  TRACE("(%p, %p, %I64x, %I64x)\n", Vcb, c, stripe_start, stripe_end);
2438 
      // full_stripe_len counts data only (parity stripes excluded); the run is the logical
      // address range [run_start, run_end), with stripe_end treated as inclusive.
2439  full_stripe_len = (c->chunk_item->num_stripes - num_parity_stripes) * c->chunk_item->stripe_length;
2440  run_start = c->offset + (stripe_start * full_stripe_len);
2441  run_end = c->offset + ((stripe_end + 1) * full_stripe_len);
2442 
      // Position tp in the extent tree using run_start as the object id.
2443  searchkey.obj_id = run_start;
2444  searchkey.obj_type = TYPE_METADATA_ITEM;
2445  searchkey.offset = 0xffffffffffffffff;
2446 
2447  Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, false, NULL);
2448  if (!NT_SUCCESS(Status)) {
2449  ERR("find_item returned %08lx\n", Status);
2450  return Status;
2451  }
2452 
      // One bitmap bit per sector in the run; arrlen is the bitmap allocation size in bytes
      // (presumably rounded up to a multiple of sizeof(ULONG) by sector_align -- confirm).
2453  num_sectors = (uint32_t)(((stripe_end - stripe_start + 1) * full_stripe_len) >> Vcb->sector_shift);
2454  arrlen = (ULONG)sector_align((num_sectors / 8) + 1, sizeof(ULONG));
2455 
2456  allocarr = ExAllocatePoolWithTag(PagedPool, arrlen, ALLOC_TAG);
2457  if (!allocarr) {
2458  ERR("out of memory\n");
2460  }
2461 
2462  treearr = ExAllocatePoolWithTag(PagedPool, arrlen, ALLOC_TAG);
2463  if (!treearr) {
2464  ERR("out of memory\n");
2465  ExFreePool(allocarr);
2467  }
2468 
2469  RtlInitializeBitMap(&context.alloc, allocarr, num_sectors);
2470  RtlClearAllBits(&context.alloc);
2471 
2472  RtlInitializeBitMap(&context.is_tree, treearr, num_sectors);
2473  RtlClearAllBits(&context.is_tree);
2474 
      // Scratch buffer of one stripe_length, used by the per-stripe scrub routines.
2475  context.parity_scratch = ExAllocatePoolWithTag(PagedPool, (ULONG)c->chunk_item->stripe_length, ALLOC_TAG);
2476  if (!context.parity_scratch) {
2477  ERR("out of memory\n");
2478  ExFreePool(allocarr);
2479  ExFreePool(treearr);
2481  }
2482 
      // Data chunks additionally get a has_csum bitmap and room for one checksum per sector.
2483  if (c->chunk_item->type & BLOCK_FLAG_DATA) {
2484  csumarr = ExAllocatePoolWithTag(PagedPool, arrlen, ALLOC_TAG);
2485  if (!csumarr) {
2486  ERR("out of memory\n");
2487  ExFreePool(allocarr);
2488  ExFreePool(treearr);
2489  ExFreePool(context.parity_scratch);
2491  }
2492 
2493  RtlInitializeBitMap(&context.has_csum, csumarr, num_sectors);
2494  RtlClearAllBits(&context.has_csum);
2495 
2496  context.csum = ExAllocatePoolWithTag(PagedPool, num_sectors * Vcb->csum_size, ALLOC_TAG);
2497  if (!context.csum) {
2498  ERR("out of memory\n");
2499  ExFreePool(allocarr);
2500  ExFreePool(treearr);
2501  ExFreePool(context.parity_scratch);
2502  ExFreePool(csumarr);
2504  }
2505  }
2506 
      // RAID6 needs a second scratch buffer of the same size.
2507  if (c->chunk_item->type & BLOCK_FLAG_RAID6) {
2508  context.parity_scratch2 = ExAllocatePoolWithTag(PagedPool, (ULONG)c->chunk_item->stripe_length, ALLOC_TAG);
2509  if (!context.parity_scratch2) {
2510  ERR("out of memory\n");
2511  ExFreePool(allocarr);
2512  ExFreePool(treearr);
2513  ExFreePool(context.parity_scratch);
2514 
2515  if (c->chunk_item->type & BLOCK_FLAG_DATA) {
2516  ExFreePool(csumarr);
2517  ExFreePool(context.csum);
2518  }
2519 
2521  }
2522  }
2523 
      // Walk the extent tree until an item starts at or beyond run_end, recording which
      // sectors are allocated, which belong to tree blocks, and the checksums of data sectors.
2524  do {
2525  traverse_ptr next_tp;
2526 
2527  if (tp.item->key.obj_id >= run_end)
2528  break;
2529 
      // NOTE(review): listing line 2530 is absent; presumably the key-type test that opens
      // the block closed further down -- confirm against the real source.
      // Metadata items have no length in the key, so node_size is used as the extent size.
2531  uint64_t size = tp.item->key.obj_type == TYPE_METADATA_ITEM ? Vcb->superblock.node_size : tp.item->key.offset;
2532 
2533  if (tp.item->key.obj_id + size > run_start) {
      // Clamp the extent to the run before converting to sector indices relative to run_start.
2534  uint64_t extent_start = max(run_start, tp.item->key.obj_id);
2535  uint64_t extent_end = min(tp.item->key.obj_id + size, run_end);
2536  bool extent_is_tree = false;
2537 
2538  RtlSetBits(&context.alloc, (ULONG)((extent_start - run_start) >> Vcb->sector_shift), (ULONG)((extent_end - extent_start) >> Vcb->sector_shift));
2539 
      // NOTE(review): listing line 2540 (the condition guarding the next statement) is absent.
2541  extent_is_tree = true;
2542  else {
2543  EXTENT_ITEM* ei = (EXTENT_ITEM*)tp.item->data;
2544 
2545  if (tp.item->size < sizeof(EXTENT_ITEM)) {
2546  ERR("(%I64x,%x,%I64x) was %u bytes, expected at least %Iu\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_ITEM));
2548  goto end;
2549  }
2550 
2551  if (ei->flags & EXTENT_ITEM_TREE_BLOCK)
2552  extent_is_tree = true;
2553  }
2554 
2555  if (extent_is_tree)
2556  RtlSetBits(&context.is_tree, (ULONG)((extent_start - run_start) >> Vcb->sector_shift), (ULONG)((extent_end - extent_start) >> Vcb->sector_shift));
2557  else if (c->chunk_item->type & BLOCK_FLAG_DATA) {
      // Data extent in a data chunk: gather its checksums from the checksum tree.
2558  traverse_ptr tp2;
2559  bool b2;
2560 
2561  searchkey.obj_id = EXTENT_CSUM_ID;
2562  searchkey.obj_type = TYPE_EXTENT_CSUM;
2563  searchkey.offset = extent_start;
2564 
2565  Status = find_item(Vcb, Vcb->checksum_root, &tp2, &searchkey, false, NULL);
2566  if (!NT_SUCCESS(Status) && Status != STATUS_NOT_FOUND) {
2567  ERR("find_item returned %08lx\n", Status);
2568  goto end;
2569  }
2570 
2571  do {
2572  traverse_ptr next_tp2;
2573 
2574  if (tp2.item->key.offset >= extent_end)
2575  break;
2576 
      // NOTE(review): because of this guard, max(extent_start, key.offset) below always
      // yields key.offset, and a checksum item that begins before extent_start but
      // overlaps it is skipped entirely -- confirm that is intended.
2577  if (tp2.item->key.offset >= extent_start) {
2578  uint64_t csum_start = max(extent_start, tp2.item->key.offset);
2579  uint64_t csum_end = min(extent_end, tp2.item->key.offset + (((uint64_t)tp2.item->size << Vcb->sector_shift) / Vcb->csum_size));
2580 
2581  RtlSetBits(&context.has_csum, (ULONG)((csum_start - run_start) >> Vcb->sector_shift), (ULONG)((csum_end - csum_start) >> Vcb->sector_shift));
2582 
      // Copy csum_size bytes per sector of [csum_start, csum_end) into context.csum, placed
      // at the sector's index relative to run_start.
2583  RtlCopyMemory((uint8_t*)context.csum + (((csum_start - run_start) * Vcb->csum_size) >> Vcb->sector_shift),
2584  tp2.item->data + (((csum_start - tp2.item->key.offset) * Vcb->csum_size) >> Vcb->sector_shift),
2585  (ULONG)(((csum_end - csum_start) * Vcb->csum_size) >> Vcb->sector_shift));
2586  }
2587 
2588  b2 = find_next_item(Vcb, &tp2, &next_tp2, false, NULL);
2589 
2590  if (b2)
2591  tp2 = next_tp2;
2592  } while (b2);
2593  }
2594  }
2595  }
2596 
2597  b = find_next_item(Vcb, &tp, &next_tp, false, NULL);
2598 
2599  if (b)
2600  tp = next_tp;
2601  } while (b);
2602 
      // One scrub_context_raid56_stripe record per device stripe in the chunk.
2603  context.stripes = ExAllocatePoolWithTag(PagedPool, sizeof(scrub_context_raid56_stripe) * c->chunk_item->num_stripes, ALLOC_TAG);
2604  if (!context.stripes) {
2605  ERR("out of memory\n");
2607  goto end;
2608  }
2609 
2610  max_read = (uint32_t)min(1048576 / c->chunk_item->stripe_length, stripe_end - stripe_start + 1); // only process 1 MB of data at a time
2611 
      // Per stripe: a buffer large enough for max_read stripes and an error bitmap with one
      // bit per sector of a single stripe.
      // NOTE(review): both failure paths below free the buf of earlier stripes but not their
      // errorarr, and "goto end" bypasses the end2 cleanup that frees errorarr -- looks like
      // a leak of the already-allocated error bitmaps; confirm.
2612  for (i = 0; i < c->chunk_item->num_stripes; i++) {
2613  context.stripes[i].buf = ExAllocatePoolWithTag(PagedPool, (ULONG)(max_read * c->chunk_item->stripe_length), ALLOC_TAG);
2614  if (!context.stripes[i].buf) {
2615  uint64_t j;
2616 
2617  ERR("out of memory\n");
2618 
2619  for (j = 0; j < i; j++) {
2620  ExFreePool(context.stripes[j].buf);
2621  }
2622  ExFreePool(context.stripes);
2623 
2625  goto end;
2626  }
2627 
2628  context.stripes[i].errorarr = ExAllocatePoolWithTag(PagedPool, (ULONG)sector_align(((c->chunk_item->stripe_length >> Vcb->sector_shift) / 8) + 1, sizeof(ULONG)), ALLOC_TAG);
2629  if (!context.stripes[i].errorarr) {
2630  uint64_t j;
2631 
2632  ERR("out of memory\n");
2633 
2634  ExFreePool(context.stripes[i].buf);
2635 
2636  for (j = 0; j < i; j++) {
2637  ExFreePool(context.stripes[j].buf);
2638  }
2639  ExFreePool(context.stripes);
2640 
2642  goto end;
2643  }
2644 
2645  RtlInitializeBitMap(&context.stripes[i].error, context.stripes[i].errorarr, (ULONG)(c->chunk_item->stripe_length >> Vcb->sector_shift));
2646 
      // Back-pointer to the shared context; the record is what the read completion
      // routine receives (see IoSetCompletionRoutine below).
2647  context.stripes[i].context = &context;
2648  context.stripes[i].rewrite = false;
2649  }
2650 
2651  stripe = stripe_start;
2652 
2654 
      // Hold the chunk range lock for the whole run; released at end2.
2655  chunk_lock_range(Vcb, c, run_start, run_end - run_start);
2656 
      // Main loop: each pass handles read_stripes consecutive stripes starting at "stripe".
2657  do {
2658  ULONG read_stripes;
2659  uint16_t missing_devices = 0;
2660  bool need_wait = false;
2661 
      // Read max_read stripes, or whatever is left up to and including stripe_end.
2662  if (max_read < stripe_end + 1 - stripe)
2663  read_stripes = max_read;
2664  else
2665  read_stripes = (ULONG)(stripe_end + 1 - stripe);
2666 
      // Starts at the number of device stripes; decremented below for each missing device.
2667  context.stripes_left = c->chunk_item->num_stripes;
2668 
2669  // read megabyte by megabyte
2670  for (i = 0; i < c->chunk_item->num_stripes; i++) {
2671  if (c->devices[i]->devobj) {
2673 
2674  context.stripes[i].Irp = IoAllocateIrp(c->devices[i]->devobj->StackSize, false);
2675 
2676  if (!context.stripes[i].Irp) {
2677  ERR("IoAllocateIrp failed\n");
2679  goto end3;
2680  }
2681 
2682  context.stripes[i].Irp->MdlAddress = NULL;
2683 
2684  IrpSp = IoGetNextIrpStackLocation(context.stripes[i].Irp);
2686  IrpSp->FileObject = c->devices[i]->fileobj;
2687 
      // Set up the data buffer according to the target device's I/O method:
      // buffered (system buffer), direct (locked MDL), or neither (raw user buffer).
2688  if (c->devices[i]->devobj->Flags & DO_BUFFERED_IO) {
2689  context.stripes[i].Irp->AssociatedIrp.SystemBuffer = ExAllocatePoolWithTag(NonPagedPool, (ULONG)(read_stripes * c->chunk_item->stripe_length), ALLOC_TAG);
2690  if (!context.stripes[i].Irp->AssociatedIrp.SystemBuffer) {
2691  ERR("out of memory\n");
2693  goto end3;
2694  }
2695 
2697 
2698  context.stripes[i].Irp->UserBuffer = context.stripes[i].buf;
2699  } else if (c->devices[i]->devobj->Flags & DO_DIRECT_IO) {
2700  context.stripes[i].Irp->MdlAddress = IoAllocateMdl(context.stripes[i].buf, (ULONG)(read_stripes * c->chunk_item->stripe_length), false, false, NULL);
2701  if (!context.stripes[i].Irp->MdlAddress) {
2702  ERR("IoAllocateMdl failed\n");
2704  goto end3;
2705  }
2706 
2708 
2709  _SEH2_TRY {
2710  MmProbeAndLockPages(context.stripes[i].Irp->MdlAddress, KernelMode, IoWriteAccess);
2713  } _SEH2_END;
2714 
      // NOTE(review): the MDL is freed here but Irp->MdlAddress is left non-NULL, so the
      // end3 cleanup will call MmUnlockPages and IoFreeMdl on it again -- looks like a
      // double free / unlock of never-locked pages; confirm.
2715  if (!NT_SUCCESS(Status)) {
2716  ERR("MmProbeAndLockPages threw exception %08lx\n", Status);
2717  IoFreeMdl(context.stripes[i].Irp->MdlAddress);
2718  goto end3;
2719  }
2720  } else
2721  context.stripes[i].Irp->UserBuffer = context.stripes[i].buf;
2722 
      // Byte offset of this batch within the device stripe; the device read offset adds
      // the stripe's starting offset from the chunk item.
2723  context.stripes[i].offset = stripe * c->chunk_item->stripe_length;
2724 
2725  IrpSp->Parameters.Read.Length = (ULONG)(read_stripes * c->chunk_item->stripe_length);
2726  IrpSp->Parameters.Read.ByteOffset.QuadPart = cis[i].offset + context.stripes[i].offset;
2727 
2728  context.stripes[i].Irp->UserIosb = &context.stripes[i].iosb;
2729  context.stripes[i].missing = false;
2730 
2731  IoSetCompletionRoutine(context.stripes[i].Irp, scrub_read_completion_raid56, &context.stripes[i], true, true, true);
2732 
2733  Vcb->scrub.data_scrubbed += read_stripes * c->chunk_item->stripe_length;
2734  need_wait = true;
2735  } else {
      // No device object: treat the stripe as missing and do not wait for it.
2736  context.stripes[i].Irp = NULL;
2737  context.stripes[i].missing = true;
2738  missing_devices++;
2739  InterlockedDecrement(&context.stripes_left);
2740  }
2741  }
2742 
      // RAID5 tolerates one missing device, RAID6 two; beyond that, give up on this run.
2743  if (c->chunk_item->type & BLOCK_FLAG_RAID5 && missing_devices > 1) {
2744  ERR("too many missing devices (%u, maximum 1)\n", missing_devices);
2746  goto end3;
2747  } else if (c->chunk_item->type & BLOCK_FLAG_RAID6 && missing_devices > 2) {
2748  ERR("too many missing devices (%u, maximum 2)\n", missing_devices);
2750  goto end3;
2751  }
2752 
      // Send every prepared IRP to its device. Listing lines 2754 and 2761 are absent;
      // presumably they initialise and wait on a completion event -- confirm.
2753  if (need_wait) {
2755 
2756  for (i = 0; i < c->chunk_item->num_stripes; i++) {
2757  if (c->devices[i]->devobj)
2758  IoCallDriver(c->devices[i]->devobj, context.stripes[i].Irp);
2759  }
2760 
2762  }
2763 
2764  // return an error if any of the stripes returned an error
2765  for (i = 0; i < c->chunk_item->num_stripes; i++) {
2766  if (!context.stripes[i].missing && !NT_SUCCESS(context.stripes[i].iosb.Status)) {
2767  Status = context.stripes[i].iosb.Status;
2769  goto end3;
2770  }
2771  }
2772 
      // Verify (and repair in memory) each stripe of the batch; i is the stripe's index
      // within the buffers just read.
2773  if (c->chunk_item->type & BLOCK_FLAG_RAID6) {
2774  for (i = 0; i < read_stripes; i++) {
2775  scrub_raid6_stripe(Vcb, c, &context, stripe_start, stripe, i, missing_devices);
2776  }
2777  } else {
2778  for (i = 0; i < read_stripes; i++) {
2779  scrub_raid5_stripe(Vcb, c, &context, stripe_start, stripe, i, missing_devices);
2780  }
2781  }
2782  stripe += read_stripes;
2783 
      // Per-batch cleanup: release each IRP (and its locked MDL for direct I/O) and write
      // back any buffer flagged for rewrite.
      // NOTE(review): when reached via an early "goto end3" from the setup loop, the Irp
      // field of stripes after the failing one has not been assigned in this pass (it is
      // only set inside that loop) -- confirm it cannot hold a stale or garbage value.
2784 end3:
2785  for (i = 0; i < c->chunk_item->num_stripes; i++) {
2786  if (context.stripes[i].Irp) {
2787  if (c->devices[i]->devobj->Flags & DO_DIRECT_IO && context.stripes[i].Irp->MdlAddress) {
2788  MmUnlockPages(context.stripes[i].Irp->MdlAddress);
2789  IoFreeMdl(context.stripes[i].Irp->MdlAddress);
2790  }
2791  IoFreeIrp(context.stripes[i].Irp);
2792  context.stripes[i].Irp = NULL;
2793 
2794  if (context.stripes[i].rewrite) {
2795  Status = write_data_phys(c->devices[i]->devobj, c->devices[i]->fileobj, cis[i].offset + context.stripes[i].offset,
2796  context.stripes[i].buf, (uint32_t)(read_stripes * c->chunk_item->stripe_length));
2797 
      // NOTE(review): jumping to end2 from inside this loop leaves the IRPs of the
      // remaining stripes unreleased -- confirm (listing line 2800 is absent).
2798  if (!NT_SUCCESS(Status)) {
2799  ERR("write_data_phys returned %08lx\n", Status);
2801  goto end2;
2802  }
2803  }
2804  }
2805  }
2806 
2807  if (!NT_SUCCESS(Status))
2808  break;
      // NOTE(review): stripe_end is inclusive everywhere else (run_end, read_stripes), so
      // "stripe < stripe_end" exits with one stripe unprocessed whenever a batch ends with
      // stripe == stripe_end; "<=" looks intended -- confirm.
2809  } while (stripe < stripe_end);
2810 
      // Release the range lock and the per-stripe buffers and error bitmaps.
2811 end2:
2812  chunk_unlock_range(Vcb, c, run_start, run_end - run_start);
2813 
2814  for (i = 0; i < c->chunk_item->num_stripes; i++) {
2815  ExFreePool(context.stripes[i].buf);
2816  ExFreePool(context.stripes[i].errorarr);
2817  }
2818  ExFreePool(context.stripes);
2819 
      // Release the bitmaps, scratch buffers and checksum storage allocated at the top.
2820 end:
2821  ExFreePool(treearr);
2822  ExFreePool(allocarr);
2823  ExFreePool(context.parity_scratch);
2824 
2825  if (c->chunk_item->type & BLOCK_FLAG_RAID6)
2826  ExFreePool(context.parity_scratch2);
2827 
2828  if (c->chunk_item->type & BLOCK_FLAG_DATA) {
2829  ExFreePool(csumarr);
2830  ExFreePool(context.csum);
2831  }
2832 
2833  return Status;
2834 }
2835 
// Body of the per-chunk RAID5/6 scrub driver (presumably scrub_chunk_raid56 -- the
// signature, listing line 2836, is absent from this listing; the body uses Vcb, c,
// *offset and *changed).
//
// Starting from the full stripe containing *offset, walks the extent tree to the end of
// the chunk, coalescing extents into runs of adjacent full stripes and passing each run to
// scrub_chunk_raid56_stripe_run. Stops early after 64 extents or 128 MB of extent data.
// On return *changed says whether any extent was found; if so, *offset has been advanced
// to the first full stripe after the last one scrubbed.
// Returns STATUS_SUCCESS, the failing status from find_item or the stripe run, or
// STATUS_INTERNAL_ERROR for an undersized extent.
2837  NTSTATUS Status;
2838  KEY searchkey;
2839  traverse_ptr tp;
2840  bool b;
2841  uint64_t full_stripe_len, stripe, stripe_start = 0, stripe_end = 0, total_data = 0;
2842  ULONG num_extents = 0, num_parity_stripes = c->chunk_item->type & BLOCK_FLAG_RAID6 ? 2 : 1;
2843 
      // Data bytes per full stripe (parity excluded); round *offset down to a full-stripe
      // boundary within the chunk.
2844  full_stripe_len = (c->chunk_item->num_stripes - num_parity_stripes) * c->chunk_item->stripe_length;
2845  stripe = (*offset - c->offset) / full_stripe_len;
2846 
2847  *offset = c->offset + (stripe * full_stripe_len);
2848 
      // Position tp in the extent tree using the aligned offset as the object id.
2849  searchkey.obj_id = *offset;
2850  searchkey.obj_type = TYPE_METADATA_ITEM;
2851  searchkey.offset = 0xffffffffffffffff;
2852 
2853  Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, false, NULL);
2854  if (!NT_SUCCESS(Status)) {
2855  ERR("find_item returned %08lx\n", Status);
2856  return Status;
2857  }
2858 
2859  *changed = false;
2860 
2861  do {
2862  traverse_ptr next_tp;
2863 
      // Stop once the walk leaves this chunk.
2864  if (tp.item->key.obj_id >= c->offset + c->chunk_item->size)
2865  break;
2866 
      // NOTE(review): listing line 2867 is absent; presumably the key-type / range test
      // that opens the block closed further down -- confirm against the real source.
      // Metadata items have no length in the key, so node_size is used as the extent size.
2868  uint64_t size = tp.item->key.obj_type == TYPE_METADATA_ITEM ? Vcb->superblock.node_size : tp.item->key.offset;
2869 
2870  TRACE("%I64x\n", tp.item->key.obj_id);
2871 
      // NOTE(review): listing line 2872 (the condition for this error path) is absent;
      // judging by the message it compares size against sector_size.
2873  ERR("extent %I64x has size less than sector_size (%I64x < %x)\n", tp.item->key.obj_id, size, Vcb->superblock.sector_size);
2874  return STATUS_INTERNAL_ERROR;
2875  }
2876 
      // Index of the full stripe in which this extent starts.
2877  stripe = (tp.item->key.obj_id - c->offset) / full_stripe_len;
2878 
      // If a run is already open and this extent starts beyond the stripe following it,
      // scrub the open run and begin a new one; otherwise the extent extends the run.
2879  if (*changed) {
2880  if (stripe > stripe_end + 1) {
2881  Status = scrub_chunk_raid56_stripe_run(Vcb, c, stripe_start, stripe_end);
2882  if (!NT_SUCCESS(Status)) {
2883  ERR("scrub_chunk_raid56_stripe_run returned %08lx\n", Status);
2884  return Status;
2885  }
2886 
2887  stripe_start = stripe;
2888  }
2889  } else
2890  stripe_start = stripe;
2891 
      // Index of the full stripe containing the extent's last byte (inclusive run end).
2892  stripe_end = (tp.item->key.obj_id + size - 1 - c->offset) / full_stripe_len;
2893 
2894  *changed = true;
2895 
2896  total_data += size;
2897  num_extents++;
2898 
2899  // only do so much at a time
2900  if (num_extents >= 64 || total_data >= 0x8000000) // 128 MB
2901  break;
2902  }
2903 
2904  b = find_next_item(Vcb, &tp, &next_tp, false, NULL);
2905 
2906  if (b)
2907  tp = next_tp;
2908  } while (b);
2909 
      // Scrub the run still open when the walk ended, then move *offset past it so the
      // caller can resume from there.
2910  if (*changed) {
2911  Status = scrub_chunk_raid56_stripe_run(Vcb, c, stripe_start, stripe_end);
2912  if (!NT_SUCCESS(Status)) {
2913  ERR("scrub_chunk_raid56_stripe_run returned %08lx\n", Status);
2914  return Status;
2915  }
2916 
2917  *offset = c->offset + ((stripe_end + 1) * full_stripe_len);
2918  }
2919 
2920  return STATUS_SUCCESS;
2921 }
2922 
// NOTE(review): the embedded listing numbers skip 2923, so the signature line
// is not shown. The body uses Vcb, c, offset and changed, and the scrub thread
// calls scrub_chunk(Vcb, c, &offset, &changed); presumably this is that
// function - confirm against the real source file.
//
// Purpose (from the visible body): scrub one batch of extents of chunk c,
// starting at *offset, while holding Vcb->tree_lock shared. RAID5/RAID6 chunks
// are delegated to scrub_chunk_raid56(). For the other profiles, the extent
// root is walked: data extents have their checksums loaded and are passed to
// scrub_data_extent(); adjacent tree (metadata) extents are coalesced into a
// single run and passed to scrub_extent(). A batch ends after 64 extents or
// 128 MB. *offset is advanced past each processed extent and *changed is set
// to true when at least one extent was processed.
2924  NTSTATUS Status;
2925  KEY searchkey;
2926  traverse_ptr tp;
2927  bool b = false, tree_run = false;
2928  ULONG type, num_extents = 0;
2929  uint64_t total_data = 0, tree_run_start = 0, tree_run_end = 0;
2930 
2931  TRACE("chunk %I64x\n", c->offset);
2932 
// Held shared for the whole function; released at the "end" label.
2933  ExAcquireResourceSharedLite(&Vcb->tree_lock, true);
2934 
// NOTE(review): the statement following each non-RAID5/6 test below (listing
// lines 2936, 2938, 2940, 2942, 2950, 2952, 2954) is missing from this
// listing. "type" is used later but never visibly assigned, so those lines
// presumably set it to the matching profile - confirm.
2935  if (c->chunk_item->type & BLOCK_FLAG_DUPLICATE)
2937  else if (c->chunk_item->type & BLOCK_FLAG_RAID0)
2939  else if (c->chunk_item->type & BLOCK_FLAG_RAID1)
2941  else if (c->chunk_item->type & BLOCK_FLAG_RAID10)
2943  else if (c->chunk_item->type & BLOCK_FLAG_RAID5) {
2944  Status = scrub_chunk_raid56(Vcb, c, offset, changed);
2945  goto end;
2946  } else if (c->chunk_item->type & BLOCK_FLAG_RAID6) {
2947  Status = scrub_chunk_raid56(Vcb, c, offset, changed);
2948  goto end;
2949  } else if (c->chunk_item->type & BLOCK_FLAG_RAID1C3)
2951  else if (c->chunk_item->type & BLOCK_FLAG_RAID1C4)
2953  else // SINGLE
2955 
2956  searchkey.obj_id = *offset;
2957  searchkey.obj_type = TYPE_METADATA_ITEM;
2958  searchkey.offset = 0xffffffffffffffff;
2959 
2960  Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, false, NULL);
2961  if (!NT_SUCCESS(Status)) {
2962  ERR("error - find_item returned %08lx\n", Status);
2963  goto end;
2964  }
2965 
2966  do {
2967  traverse_ptr next_tp;
2968 
// Stop once the current item lies at or beyond the end of this chunk.
2969  if (tp.item->key.obj_id >= c->offset + c->chunk_item->size)
2970  break;
2971 
// NOTE(review): listing line 2972 is missing. It presumably opens the
// conditional block that is closed at listing line 3114 - confirm.
// Extent size: node_size for METADATA_ITEM keys, otherwise the key's offset field.
2973  uint64_t size = tp.item->key.obj_type == TYPE_METADATA_ITEM ? Vcb->superblock.node_size : tp.item->key.offset;
2974  bool is_tree;
2975  void* csum = NULL;
2976  RTL_BITMAP bmp;
2977  ULONG* bmparr = NULL, bmplen;
2978 
2979  TRACE("%I64x\n", tp.item->key.obj_id);
2980 
2981  is_tree = false;
2982 
// NOTE(review): listing line 2983 (the condition for this first branch) is missing.
2984  is_tree = true;
2985  else {
2986  EXTENT_ITEM* ei = (EXTENT_ITEM*)tp.item->data;
2987 
2988  if (tp.item->size < sizeof(EXTENT_ITEM)) {
2989  ERR("(%I64x,%x,%I64x) was %u bytes, expected at least %Iu\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_ITEM));
// NOTE(review): listing line 2990 is missing; presumably it sets Status before
// the jump, since "end" returns Status - confirm.
2991  goto end;
2992  }
2993 
// An EXTENT_ITEM flagged as a tree block is treated as metadata.
2994  if (ei->flags & EXTENT_ITEM_TREE_BLOCK)
2995  is_tree = true;
2996  }
2997 
// NOTE(review): listing lines 2998 (presumably the "size < sector_size" test)
// and 3000 (presumably a Status assignment) are missing.
2999  ERR("extent %I64x has size less than sector_size (%I64x < %x)\n", tp.item->key.obj_id, size, Vcb->superblock.sector_size);
3001  goto end;
3002  }
3003 
3004  // load csum
3005  if (!is_tree) {
3006  traverse_ptr tp2;
3007 
// One checksum of csum_size bytes per sector of the extent.
3008  csum = ExAllocatePoolWithTag(PagedPool, (ULONG)((Vcb->csum_size * size) >> Vcb->sector_shift), ALLOC_TAG);
3009  if (!csum) {
3010  ERR("out of memory\n");
// NOTE(review): listing line 3011 is missing; presumably a Status assignment.
3012  goto end;
3013  }
3014 
// One bitmap bit per sector of the extent.
3015  bmplen = (ULONG)(size >> Vcb->sector_shift);
3016 
3017  bmparr = ExAllocatePoolWithTag(PagedPool, (ULONG)(sector_align((bmplen >> 3) + 1, sizeof(ULONG))), ALLOC_TAG);
3018  if (!bmparr) {
3019  ERR("out of memory\n");
3020  ExFreePool(csum);
// NOTE(review): listing line 3021 is missing; presumably a Status assignment.
3022  goto end;
3023  }
3024 
3025  RtlInitializeBitMap(&bmp, bmparr, bmplen);
3026  RtlSetAllBits(&bmp); // 1 = no csum, 0 = csum
3027 
3028  searchkey.obj_id = EXTENT_CSUM_ID;
3029  searchkey.obj_type = TYPE_EXTENT_CSUM;
3030  searchkey.offset = tp.item->key.obj_id;
3031 
// STATUS_NOT_FOUND is tolerated here: every bit simply stays set ("no csum").
3032  Status = find_item(Vcb, Vcb->checksum_root, &tp2, &searchkey, false, NULL);
3033  if (!NT_SUCCESS(Status) && Status != STATUS_NOT_FOUND) {
3034  ERR("find_item returned %08lx\n", Status);
3035  ExFreePool(csum);
3036  ExFreePool(bmparr);
3037  goto end;
3038  }
3039 
3040  if (Status != STATUS_NOT_FOUND) {
3041  do {
3042  traverse_ptr next_tp2;
3043 
3044  if (tp2.item->key.obj_type == TYPE_EXTENT_CSUM) {
// A checksum item that starts at or after the end of the extent ends the search.
3045  if (tp2.item->key.offset >= tp.item->key.obj_id + size)
3046  break;
// Otherwise use the item if it holds at least one checksum and the address
// range it covers reaches the start of the extent.
3047  else if (tp2.item->size >= Vcb->csum_size && tp2.item->key.offset + (((uint64_t)tp2.item->size << Vcb->sector_shift) / Vcb->csum_size) >= tp.item->key.obj_id) {
// [cs, ce) is the overlap between the extent and the range this item covers.
3048  uint64_t cs = max(tp.item->key.obj_id, tp2.item->key.offset);
3049  uint64_t ce = min(tp.item->key.obj_id + size, tp2.item->key.offset + (((uint64_t)tp2.item->size << Vcb->sector_shift) / Vcb->csum_size));
3050 
// Copy the checksums for the overlap to the matching position in csum.
3051  RtlCopyMemory((uint8_t*)csum + (((cs - tp.item->key.obj_id) * Vcb->csum_size) >> Vcb->sector_shift),
3052  tp2.item->data + (((cs - tp2.item->key.offset) * Vcb->csum_size) >> Vcb->sector_shift),
3053  (ULONG)(((ce - cs) * Vcb->csum_size) >> Vcb->sector_shift));
3054 
// Mark the overlapping sectors as having a checksum.
3055  RtlClearBits(&bmp, (ULONG)((cs - tp.item->key.obj_id) >> Vcb->sector_shift), (ULONG)((ce - cs) >> Vcb->sector_shift));
3056 
// The overlap reached the end of the extent, so nothing more is needed.
3057  if (ce == tp.item->key.obj_id + size)
3058  break;
3059  }
3060  }
3061 
3062  if (find_next_item(Vcb, &tp2, &next_tp2, false, NULL))
3063  tp2 = next_tp2;
3064  else
3065  break;
3066  } while (true);
3067  }
3068  }
3069 
// Tree extents are accumulated into the run [tree_run_start, tree_run_end).
// The run is scrubbed when a data extent is met or when the next tree extent
// starts beyond the current end of the run.
3070  if (tree_run) {
3071  if (!is_tree || tp.item->key.obj_id > tree_run_end) {
// NOTE(review): when !is_tree, csum and bmparr were allocated above, and the
// failure path below jumps to "end" without freeing them. The visible code at
// "end" only releases tree_lock, so this looks like a leak - confirm.
3072  Status = scrub_extent(Vcb, c, type, tree_run_start, (uint32_t)(tree_run_end - tree_run_start), NULL);
3073  if (!NT_SUCCESS(Status)) {
3074  ERR("scrub_extent returned %08lx\n", Status);
3075  goto end;
3076  }
3077 
3078  if (!is_tree)
3079  tree_run = false;
3080  else {
3081  tree_run_start = tp.item->key.obj_id;
3082  tree_run_end = tp.item->key.obj_id + Vcb->superblock.node_size;
3083  }
3084  } else
3085  tree_run_end = tp.item->key.obj_id + Vcb->superblock.node_size;
3086  } else if (is_tree) {
3087  tree_run = true;
3088  tree_run_start = tp.item->key.obj_id;
3089  tree_run_end = tp.item->key.obj_id + Vcb->superblock.node_size;
3090  }
3091 
// Data extents are scrubbed one at a time; csum and bmparr are freed on both
// the failure and the success path.
3092  if (!is_tree) {
3093  Status = scrub_data_extent(Vcb, c, tp.item->key.obj_id, type, csum, &bmp, bmplen);
3094  if (!NT_SUCCESS(Status)) {
3095  ERR("scrub_data_extent returned %08lx\n", Status);
3096  ExFreePool(csum);
3097  ExFreePool(bmparr);
3098  goto end;
3099  }
3100 
3101  ExFreePool(csum);
3102  ExFreePool(bmparr);
3103  }
3104 
// Report progress to the caller: the next batch resumes after this extent.
3105  *offset = tp.item->key.obj_id + size;
3106  *changed = true;
3107 
3108  total_data += size;
3109  num_extents++;
3110 
3111  // only do so much at a time
3112  if (num_extents >= 64 || total_data >= 0x8000000) // 128 MB
3113  break;
3114  }
3115 
3116  b = find_next_item(Vcb, &tp, &next_tp, false, NULL);
3117 
3118  if (b)
3119  tp = next_tp;
3120  } while (b);
3121 
// Scrub the tree run that is still open when the batch ends.
3122  if (tree_run) {
3123  Status = scrub_extent(Vcb, c, type, tree_run_start, (uint32_t)(tree_run_end - tree_run_start), NULL);
3124  if (!NT_SUCCESS(Status)) {
3125  ERR("scrub_extent returned %08lx\n", Status);
3126  goto end;
3127  }
3128  }
3129 
// NOTE(review): listing line 3130 is missing; presumably it sets Status to
// STATUS_SUCCESS before falling through to "end" - confirm.
3131 
3132 end:
3133  ExReleaseResourceLite(&Vcb->tree_lock);
3134 
3135  return Status;
3136 }
3137 
// Thread routine passed to PsCreateSystemThread() when a scrub is started.
//
// From the visible body: flushes pending writes if needed, resets the scrub
// statistics and error list, queues every non-readonly chunk on a local list,
// then scrubs the chunks one by one in batches via scrub_chunk(). It waits on
// Vcb->scrub.event before each chunk and between batches, and ends early when
// Vcb->scrub.stopping is set. On exit it closes the thread handle and signals
// Vcb->scrub.finished.
//
// NOTE(review): the embedded listing numbers skip 3140, 3143, 3154, 3179,
// 3187, 3189, 3197 and 3250, so several lines are missing. "Vcb", "time",
// "err" and the inner "c" are used below without a visible declaration;
// presumably the missing lines declare or assign them - confirm against the
// real source file.
3138 _Function_class_(KSTART_ROUTINE)
3139 static void __stdcall scrub_thread(void* context) {
3141  LIST_ENTRY chunks, *le;
3142  NTSTATUS Status;
3144 
3145  KeInitializeEvent(&Vcb->scrub.finished, NotificationEvent, false);
3146 
3147  InitializeListHead(&chunks);
3148 
3149  ExAcquireResourceExclusiveLite(&Vcb->tree_lock, true);
3150 
// Flush pending changes first, but only on a writable volume.
// NOTE(review): the else-branch statement (listing line 3154) is missing;
// presumably it sets Status to a success value.
3151  if (Vcb->need_write && !Vcb->readonly)
3152  Status = do_write(Vcb, NULL);
3153  else
3155 
3156  free_trees(Vcb);
3157 
3158  if (!NT_SUCCESS(Status)) {
3159  ExReleaseResourceLite(&Vcb->tree_lock);
3160  ERR("do_write returned %08lx\n", Status);
3161  Vcb->scrub.error = Status;
3162  goto end;
3163  }
3164 
// The rest of the setup only needs tree_lock shared.
3165  ExConvertExclusiveToSharedLite(&Vcb->tree_lock);
3166 
3167  ExAcquireResourceExclusiveLite(&Vcb->scrub.stats_lock, true);
3168 
// Reset the statistics for a fresh run.
3169  KeQuerySystemTime(&Vcb->scrub.start_time);
3170  Vcb->scrub.finish_time.QuadPart = 0;
3171  Vcb->scrub.resume_time.QuadPart = Vcb->scrub.start_time.QuadPart;
3172  Vcb->scrub.duration.QuadPart = 0;
3173  Vcb->scrub.total_chunks = 0;
3174  Vcb->scrub.chunks_left = 0;
3175  Vcb->scrub.data_scrubbed = 0;
3176  Vcb->scrub.num_errors = 0;
3177 
// Free the entries left on the error list.
// NOTE(review): listing line 3179 is missing; presumably it removes the head
// entry from the list and yields "err".
3178  while (!IsListEmpty(&Vcb->scrub.errors)) {
3180  ExFreePool(err);
3181  }
3182 
3183  ExAcquireResourceSharedLite(&Vcb->chunk_lock, true);
3184 
// Queue every chunk that is not readonly on the local list, linked through
// list_entry_balance, and count it.
// NOTE(review): listing lines 3187, 3189 and 3197 are missing; presumably they
// obtain "c" from "le" and take/release a per-chunk lock - confirm.
3185  le = Vcb->chunks.Flink;
3186  while (le != &Vcb->chunks) {
3188 
3190 
3191  if (!c->readonly) {
3192  InsertTailList(&chunks, &c->list_entry_balance);
3193  Vcb->scrub.total_chunks++;
3194  Vcb->scrub.chunks_left++;
3195  }
3196 
3198 
3199  le = le->Flink;
3200  }
3201 
3202  ExReleaseResourceLite(&Vcb->chunk_lock);
3203 
// NOTE(review): ExReleaseResource is used here, while the other releases in
// this function use ExReleaseResourceLite; presumably equivalent - confirm.
3204  ExReleaseResource(&Vcb->scrub.stats_lock);
3205 
3206  ExReleaseResourceLite(&Vcb->tree_lock);
3207 
3208  while (!IsListEmpty(&chunks)) {
3209  chunk* c = CONTAINING_RECORD(RemoveHeadList(&chunks), chunk, list_entry_balance);
3210  uint64_t offset = c->offset;
3211  bool changed;
3212 
// reloc stays set for as long as this chunk is being processed.
3213  c->reloc = true;
3214 
// Blocks while the scrub event is not signalled.
3215  KeWaitForSingleObject(&Vcb->scrub.event, Executive, KernelMode, false, NULL);
3216 
3217  if (!Vcb->scrub.stopping) {
// Scrub the chunk batch by batch; scrub_chunk() advances offset and reports
// through "changed" whether it processed anything.
3218  do {
3219  changed = false;
3220 
3221  Status = scrub_chunk(Vcb, c, &offset, &changed);
3222  if (!NT_SUCCESS(Status)) {
3223  ERR("scrub_chunk returned %08lx\n", Status);
3224  Vcb->scrub.stopping = true;
3225  Vcb->scrub.error = Status;
3226  break;
3227  }
3228 
// Done with this chunk when its end has been reached or a stop was requested.
3229  if (offset == c->offset + c->chunk_item->size || Vcb->scrub.stopping)
3230  break;
3231 
3232  KeWaitForSingleObject(&Vcb->scrub.event, Executive, KernelMode, false, NULL);
3233  } while (changed);
3234  }
3235 
3236  ExAcquireResourceExclusiveLite(&Vcb->scrub.stats_lock, true);
3237 
// A chunk only counts as done if the scrub is not stopping.
3238  if (!Vcb->scrub.stopping)
3239  Vcb->scrub.chunks_left--;
3240 
// Record the finish time once the last queued chunk has been taken.
3241  if (IsListEmpty(&chunks))
3242  KeQuerySystemTime(&Vcb->scrub.finish_time);
3243 
3244  ExReleaseResource(&Vcb->scrub.stats_lock);
3245 
3246  c->reloc = false;
3247  c->list_entry_balance.Flink = NULL;
3248  }
3249 
// NOTE(review): listing line 3250 is missing; presumably it reads the current
// system time into "time".
// Add the time elapsed since the last resume to the total duration.
3251  Vcb->scrub.duration.QuadPart += time.QuadPart - Vcb->scrub.resume_time.QuadPart;
3252 
3253 end:
3254  ZwClose(Vcb->scrub.thread);
3255  Vcb->scrub.thread = NULL;
3256 
3257  KeSetEvent(&Vcb->scrub.finished, 0, false);
3258 }
3259 
// NOTE(review): the embedded listing numbers skip 3260, so the signature line
// is not shown. The body uses Vcb and processor_mode; judging by the log
// messages this is presumably the "start scrub" entry point - confirm the name
// and signature against the real source file.
//
// Purpose (from the visible body): after a privilege check, refuse to start
// while the volume is locked or while a balance or another scrub is running.
// Otherwise reset the scrub control state and create the system thread that
// runs scrub_thread() with Vcb as its context.
3261  NTSTATUS Status;
3262  OBJECT_ATTRIBUTES oa;
3263 
// NOTE(review): the statement executed when the privilege check fails (listing
// line 3265) is missing; presumably it returns an error status.
3264  if (!SeSinglePrivilegeCheck(RtlConvertLongToLuid(SE_MANAGE_VOLUME_PRIVILEGE), processor_mode))
3266 
3267  if (Vcb->locked) {
3268  WARN("cannot start scrub while locked\n");
3269  return STATUS_DEVICE_NOT_READY;
3270  }
3271 
3272  if (Vcb->balance.thread) {
3273  WARN("cannot start scrub while balance running\n");
3274  return STATUS_DEVICE_NOT_READY;
3275  }
3276 
3277  if (Vcb->scrub.thread) {
3278  WARN("scrub already running\n");
3279  return STATUS_DEVICE_NOT_READY;
3280  }
3281 
// NOTE(review): the statement for the readonly case (listing line 3283) is
// missing; presumably it returns an error status.
3282  if (Vcb->readonly)
3284 
3285  Vcb->scrub.stopping = false;
3286  Vcb->scrub.paused = false;
3287  Vcb->scrub.error = STATUS_SUCCESS;
// paused was just set to false, so the event starts out signalled.
3288  KeInitializeEvent(&Vcb->scrub.event, NotificationEvent, !Vcb->scrub.paused);
3289 
// NOTE(review): listing line 3290 is missing; "oa" is passed below without a
// visible initialisation, so presumably that line initialises it.
3291 
3292  Status = PsCreateSystemThread(&Vcb->scrub.thread, 0, &oa, NULL, NULL, scrub_thread, Vcb);
3293  if (!NT_SUCCESS(Status)) {
3294  ERR("PsCreateSystemThread returned %08lx\n", Status);
3295  return Status;
3296  }
3297 
3298  return STATUS_SUCCESS;
3299 }
3300 
// NOTE(review): the embedded listing numbers skip 3301-3302, so the signature
// is not shown. The body uses Vcb, processor_mode, length and bqs (accessed as
// a btrfs_query_scrub); presumably this is the "query scrub" entry point -
// confirm the name and signature against the real source file.
//
// Purpose (from the visible body): under Vcb->scrub.stats_lock (shared), copy
// the scrub status and statistics into *bqs, then append the recorded errors
// as a packed list of variable-length btrfs_scrub_error entries for as long as
// they fit in the caller's buffer of "length" bytes.
3303  ULONG len;
3304  NTSTATUS Status;
3305  LIST_ENTRY* le;
3306  btrfs_scrub_error* bse = NULL;
3307 
// NOTE(review): the statement executed when the privilege check fails (listing
// line 3309) is missing; presumably it returns an error status.
3308  if (!SeSinglePrivilegeCheck(RtlConvertLongToLuid(SE_MANAGE_VOLUME_PRIVILEGE), processor_mode))
3310 
// The buffer must hold at least the fixed part that precedes "errors".
3311  if (length < offsetof(btrfs_query_scrub, errors))
3312  return STATUS_BUFFER_TOO_SMALL;
3313 
3314  ExAcquireResourceSharedLite(&Vcb->scrub.stats_lock, true);
3315 
// Running or paused only while a thread exists and chunks remain.
3316  if (Vcb->scrub.thread && Vcb->scrub.chunks_left > 0)
3317  bqs->status = Vcb->scrub.paused ? BTRFS_SCRUB_PAUSED : BTRFS_SCRUB_RUNNING;
3318  else
3319  bqs->status = BTRFS_SCRUB_STOPPED;
3320 
3321  bqs->start_time.QuadPart = Vcb->scrub.start_time.QuadPart;
3322  bqs->finish_time.QuadPart = Vcb->scrub.finish_time.QuadPart;
3323  bqs->chunks_left = Vcb->scrub.chunks_left;
3324  bqs->total_chunks = Vcb->scrub.total_chunks;
3325  bqs->data_scrubbed = Vcb->scrub.data_scrubbed;
3326 
3327  bqs->duration = Vcb->scrub.duration.QuadPart;
3328 
// While running, also add the time elapsed since the last resume.
// NOTE(review): listing lines 3330 and 3332 are missing; presumably they
// declare "time" and fill it with the current system time.
3329  if (bqs->status == BTRFS_SCRUB_RUNNING) {
3331 
3333  bqs->duration += time.QuadPart - Vcb->scrub.resume_time.QuadPart;
3334  }
3335 
3336  bqs->error = Vcb->scrub.error;
3337 
3338  bqs->num_errors = Vcb->scrub.num_errors;
3339 
// Bytes left in the caller's buffer for error entries.
3340  len = length - offsetof(btrfs_query_scrub, errors);
3341 
3342  le = Vcb->scrub.errors.Flink;
3343  while (le != &Vcb->scrub.errors) {
// NOTE(review): listing line 3344 is missing; presumably it obtains "err" from "le".
3345  ULONG errlen;
3346 
// Entry size: metadata entries end after the firstitem KEY, data entries after
// filename_length bytes of filename.
3347  if (err->is_metadata)
3348  errlen = offsetof(btrfs_scrub_error, metadata.firstitem) + sizeof(KEY);
3349  else
3350  errlen = offsetof(btrfs_scrub_error, data.filename) + err->data.filename_length;
3351 
// Stop when the next entry no longer fits.
// NOTE(review): listing line 3353 is missing; presumably it sets Status before
// the jump, since "end" returns Status.
3352  if (len < errlen) {
3354  goto end;
3355  }
3356 
3357  if (!bse)
3358  bse = &bqs->errors;
3359  else {
3360  ULONG lastlen;
3361 
// Size of the entry written previously, computed the same way as errlen.
3362  if (bse->is_metadata)
3363  lastlen = offsetof(btrfs_scrub_error, metadata.firstitem) + sizeof(KEY);
3364  else
3365  lastlen = offsetof(btrfs_scrub_error, data.filename) + bse->data.filename_length;
3366 
// Link the previous entry to this one by byte offset, then step to the new slot.
3367  bse->next_entry = lastlen;
3368  bse = (btrfs_scrub_error*)(((uint8_t*)bse) + lastlen);
3369  }
3370 
// next_entry stays 0 on whichever entry ends up last.
3371  bse->next_entry = 0;
3372  bse->address = err->address;
3373  bse->device = err->device;
3374  bse->recovered = err->recovered;
3375  bse->is_metadata = err->is_metadata;
3376  bse->parity = err->parity;
3377 
3378  if (err->is_metadata) {
3379  bse->metadata.root = err->metadata.root;
3380  bse->metadata.level = err->metadata.level;
3381  bse->metadata.firstitem = err->metadata.firstitem;
3382  } else {
3383  bse->data.subvol = err->data.subvol;
3384  bse->data.offset = err->data.offset;
3385  bse->data.filename_length = err->data.filename_length;
3386  RtlCopyMemory(bse->data.filename, err->data.filename, err->data.filename_length);
3387  }
3388 
3389  len -= errlen;
3390  le = le->Flink;
3391  }
3392 
// NOTE(review): listing line 3393 is missing; presumably it sets Status to a
// success value when every entry fitted - confirm.
3394 
3395 end:
3396  ExReleaseResourceLite(&Vcb->scrub.stats_lock);
3397 
3398  return Status;
3399 }
3400 
// NOTE(review): the embedded listing numbers skip 3401-3402, so the signature
// (and possibly a local declaration) is not shown. The body uses Vcb and
// processor_mode; presumably this is the "pause scrub" entry point - confirm
// the name and signature against the real source file.
//
// Purpose (from the visible body): after a privilege check, fail with
// STATUS_DEVICE_NOT_READY if no scrub thread exists or the scrub is already
// paused. Otherwise mark the scrub paused, clear Vcb->scrub.event, and add the
// time elapsed since the last resume to the accumulated duration.
3403 
// NOTE(review): the statement executed when the privilege check fails (listing
// line 3405) is missing; presumably it returns an error status.
3404  if (!SeSinglePrivilegeCheck(RtlConvertLongToLuid(SE_MANAGE_VOLUME_PRIVILEGE), processor_mode))
3406 
3407  if (!Vcb->scrub.thread)
3408  return STATUS_DEVICE_NOT_READY;
3409 
3410  if (Vcb->scrub.paused)
3411  return STATUS_DEVICE_NOT_READY;
3412 
3413  Vcb->scrub.paused = true;
// With the event cleared, waits on Vcb->scrub.event will block.
3414  KeClearEvent(&Vcb->scrub.event);
3415 
// NOTE(review): listing line 3416 is missing; "time" has no visible
// declaration or assignment, so presumably that line reads the current system
// time into it.
3417  Vcb->scrub.duration.QuadPart += time.QuadPart - Vcb->scrub.resume_time.QuadPart;
3418 
3419  return STATUS_SUCCESS;
3420 }
3421 
// NOTE(review): the embedded listing numbers skip 3422, so the signature line
// is not shown. The body uses Vcb and processor_mode; presumably this is the
// "resume scrub" entry point - confirm the name and signature against the real
// source file.
//
// Purpose (from the visible body): after a privilege check, fail with
// STATUS_DEVICE_NOT_READY if no scrub thread exists or the scrub is not
// paused. Otherwise clear the paused flag, signal Vcb->scrub.event, and record
// the current time as the new resume_time.
// NOTE(review): the statement executed when the privilege check fails (listing
// line 3424) is missing; presumably it returns an error status.
3423  if (!SeSinglePrivilegeCheck(RtlConvertLongToLuid(SE_MANAGE_VOLUME_PRIVILEGE), processor_mode))
3425 
3426  if (!Vcb->scrub.thread)
3427  return STATUS_DEVICE_NOT_READY;
3428 
3429  if (!Vcb->scrub.paused)
3430  return STATUS_DEVICE_NOT_READY;
3431 
3432  Vcb->scrub.paused = false;
// Signalling the event releases any wait on Vcb->scrub.event.
3433  KeSetEvent(&Vcb->scrub.event, 0, false);
3434 
3435  KeQuerySystemTime(&Vcb->scrub.resume_time);
3436 
3437  return STATUS_SUCCESS;
3438 }
3439 
3441  if (!SeSinglePrivilegeCheck(RtlConvertLongToLuid(SE_MANAGE_VOLUME_PRIVILEGE), processor_mode))
3443 
3444  if (!Vcb->scrub.thread)
3445  return STATUS_DEVICE_NOT_READY;
3446 
3447  Vcb->scrub.paused = false;
3448  Vcb->scrub.stopping = true;
3449  KeSetEvent(&Vcb->scrub.event, 0, false);
3450 
3451  return