/* ReactOS 0.4.15-dev-509-g96a357b — drivers/filesystems/btrfs/read.c
 * NOTE(review): this listing was captured from a doxygen-generated page;
 * the navigation text has been converted into this comment so the file
 * header is valid C. Line-number artifacts may remain elsewhere. */
1 /* Copyright (c) Mark Harmstone 2016-17
2  *
3  * This file is part of WinBtrfs.
4  *
5  * WinBtrfs is free software: you can redistribute it and/or modify
6  * it under the terms of the GNU Lesser General Public Licence as published by
7  * the Free Software Foundation, either version 3 of the Licence, or
8  * (at your option) any later version.
9  *
10  * WinBtrfs is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  * GNU Lesser General Public Licence for more details.
14  *
15  * You should have received a copy of the GNU Lesser General Public Licence
16  * along with WinBtrfs. If not, see <http://www.gnu.org/licenses/>. */
17 
18 #include "btrfs_drv.h"
19 #include "xxhash.h"
20 #include "crc32c.h"
21 
28 };
29 
30 struct read_data_context;
31 
32 typedef struct {
35  bool rewrite;
43 
44 typedef struct {
47  chunk* c;
54  void* csum;
55  bool tree;
59 
60 extern bool diskacc;
64 
65 #define LZO_PAGE_SIZE 4096
66 
67 _Function_class_(IO_COMPLETION_ROUTINE)
68 static NTSTATUS __stdcall read_data_completion(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID conptr) {
69  read_data_stripe* stripe = conptr;
71 
73 
74  stripe->iosb = Irp->IoStatus;
75 
76  if (NT_SUCCESS(Irp->IoStatus.Status))
78  else
79  stripe->status = ReadDataStatus_Error;
80 
81  if (InterlockedDecrement(&context->stripes_left) == 0)
82  KeSetEvent(&context->Event, 0, false);
83 
85 }
86 
88  void* csum2;
89 
90  csum2 = ExAllocatePoolWithTag(PagedPool, Vcb->csum_size * sectors, ALLOC_TAG);
91  if (!csum2) {
92  ERR("out of memory\n");
94  }
95 
96  do_calc_job(Vcb, data, sectors, csum2);
97 
98  if (RtlCompareMemory(csum2, csum, sectors * Vcb->csum_size) != sectors * Vcb->csum_size) {
99  ExFreePool(csum2);
100  return STATUS_CRC_ERROR;
101  }
102 
103  ExFreePool(csum2);
104 
105  return STATUS_SUCCESS;
106 }
107 
109  switch (Vcb->superblock.csum_type) {
110  case CSUM_TYPE_CRC32C:
111  *(uint32_t*)csum = ~calc_crc32c(0xffffffff, (uint8_t*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum));
112  break;
113 
114  case CSUM_TYPE_XXHASH:
115  *(uint64_t*)csum = XXH64((uint8_t*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum), 0);
116  break;
117 
118  case CSUM_TYPE_SHA256:
119  calc_sha256(csum, &th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum));
120  break;
121 
122  case CSUM_TYPE_BLAKE2:
123  blake2b(csum, BLAKE2_HASH_SIZE, (uint8_t*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum));
124  break;
125  }
126 }
127 
129  switch (Vcb->superblock.csum_type) {
130  case CSUM_TYPE_CRC32C: {
131  uint32_t crc32 = ~calc_crc32c(0xffffffff, (uint8_t*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum));
132 
133  if (crc32 == *((uint32_t*)th->csum))
134  return true;
135 
136  WARN("hash was %08x, expected %08x\n", crc32, *((uint32_t*)th->csum));
137 
138  break;
139  }
140 
141  case CSUM_TYPE_XXHASH: {
142  uint64_t hash = XXH64((uint8_t*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum), 0);
143 
144  if (hash == *((uint64_t*)th->csum))
145  return true;
146 
147  WARN("hash was %I64x, expected %I64x\n", hash, *((uint64_t*)th->csum));
148 
149  break;
150  }
151 
152  case CSUM_TYPE_SHA256: {
154 
155  calc_sha256(hash, (uint8_t*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum));
156 
158  return true;
159 
160  WARN("hash was invalid\n");
161 
162  break;
163  }
164 
165  case CSUM_TYPE_BLAKE2: {
167 
168  blake2b(hash, sizeof(hash), (uint8_t*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum));
169 
171  return true;
172 
173  WARN("hash was invalid\n");
174 
175  break;
176  }
177  }
178 
179  return false;
180 }
181 
183  switch (Vcb->superblock.csum_type) {
184  case CSUM_TYPE_CRC32C:
185  *(uint32_t*)csum = ~calc_crc32c(0xffffffff, buf, Vcb->superblock.sector_size);
186  break;
187 
188  case CSUM_TYPE_XXHASH:
189  *(uint64_t*)csum = XXH64(buf, Vcb->superblock.sector_size, 0);
190  break;
191 
192  case CSUM_TYPE_SHA256:
193  calc_sha256(csum, buf, Vcb->superblock.sector_size);
194  break;
195 
196  case CSUM_TYPE_BLAKE2:
197  blake2b(csum, BLAKE2_HASH_SIZE, buf, Vcb->superblock.sector_size);
198  break;
199  }
200 }
201 
203  switch (Vcb->superblock.csum_type) {
204  case CSUM_TYPE_CRC32C: {
205  uint32_t crc32 = ~calc_crc32c(0xffffffff, buf, Vcb->superblock.sector_size);
206 
207  return *(uint32_t*)csum == crc32;
208  }
209 
210  case CSUM_TYPE_XXHASH: {
211  uint64_t hash = XXH64(buf, Vcb->superblock.sector_size, 0);
212 
213  return *(uint64_t*)csum == hash;
214  }
215 
216  case CSUM_TYPE_SHA256: {
218 
219  calc_sha256(hash, buf, Vcb->superblock.sector_size);
220 
222  }
223 
224  case CSUM_TYPE_BLAKE2: {
226 
227  blake2b(hash, sizeof(hash), buf, Vcb->superblock.sector_size);
228 
230  }
231  }
232 
233  return false;
234 }
235 
238  ULONG i;
239  bool checksum_error = false;
240  uint16_t j, stripe = 0;
242  CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&ci[1];
243 
244  for (j = 0; j < ci->num_stripes; j++) {
245  if (context->stripes[j].status == ReadDataStatus_Error) {
246  WARN("stripe %u returned error %08lx\n", j, context->stripes[j].iosb.Status);
248  return context->stripes[j].iosb.Status;
249  } else if (context->stripes[j].status == ReadDataStatus_Success) {
250  stripe = j;
251  break;
252  }
253  }
254 
255  if (context->stripes[stripe].status != ReadDataStatus_Success)
256  return STATUS_INTERNAL_ERROR;
257 
258  if (context->tree) {
259  tree_header* th = (tree_header*)buf;
260 
261  if (th->address != context->address || !check_tree_checksum(Vcb, th)) {
262  checksum_error = true;
264  } else if (generation != 0 && th->generation != generation) {
265  checksum_error = true;
267  }
268  } else if (context->csum) {
269  Status = check_csum(Vcb, buf, (ULONG)context->stripes[stripe].Irp->IoStatus.Information / context->sector_size, context->csum);
270 
271  if (Status == STATUS_CRC_ERROR) {
272  checksum_error = true;
274  } else if (!NT_SUCCESS(Status)) {
275  ERR("check_csum returned %08lx\n", Status);
276  return Status;
277  }
278  }
279 
280  if (!checksum_error)
281  return STATUS_SUCCESS;
282 
283  if (ci->num_stripes == 1)
284  return STATUS_CRC_ERROR;
285 
286  if (context->tree) {
287  tree_header* t2;
288  bool recovered = false;
289 
290  t2 = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.node_size, ALLOC_TAG);
291  if (!t2) {
292  ERR("out of memory\n");
294  }
295 
296  for (j = 0; j < ci->num_stripes; j++) {
297  if (j != stripe && devices[j] && devices[j]->devobj) {
298  Status = sync_read_phys(devices[j]->devobj, devices[j]->fileobj, cis[j].offset + context->stripes[stripe].stripestart,
299  Vcb->superblock.node_size, (uint8_t*)t2, false);
300  if (!NT_SUCCESS(Status)) {
301  WARN("sync_read_phys returned %08lx\n", Status);
303  } else {
304  bool checksum_error = !check_tree_checksum(Vcb, t2);
305 
306  if (t2->address == addr && !checksum_error && (generation == 0 || t2->generation == generation)) {
307  RtlCopyMemory(buf, t2, Vcb->superblock.node_size);
308  ERR("recovering from checksum error at %I64x, device %I64x\n", addr, devices[stripe]->devitem.dev_id);
309  recovered = true;
310 
311  if (!Vcb->readonly && !devices[stripe]->readonly) { // write good data over bad
312  Status = write_data_phys(devices[stripe]->devobj, devices[stripe]->fileobj, cis[stripe].offset + context->stripes[stripe].stripestart,
313  t2, Vcb->superblock.node_size);
314  if (!NT_SUCCESS(Status)) {
315  WARN("write_data_phys returned %08lx\n", Status);
317  }
318  }
319 
320  break;
321  } else if (t2->address != addr || checksum_error)
323  else
325  }
326  }
327  }
328 
329  if (!recovered) {
330  ERR("unrecoverable checksum error at %I64x\n", addr);
331  ExFreePool(t2);
332  return STATUS_CRC_ERROR;
333  }
334 
335  ExFreePool(t2);
336  } else {
337  ULONG sectors = (ULONG)context->stripes[stripe].Irp->IoStatus.Information / Vcb->superblock.sector_size;
338  uint8_t* sector;
339  void* ptr = context->csum;
340 
341  sector = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.sector_size, ALLOC_TAG);
342  if (!sector) {
343  ERR("out of memory\n");
345  }
346 
347  for (i = 0; i < sectors; i++) {
348  if (!check_sector_csum(Vcb, buf + (i * Vcb->superblock.sector_size), ptr)) {
349  bool recovered = false;
350 
351  for (j = 0; j < ci->num_stripes; j++) {
352  if (j != stripe && devices[j] && devices[j]->devobj) {
353  Status = sync_read_phys(devices[j]->devobj, devices[j]->fileobj,
354  cis[j].offset + context->stripes[stripe].stripestart + UInt32x32To64(i, Vcb->superblock.sector_size),
355  Vcb->superblock.sector_size, sector, false);
356  if (!NT_SUCCESS(Status)) {
357  WARN("sync_read_phys returned %08lx\n", Status);
359  } else {
360  if (check_sector_csum(Vcb, sector, ptr)) {
361  RtlCopyMemory(buf + (i * Vcb->superblock.sector_size), sector, Vcb->superblock.sector_size);
362  ERR("recovering from checksum error at %I64x, device %I64x\n", addr + UInt32x32To64(i, Vcb->superblock.sector_size), devices[stripe]->devitem.dev_id);
363  recovered = true;
364 
365  if (!Vcb->readonly && !devices[stripe]->readonly) { // write good data over bad
366  Status = write_data_phys(devices[stripe]->devobj, devices[stripe]->fileobj,
367  cis[stripe].offset + context->stripes[stripe].stripestart + UInt32x32To64(i, Vcb->superblock.sector_size),
368  sector, Vcb->superblock.sector_size);
369  if (!NT_SUCCESS(Status)) {
370  WARN("write_data_phys returned %08lx\n", Status);
372  }
373  }
374 
375  break;
376  } else
378  }
379  }
380  }
381 
382  if (!recovered) {
383  ERR("unrecoverable checksum error at %I64x\n", addr + UInt32x32To64(i, Vcb->superblock.sector_size));
385  return STATUS_CRC_ERROR;
386  }
387  }
388 
389  ptr = (uint8_t*)ptr + Vcb->csum_size;
390  }
391 
393  }
394 
395  return STATUS_SUCCESS;
396 }
397 
400  uint64_t i;
401 
402  for (i = 0; i < ci->num_stripes; i++) {
403  if (context->stripes[i].status == ReadDataStatus_Error) {
404  WARN("stripe %I64u returned error %08lx\n", i, context->stripes[i].iosb.Status);
406  return context->stripes[i].iosb.Status;
407  }
408  }
409 
410  if (context->tree) { // shouldn't happen, as trees shouldn't cross stripe boundaries
411  tree_header* th = (tree_header*)buf;
412  bool checksum_error = !check_tree_checksum(Vcb, th);
413 
414  if (checksum_error || addr != th->address || (generation != 0 && generation != th->generation)) {
415  uint64_t off;
417 
419 
420  ERR("unrecoverable checksum error at %I64x, device %I64x\n", addr, devices[stripe]->devitem.dev_id);
421 
422  if (checksum_error) {
424  return STATUS_CRC_ERROR;
425  } else if (addr != th->address) {
426  WARN("address of tree was %I64x, not %I64x as expected\n", th->address, addr);
428  return STATUS_CRC_ERROR;
429  } else if (generation != 0 && generation != th->generation) {
430  WARN("generation of tree was %I64x, not %I64x as expected\n", th->generation, generation);
432  return STATUS_CRC_ERROR;
433  }
434  }
435  } else if (context->csum) {
437 
438  Status = check_csum(Vcb, buf, length / Vcb->superblock.sector_size, context->csum);
439 
440  if (Status == STATUS_CRC_ERROR) {
441  void* ptr = context->csum;
442 
443  for (i = 0; i < length / Vcb->superblock.sector_size; i++) {
444  if (!check_sector_csum(Vcb, buf + (i * Vcb->superblock.sector_size), ptr)) {
445  uint64_t off;
447 
448  get_raid0_offset(addr - offset + UInt32x32To64(i, Vcb->superblock.sector_size), ci->stripe_length, ci->num_stripes, &off, &stripe);
449 
450  ERR("unrecoverable checksum error at %I64x, device %I64x\n", addr, devices[stripe]->devitem.dev_id);
451 
453 
454  return Status;
455  }
456 
457  ptr = (uint8_t*)ptr + Vcb->csum_size;
458  }
459 
460  return Status;
461  } else if (!NT_SUCCESS(Status)) {
462  ERR("check_csum returned %08lx\n", Status);
463  return Status;
464  }
465  }
466 
467  return STATUS_SUCCESS;
468 }
469 
472  uint64_t i;
473  uint16_t j, stripe;
475  bool checksum_error = false;
476  CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&ci[1];
477 
478  for (j = 0; j < ci->num_stripes; j++) {
479  if (context->stripes[j].status == ReadDataStatus_Error) {
480  WARN("stripe %u returned error %08lx\n", j, context->stripes[j].iosb.Status);
482  return context->stripes[j].iosb.Status;
483  } else if (context->stripes[j].status == ReadDataStatus_Success)
484  stripe = j;
485  }
486 
487  if (context->tree) {
488  tree_header* th = (tree_header*)buf;
489 
490  if (!check_tree_checksum(Vcb, th)) {
491  checksum_error = true;
493  } else if (addr != th->address) {
494  WARN("address of tree was %I64x, not %I64x as expected\n", th->address, addr);
495  checksum_error = true;
497  } else if (generation != 0 && generation != th->generation) {
498  WARN("generation of tree was %I64x, not %I64x as expected\n", th->generation, generation);
499  checksum_error = true;
501  }
502  } else if (context->csum) {
503  Status = check_csum(Vcb, buf, length / Vcb->superblock.sector_size, context->csum);
504 
505  if (Status == STATUS_CRC_ERROR)
506  checksum_error = true;
507  else if (!NT_SUCCESS(Status)) {
508  ERR("check_csum returned %08lx\n", Status);
509  return Status;
510  }
511  }
512 
513  if (!checksum_error)
514  return STATUS_SUCCESS;
515 
516  if (context->tree) {
517  tree_header* t2;
518  uint64_t off;
519  uint16_t badsubstripe = 0;
520  bool recovered = false;
521 
522  t2 = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.node_size, ALLOC_TAG);
523  if (!t2) {
524  ERR("out of memory\n");
526  }
527 
529 
530  stripe *= ci->sub_stripes;
531 
532  for (j = 0; j < ci->sub_stripes; j++) {
533  if (context->stripes[stripe + j].status == ReadDataStatus_Success) {
534  badsubstripe = j;
535  break;
536  }
537  }
538 
539  for (j = 0; j < ci->sub_stripes; j++) {
540  if (context->stripes[stripe + j].status != ReadDataStatus_Success && devices[stripe + j] && devices[stripe + j]->devobj) {
541  Status = sync_read_phys(devices[stripe + j]->devobj, devices[stripe + j]->fileobj, cis[stripe + j].offset + off,
542  Vcb->superblock.node_size, (uint8_t*)t2, false);
543  if (!NT_SUCCESS(Status)) {
544  WARN("sync_read_phys returned %08lx\n", Status);
546  } else {
547  bool checksum_error = !check_tree_checksum(Vcb, t2);
548 
549  if (t2->address == addr && !checksum_error && (generation == 0 || t2->generation == generation)) {
550  RtlCopyMemory(buf, t2, Vcb->superblock.node_size);
551  ERR("recovering from checksum error at %I64x, device %I64x\n", addr, devices[stripe + j]->devitem.dev_id);
552  recovered = true;
553 
554  if (!Vcb->readonly && !devices[stripe + badsubstripe]->readonly && devices[stripe + badsubstripe]->devobj) { // write good data over bad
555  Status = write_data_phys(devices[stripe + badsubstripe]->devobj, devices[stripe + badsubstripe]->fileobj,
556  cis[stripe + badsubstripe].offset + off, t2, Vcb->superblock.node_size);
557  if (!NT_SUCCESS(Status)) {
558  WARN("write_data_phys returned %08lx\n", Status);
560  }
561  }
562 
563  break;
564  } else if (t2->address != addr || checksum_error)
566  else
568  }
569  }
570  }
571 
572  if (!recovered) {
573  ERR("unrecoverable checksum error at %I64x\n", addr);
574  ExFreePool(t2);
575  return STATUS_CRC_ERROR;
576  }
577 
578  ExFreePool(t2);
579  } else {
580  ULONG sectors = length / Vcb->superblock.sector_size;
581  uint8_t* sector;
582  void* ptr = context->csum;
583 
584  sector = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.sector_size, ALLOC_TAG);
585  if (!sector) {
586  ERR("out of memory\n");
588  }
589 
590  for (i = 0; i < sectors; i++) {
591  if (!check_sector_csum(Vcb, buf + (i * Vcb->superblock.sector_size), ptr)) {
592  uint64_t off;
593  uint16_t stripe2, badsubstripe = 0;
594  bool recovered = false;
595 
596  get_raid0_offset(addr - offset + UInt32x32To64(i, Vcb->superblock.sector_size), ci->stripe_length,
597  ci->num_stripes / ci->sub_stripes, &off, &stripe2);
598 
599  stripe2 *= ci->sub_stripes;
600 
601  for (j = 0; j < ci->sub_stripes; j++) {
602  if (context->stripes[stripe2 + j].status == ReadDataStatus_Success) {
603  badsubstripe = j;
604  break;
605  }
606  }
607 
609 
610  for (j = 0; j < ci->sub_stripes; j++) {
611  if (context->stripes[stripe2 + j].status != ReadDataStatus_Success && devices[stripe2 + j] && devices[stripe2 + j]->devobj) {
612  Status = sync_read_phys(devices[stripe2 + j]->devobj, devices[stripe2 + j]->fileobj, cis[stripe2 + j].offset + off,
613  Vcb->superblock.sector_size, sector, false);
614  if (!NT_SUCCESS(Status)) {
615  WARN("sync_read_phys returned %08lx\n", Status);
617  } else {
618  if (check_sector_csum(Vcb, sector, ptr)) {
619  RtlCopyMemory(buf + (i * Vcb->superblock.sector_size), sector, Vcb->superblock.sector_size);
620  ERR("recovering from checksum error at %I64x, device %I64x\n", addr + UInt32x32To64(i, Vcb->superblock.sector_size), devices[stripe2 + j]->devitem.dev_id);
621  recovered = true;
622 
623  if (!Vcb->readonly && !devices[stripe2 + badsubstripe]->readonly && devices[stripe2 + badsubstripe]->devobj) { // write good data over bad
624  Status = write_data_phys(devices[stripe2 + badsubstripe]->devobj, devices[stripe2 + badsubstripe]->fileobj,
625  cis[stripe2 + badsubstripe].offset + off, sector, Vcb->superblock.sector_size);
626  if (!NT_SUCCESS(Status)) {
627  WARN("write_data_phys returned %08lx\n", Status);
628  log_device_error(Vcb, devices[stripe2 + badsubstripe], BTRFS_DEV_STAT_READ_ERRORS);
629  }
630  }
631 
632  break;
633  } else
635  }
636  }
637  }
638 
639  if (!recovered) {
640  ERR("unrecoverable checksum error at %I64x\n", addr + UInt32x32To64(i, Vcb->superblock.sector_size));
642  return STATUS_CRC_ERROR;
643  }
644  }
645 
646  ptr = (uint8_t*)ptr + Vcb->csum_size;
647  }
648 
650  }
651 
652  return STATUS_SUCCESS;
653 }
654 
656  device** devices, uint64_t offset, uint64_t generation, chunk* c, bool degraded) {
657  ULONG i;
659  bool checksum_error = false;
660  CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&ci[1];
661  uint16_t j, stripe;
662  bool no_success = true;
663 
664  for (j = 0; j < ci->num_stripes; j++) {
665  if (context->stripes[j].status == ReadDataStatus_Error) {
666  WARN("stripe %u returned error %08lx\n", j, context->stripes[j].iosb.Status);
668  return context->stripes[j].iosb.Status;
669  } else if (context->stripes[j].status == ReadDataStatus_Success) {
670  stripe = j;
671  no_success = false;
672  }
673  }
674 
675  if (c) { // check partial stripes
676  LIST_ENTRY* le;
677  uint64_t ps_length = (ci->num_stripes - 1) * ci->stripe_length;
678 
679  ExAcquireResourceSharedLite(&c->partial_stripes_lock, true);
680 
681  le = c->partial_stripes.Flink;
682  while (le != &c->partial_stripes) {
684 
685  if (ps->address + ps_length > addr && ps->address < addr + length) {
686  ULONG runlength, index;
687 
688  runlength = RtlFindFirstRunClear(&ps->bmp, &index);
689 
690  while (runlength != 0) {
691 #ifdef __REACTOS__
692  uint64_t runstart, runend, start, end;
693 #endif
694  if (index >= ps->bmplen)
695  break;
696 
697  if (index + runlength >= ps->bmplen) {
698  runlength = ps->bmplen - index;
699 
700  if (runlength == 0)
701  break;
702  }
703 
704 #ifndef __REACTOS__
705  uint64_t runstart = ps->address + (index * Vcb->superblock.sector_size);
706  uint64_t runend = runstart + (runlength * Vcb->superblock.sector_size);
707  uint64_t start = max(runstart, addr);
708  uint64_t end = min(runend, addr + length);
709 #else
710  runstart = ps->address + (index * Vcb->superblock.sector_size);
711  runend = runstart + (runlength * Vcb->superblock.sector_size);
712  start = max(runstart, addr);
713  end = min(runend, addr + length);
714 #endif
715 
716  if (end > start)
717  RtlCopyMemory(buf + start - addr, &ps->data[start - ps->address], (ULONG)(end - start));
718 
719  runlength = RtlFindNextForwardRunClear(&ps->bmp, index + runlength, &index);
720  }
721  } else if (ps->address >= addr + length)
722  break;
723 
724  le = le->Flink;
725  }
726 
727  ExReleaseResourceLite(&c->partial_stripes_lock);
728  }
729 
730  if (context->tree) {
731  tree_header* th = (tree_header*)buf;
732 
733  if (addr != th->address || !check_tree_checksum(Vcb, th)) {
734  checksum_error = true;
735  if (!no_success && !degraded)
737  } else if (generation != 0 && generation != th->generation) {
738  checksum_error = true;
739  if (!no_success && !degraded)
741  }
742  } else if (context->csum) {
743  Status = check_csum(Vcb, buf, length / Vcb->superblock.sector_size, context->csum);
744 
745  if (Status == STATUS_CRC_ERROR) {
746  if (!degraded)
747  WARN("checksum error\n");
748  checksum_error = true;
749  } else if (!NT_SUCCESS(Status)) {
750  ERR("check_csum returned %08lx\n", Status);
751  return Status;
752  }
753  } else if (degraded)
754  checksum_error = true;
755 
756  if (!checksum_error)
757  return STATUS_SUCCESS;
758 
759  if (context->tree) {
760  uint16_t parity;
761  uint64_t off;
762  bool recovered = false, first = true, failed = false;
763  uint8_t* t2;
764 
765  t2 = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.node_size * 2, ALLOC_TAG);
766  if (!t2) {
767  ERR("out of memory\n");
769  }
770 
772 
773  parity = (((addr - offset) / ((ci->num_stripes - 1) * ci->stripe_length)) + ci->num_stripes - 1) % ci->num_stripes;
774 
775  stripe = (parity + stripe + 1) % ci->num_stripes;
776 
777  for (j = 0; j < ci->num_stripes; j++) {
778  if (j != stripe) {
779  if (devices[j] && devices[j]->devobj) {
780  if (first) {
781  Status = sync_read_phys(devices[j]->devobj, devices[j]->fileobj, cis[j].offset + off, Vcb->superblock.node_size, t2, false);
782  if (!NT_SUCCESS(Status)) {
783  ERR("sync_read_phys returned %08lx\n", Status);
785  failed = true;
786  break;
787  }
788 
789  first = false;
790  } else {
791  Status = sync_read_phys(devices[j]->devobj, devices[j]->fileobj, cis[j].offset + off, Vcb->superblock.node_size, t2 + Vcb->superblock.node_size, false);
792  if (!NT_SUCCESS(Status)) {
793  ERR("sync_read_phys returned %08lx\n", Status);
795  failed = true;
796  break;
797  }
798 
799  do_xor(t2, t2 + Vcb->superblock.node_size, Vcb->superblock.node_size);
800  }
801  } else {
802  failed = true;
803  break;
804  }
805  }
806  }
807 
808  if (!failed) {
809  tree_header* t3 = (tree_header*)t2;
810 
811  if (t3->address == addr && check_tree_checksum(Vcb, t3) && (generation == 0 || t3->generation == generation)) {
812  RtlCopyMemory(buf, t2, Vcb->superblock.node_size);
813 
814  if (!degraded)
815  ERR("recovering from checksum error at %I64x, device %I64x\n", addr, devices[stripe]->devitem.dev_id);
816 
817  recovered = true;
818 
819  if (!Vcb->readonly && devices[stripe] && !devices[stripe]->readonly && devices[stripe]->devobj) { // write good data over bad
820  Status = write_data_phys(devices[stripe]->devobj, devices[stripe]->fileobj, cis[stripe].offset + off, t2, Vcb->superblock.node_size);
821  if (!NT_SUCCESS(Status)) {
822  WARN("write_data_phys returned %08lx\n", Status);
824  }
825  }
826  }
827  }
828 
829  if (!recovered) {
830  ERR("unrecoverable checksum error at %I64x\n", addr);
831  ExFreePool(t2);
832  return STATUS_CRC_ERROR;
833  }
834 
835  ExFreePool(t2);
836  } else {
837  ULONG sectors = length / Vcb->superblock.sector_size;
838  uint8_t* sector;
839  void* ptr = context->csum;
840 
841  sector = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.sector_size * 2, ALLOC_TAG);
842  if (!sector) {
843  ERR("out of memory\n");
845  }
846 
847  for (i = 0; i < sectors; i++) {
848  uint16_t parity;
849  uint64_t off;
850 
851  get_raid0_offset(addr - offset + UInt32x32To64(i, Vcb->superblock.sector_size), ci->stripe_length,
852  ci->num_stripes - 1, &off, &stripe);
853 
854  parity = (((addr - offset + UInt32x32To64(i, Vcb->superblock.sector_size)) / ((ci->num_stripes - 1) * ci->stripe_length)) + ci->num_stripes - 1) % ci->num_stripes;
855 
856  stripe = (parity + stripe + 1) % ci->num_stripes;
857 
858  if (!devices[stripe] || !devices[stripe]->devobj || (ptr && !check_sector_csum(Vcb, buf + (i * Vcb->superblock.sector_size), ptr))) {
859  bool recovered = false, first = true, failed = false;
860 
861  if (devices[stripe] && devices[stripe]->devobj)
863 
864  for (j = 0; j < ci->num_stripes; j++) {
865  if (j != stripe) {
866  if (devices[j] && devices[j]->devobj) {
867  if (first) {
868  Status = sync_read_phys(devices[j]->devobj, devices[j]->fileobj, cis[j].offset + off, Vcb->superblock.sector_size, sector, false);
869  if (!NT_SUCCESS(Status)) {
870  ERR("sync_read_phys returned %08lx\n", Status);
871  failed = true;
873  break;
874  }
875 
876  first = false;
877  } else {
878  Status = sync_read_phys(devices[j]->devobj, devices[j]->fileobj, cis[j].offset + off, Vcb->superblock.sector_size,
879  sector + Vcb->superblock.sector_size, false);
880  if (!NT_SUCCESS(Status)) {
881  ERR("sync_read_phys returned %08lx\n", Status);
882  failed = true;
884  break;
885  }
886 
887  do_xor(sector, sector + Vcb->superblock.sector_size, Vcb->superblock.sector_size);
888  }
889  } else {
890  failed = true;
891  break;
892  }
893  }
894  }
895 
896  if (!failed) {
897  if (!ptr || check_sector_csum(Vcb, sector, ptr)) {
898  RtlCopyMemory(buf + (i * Vcb->superblock.sector_size), sector, Vcb->superblock.sector_size);
899 
900  if (!degraded)
901  ERR("recovering from checksum error at %I64x, device %I64x\n", addr + UInt32x32To64(i, Vcb->superblock.sector_size), devices[stripe]->devitem.dev_id);
902 
903  recovered = true;
904 
905  if (!Vcb->readonly && devices[stripe] && !devices[stripe]->readonly && devices[stripe]->devobj) { // write good data over bad
906  Status = write_data_phys(devices[stripe]->devobj, devices[stripe]->fileobj, cis[stripe].offset + off,
907  sector, Vcb->superblock.sector_size);
908  if (!NT_SUCCESS(Status)) {
909  WARN("write_data_phys returned %08lx\n", Status);
911  }
912  }
913  }
914  }
915 
916  if (!recovered) {
917  ERR("unrecoverable checksum error at %I64x\n", addr + UInt32x32To64(i, Vcb->superblock.sector_size));
919  return STATUS_CRC_ERROR;
920  }
921  }
922 
923  if (ptr)
924  ptr = (uint8_t*)ptr + Vcb->csum_size;
925  }
926 
928  }
929 
930  return STATUS_SUCCESS;
931 }
932 
933 void raid6_recover2(uint8_t* sectors, uint16_t num_stripes, ULONG sector_size, uint16_t missing1, uint16_t missing2, uint8_t* out) {
934  if (missing1 == num_stripes - 2 || missing2 == num_stripes - 2) { // reconstruct from q and data
935  uint16_t missing = missing1 == (num_stripes - 2) ? missing2 : missing1;
937 
938  stripe = num_stripes - 3;
939 
940  if (stripe == missing)
942  else
944 
945  do {
946  stripe--;
947 
949 
950  if (stripe != missing)
952  } while (stripe > 0);
953 
954  do_xor(out, sectors + ((num_stripes - 1) * sector_size), sector_size);
955 
956  if (missing != 0)
958  } else { // reconstruct from p and q
959  uint16_t x, y, stripe;
960  uint8_t gyx, gx, denom, a, b, *p, *q, *pxy, *qxy;
961  uint32_t j;
962 
963  stripe = num_stripes - 3;
964 
965  pxy = out + sector_size;
966  qxy = out;
967 
968  if (stripe == missing1 || stripe == missing2) {
971 
972  if (stripe == missing1)
973  x = stripe;
974  else
975  y = stripe;
976  } else {
979  }
980 
981  do {
982  stripe--;
983 
985 
986  if (stripe != missing1 && stripe != missing2) {
989  } else if (stripe == missing1)
990  x = stripe;
991  else if (stripe == missing2)
992  y = stripe;
993  } while (stripe > 0);
994 
995  gyx = gpow2(y > x ? (y-x) : (255-x+y));
996  gx = gpow2(255-x);
997 
998  denom = gdiv(1, gyx ^ 1);
999  a = gmul(gyx, denom);
1000  b = gmul(gx, denom);
1001 
1002  p = sectors + ((num_stripes - 2) * sector_size);
1003  q = sectors + ((num_stripes - 1) * sector_size);
1004 
1005  for (j = 0; j < sector_size; j++) {
1006  *qxy = gmul(a, *p ^ *pxy) ^ gmul(b, *q ^ *qxy);
1007 
1008  p++;
1009  q++;
1010  pxy++;
1011  qxy++;
1012  }
1013 
1015  do_xor(out + sector_size, sectors + ((num_stripes - 2) * sector_size), sector_size);
1016  }
1017 }
1018 
1020  device** devices, uint64_t offset, uint64_t generation, chunk* c, bool degraded) {
1021  NTSTATUS Status;
1022  ULONG i;
1023  bool checksum_error = false;
1024  CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&ci[1];
1025  uint16_t stripe, j;
1026  bool no_success = true;
1027 
1028  for (j = 0; j < ci->num_stripes; j++) {
1029  if (context->stripes[j].status == ReadDataStatus_Error) {
1030  WARN("stripe %u returned error %08lx\n", j, context->stripes[j].iosb.Status);
1031 
1032  if (devices[j])
1034  return context->stripes[j].iosb.Status;
1035  } else if (context->stripes[j].status == ReadDataStatus_Success) {
1036  stripe = j;
1037  no_success = false;
1038  }
1039  }
1040 
1041  if (c) { // check partial stripes
1042  LIST_ENTRY* le;
1043  uint64_t ps_length = (ci->num_stripes - 2) * ci->stripe_length;
1044 
1045  ExAcquireResourceSharedLite(&c->partial_stripes_lock, true);
1046 
1047  le = c->partial_stripes.Flink;
1048  while (le != &c->partial_stripes) {
1050 
1051  if (ps->address + ps_length > addr && ps->address < addr + length) {
1052  ULONG runlength, index;
1053 
1054  runlength = RtlFindFirstRunClear(&ps->bmp, &index);
1055 
1056  while (runlength != 0) {
1057 #ifdef __REACTOS__
1058  uint64_t runstart, runend, start, end;
1059 #endif
1060  if (index >= ps->bmplen)
1061  break;
1062 
1063  if (index + runlength >= ps->bmplen) {
1064  runlength = ps->bmplen - index;
1065 
1066  if (runlength == 0)
1067  break;
1068  }
1069 
1070 #ifndef __REACTOS__
1071  uint64_t runstart = ps->address + (index * Vcb->superblock.sector_size);
1072  uint64_t runend = runstart + (runlength * Vcb->superblock.sector_size);
1073  uint64_t start = max(runstart, addr);
1074  uint64_t end = min(runend, addr + length);
1075 #else
1076  runstart = ps->address + (index * Vcb->superblock.sector_size);
1077  runend = runstart + (runlength * Vcb->superblock.sector_size);
1078  start = max(runstart, addr);
1079  end = min(runend, addr + length);
1080 #endif
1081 
1082  if (end > start)
1083  RtlCopyMemory(buf + start - addr, &ps->data[start - ps->address], (ULONG)(end - start));
1084 
1085  runlength = RtlFindNextForwardRunClear(&ps->bmp, index + runlength, &index);
1086  }
1087  } else if (ps->address >= addr + length)
1088  break;
1089 
1090  le = le->Flink;
1091  }
1092 
1093  ExReleaseResourceLite(&c->partial_stripes_lock);
1094  }
1095 
1096  if (context->tree) {
1097  tree_header* th = (tree_header*)buf;
1098 
1099  if (addr != th->address || !check_tree_checksum(Vcb, th)) {
1100  checksum_error = true;
1101  if (!no_success && !degraded && devices[stripe])
1103  } else if (generation != 0 && generation != th->generation) {
1104  checksum_error = true;
1105  if (!no_success && !degraded && devices[stripe])
1107  }
1108  } else if (context->csum) {
1109  Status = check_csum(Vcb, buf, length / Vcb->superblock.sector_size, context->csum);
1110 
1111  if (Status == STATUS_CRC_ERROR) {
1112  if (!degraded)
1113  WARN("checksum error\n");
1114  checksum_error = true;
1115  } else if (!NT_SUCCESS(Status)) {
1116  ERR("check_csum returned %08lx\n", Status);
1117  return Status;
1118  }
1119  } else if (degraded)
1120  checksum_error = true;
1121 
1122  if (!checksum_error)
1123  return STATUS_SUCCESS;
1124 
1125  if (context->tree) {
1126  uint8_t* sector;
1127  uint16_t k, physstripe, parity1, parity2, error_stripe;
1128  uint64_t off;
1129  bool recovered = false, failed = false;
1130  ULONG num_errors = 0;
1131 
1132  sector = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.node_size * (ci->num_stripes + 2), ALLOC_TAG);
1133  if (!sector) {
1134  ERR("out of memory\n");
1136  }
1137 
1139 
1140  parity1 = (((addr - offset) / ((ci->num_stripes - 2) * ci->stripe_length)) + ci->num_stripes - 2) % ci->num_stripes;
1141  parity2 = (parity1 + 1) % ci->num_stripes;
1142 
1143  physstripe = (parity2 + stripe + 1) % ci->num_stripes;
1144 
1145  j = (parity2 + 1) % ci->num_stripes;
1146 
1147  for (k = 0; k < ci->num_stripes - 1; k++) {
1148  if (j != physstripe) {
1149  if (devices[j] && devices[j]->devobj) {
1150  Status = sync_read_phys(devices[j]->devobj, devices[j]->fileobj, cis[j].offset + off, Vcb->superblock.node_size,
1151  sector + (k * Vcb->superblock.node_size), false);
1152  if (!NT_SUCCESS(Status)) {
1153  ERR("sync_read_phys returned %08lx\n", Status);
1155  num_errors++;
1156  error_stripe = k;
1157 
1158  if (num_errors > 1) {
1159  failed = true;
1160  break;
1161  }
1162  }
1163  } else {
1164  num_errors++;
1165  error_stripe = k;
1166 
1167  if (num_errors > 1) {
1168  failed = true;
1169  break;
1170  }
1171  }
1172  }
1173 
1174  j = (j + 1) % ci->num_stripes;
1175  }
1176 
1177  if (!failed) {
1178  if (num_errors == 0) {
1179  tree_header* th = (tree_header*)(sector + (stripe * Vcb->superblock.node_size));
1180 
1181  RtlCopyMemory(sector + (stripe * Vcb->superblock.node_size), sector + ((ci->num_stripes - 2) * Vcb->superblock.node_size),
1182  Vcb->superblock.node_size);
1183 
1184  for (j = 0; j < ci->num_stripes - 2; j++) {
1185  if (j != stripe)
1186  do_xor(sector + (stripe * Vcb->superblock.node_size), sector + (j * Vcb->superblock.node_size), Vcb->superblock.node_size);
1187  }
1188 
1189  if (th->address == addr && check_tree_checksum(Vcb, th) && (generation == 0 || th->generation == generation)) {
1190  RtlCopyMemory(buf, sector + (stripe * Vcb->superblock.node_size), Vcb->superblock.node_size);
1191 
1192  if (devices[physstripe] && devices[physstripe]->devobj)
1193  ERR("recovering from checksum error at %I64x, device %I64x\n", addr, devices[physstripe]->devitem.dev_id);
1194 
1195  recovered = true;
1196 
1197  if (!Vcb->readonly && devices[physstripe] && devices[physstripe]->devobj && !devices[physstripe]->readonly) { // write good data over bad
1198  Status = write_data_phys(devices[physstripe]->devobj, devices[physstripe]->fileobj, cis[physstripe].offset + off,
1199  sector + (stripe * Vcb->superblock.node_size), Vcb->superblock.node_size);
1200  if (!NT_SUCCESS(Status)) {
1201  WARN("write_data_phys returned %08lx\n", Status);
1203  }
1204  }
1205  }
1206  }
1207 
1208  if (!recovered) {
1209  tree_header* th = (tree_header*)(sector + (ci->num_stripes * Vcb->superblock.node_size));
1210  bool read_q = false;
1211 
1212  if (devices[parity2] && devices[parity2]->devobj) {
1213  Status = sync_read_phys(devices[parity2]->devobj, devices[parity2]->fileobj, cis[parity2].offset + off,
1214  Vcb->superblock.node_size, sector + ((ci->num_stripes - 1) * Vcb->superblock.node_size), false);
1215  if (!NT_SUCCESS(Status)) {
1216  ERR("sync_read_phys returned %08lx\n", Status);
1218  } else
1219  read_q = true;
1220  }
1221 
1222  if (read_q) {
1223  if (num_errors == 1) {
1224  raid6_recover2(sector, ci->num_stripes, Vcb->superblock.node_size, stripe, error_stripe, sector + (ci->num_stripes * Vcb->superblock.node_size));
1225 
1226  if (th->address == addr && check_tree_checksum(Vcb, th) && (generation == 0 || th->generation == generation))
1227  recovered = true;
1228  } else {
1229  for (j = 0; j < ci->num_stripes - 1; j++) {
1230  if (j != stripe) {
1231  raid6_recover2(sector, ci->num_stripes, Vcb->superblock.node_size, stripe, j, sector + (ci->num_stripes * Vcb->superblock.node_size));
1232 
1233  if (th->address == addr && check_tree_checksum(Vcb, th) && (generation == 0 || th->generation == generation)) {
1234  recovered = true;
1235  error_stripe = j;
1236  break;
1237  }
1238  }
1239  }
1240  }
1241  }
1242 
1243  if (recovered) {
1244  uint16_t error_stripe_phys = (parity2 + error_stripe + 1) % ci->num_stripes;
1245 
1246  if (devices[physstripe] && devices[physstripe]->devobj)
1247  ERR("recovering from checksum error at %I64x, device %I64x\n", addr, devices[physstripe]->devitem.dev_id);
1248 
1249  RtlCopyMemory(buf, sector + (ci->num_stripes * Vcb->superblock.node_size), Vcb->superblock.node_size);
1250 
1251  if (!Vcb->readonly && devices[physstripe] && devices[physstripe]->devobj && !devices[physstripe]->readonly) { // write good data over bad
1252  Status = write_data_phys(devices[physstripe]->devobj, devices[physstripe]->fileobj, cis[physstripe].offset + off,
1253  sector + (ci->num_stripes * Vcb->superblock.node_size), Vcb->superblock.node_size);
1254  if (!NT_SUCCESS(Status)) {
1255  WARN("write_data_phys returned %08lx\n", Status);
1257  }
1258  }
1259 
1260  if (devices[error_stripe_phys] && devices[error_stripe_phys]->devobj) {
1261  if (error_stripe == ci->num_stripes - 2) {
1262  ERR("recovering from parity error at %I64x, device %I64x\n", addr, devices[error_stripe_phys]->devitem.dev_id);
1263 
1265 
1266  RtlZeroMemory(sector + ((ci->num_stripes - 2) * Vcb->superblock.node_size), Vcb->superblock.node_size);
1267 
1268  for (j = 0; j < ci->num_stripes - 2; j++) {
1269  if (j == stripe) {
1270  do_xor(sector + ((ci->num_stripes - 2) * Vcb->superblock.node_size), sector + (ci->num_stripes * Vcb->superblock.node_size),
1271  Vcb->superblock.node_size);
1272  } else {
1273  do_xor(sector + ((ci->num_stripes - 2) * Vcb->superblock.node_size), sector + (j * Vcb->superblock.node_size),
1274  Vcb->superblock.node_size);
1275  }
1276  }
1277  } else {
1278  ERR("recovering from checksum error at %I64x, device %I64x\n", addr + ((error_stripe - stripe) * ci->stripe_length),
1279  devices[error_stripe_phys]->devitem.dev_id);
1280 
1282 
1283  RtlCopyMemory(sector + (error_stripe * Vcb->superblock.node_size),
1284  sector + ((ci->num_stripes + 1) * Vcb->superblock.node_size), Vcb->superblock.node_size);
1285  }
1286  }
1287 
1288  if (!Vcb->readonly && devices[error_stripe_phys] && devices[error_stripe_phys]->devobj && !devices[error_stripe_phys]->readonly) { // write good data over bad
1289  Status = write_data_phys(devices[error_stripe_phys]->devobj, devices[error_stripe_phys]->fileobj, cis[error_stripe_phys].offset + off,
1290  sector + (error_stripe * Vcb->superblock.node_size), Vcb->superblock.node_size);
1291  if (!NT_SUCCESS(Status)) {
1292  WARN("write_data_phys returned %08lx\n", Status);
1294  }
1295  }
1296  }
1297  }
1298  }
1299 
1300  if (!recovered) {
1301  ERR("unrecoverable checksum error at %I64x\n", addr);
1302  ExFreePool(sector);
1303  return STATUS_CRC_ERROR;
1304  }
1305 
1306  ExFreePool(sector);
1307  } else {
1308  ULONG sectors = length / Vcb->superblock.sector_size;
1309  uint8_t* sector;
1310  void* ptr = context->csum;
1311 
1312  sector = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.sector_size * (ci->num_stripes + 2), ALLOC_TAG);
1313  if (!sector) {
1314  ERR("out of memory\n");
1316  }
1317 
1318  for (i = 0; i < sectors; i++) {
1319  uint64_t off;
1320  uint16_t physstripe, parity1, parity2;
1321 
1322  get_raid0_offset(addr - offset + UInt32x32To64(i, Vcb->superblock.sector_size), ci->stripe_length,
1323  ci->num_stripes - 2, &off, &stripe);
1324 
1325  parity1 = (((addr - offset + UInt32x32To64(i, Vcb->superblock.sector_size)) / ((ci->num_stripes - 2) * ci->stripe_length)) + ci->num_stripes - 2) % ci->num_stripes;
1326  parity2 = (parity1 + 1) % ci->num_stripes;
1327 
1328  physstripe = (parity2 + stripe + 1) % ci->num_stripes;
1329 
1330  if (!devices[physstripe] || !devices[physstripe]->devobj || (context->csum && !check_sector_csum(Vcb, buf + (i * Vcb->superblock.sector_size), ptr))) {
1331  uint16_t k, error_stripe;
1332  bool recovered = false, failed = false;
1333  ULONG num_errors = 0;
1334 
1335  if (devices[physstripe] && devices[physstripe]->devobj)
1337 
1338  j = (parity2 + 1) % ci->num_stripes;
1339 
1340  for (k = 0; k < ci->num_stripes - 1; k++) {
1341  if (j != physstripe) {
1342  if (devices[j] && devices[j]->devobj) {
1343  Status = sync_read_phys(devices[j]->devobj, devices[j]->fileobj, cis[j].offset + off, Vcb->superblock.sector_size,
1344  sector + (k * Vcb->superblock.sector_size), false);
1345  if (!NT_SUCCESS(Status)) {
1346  ERR("sync_read_phys returned %08lx\n", Status);
1348  num_errors++;
1349  error_stripe = k;
1350 
1351  if (num_errors > 1) {
1352  failed = true;
1353  break;
1354  }
1355  }
1356  } else {
1357  num_errors++;
1358  error_stripe = k;
1359 
1360  if (num_errors > 1) {
1361  failed = true;
1362  break;
1363  }
1364  }
1365  }
1366 
1367  j = (j + 1) % ci->num_stripes;
1368  }
1369 
1370  if (!failed) {
1371  if (num_errors == 0) {
1372  RtlCopyMemory(sector + (stripe * Vcb->superblock.sector_size), sector + ((ci->num_stripes - 2) * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
1373 
1374  for (j = 0; j < ci->num_stripes - 2; j++) {
1375  if (j != stripe)
1376  do_xor(sector + (stripe * Vcb->superblock.sector_size), sector + (j * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
1377  }
1378 
1379  if (!ptr || check_sector_csum(Vcb, sector + (stripe * Vcb->superblock.sector_size), ptr)) {
1380  RtlCopyMemory(buf + (i * Vcb->superblock.sector_size), sector + (stripe * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
1381 
1382  if (devices[physstripe] && devices[physstripe]->devobj)
1383  ERR("recovering from checksum error at %I64x, device %I64x\n", addr + UInt32x32To64(i, Vcb->superblock.sector_size),
1384  devices[physstripe]->devitem.dev_id);
1385 
1386  recovered = true;
1387 
1388  if (!Vcb->readonly && devices[physstripe] && devices[physstripe]->devobj && !devices[physstripe]->readonly) { // write good data over bad
1389  Status = write_data_phys(devices[physstripe]->devobj, devices[physstripe]->fileobj, cis[physstripe].offset + off,
1390  sector + (stripe * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
1391  if (!NT_SUCCESS(Status)) {
1392  WARN("write_data_phys returned %08lx\n", Status);
1394  }
1395  }
1396  }
1397  }
1398 
1399  if (!recovered) {
1400  bool read_q = false;
1401 
1402  if (devices[parity2] && devices[parity2]->devobj) {
1403  Status = sync_read_phys(devices[parity2]->devobj, devices[parity2]->fileobj, cis[parity2].offset + off,
1404  Vcb->superblock.sector_size, sector + ((ci->num_stripes - 1) * Vcb->superblock.sector_size), false);
1405  if (!NT_SUCCESS(Status)) {
1406  ERR("sync_read_phys returned %08lx\n", Status);
1408  } else
1409  read_q = true;
1410  }
1411 
1412  if (read_q) {
1413  if (num_errors == 1) {
1414  raid6_recover2(sector, ci->num_stripes, Vcb->superblock.sector_size, stripe, error_stripe, sector + (ci->num_stripes * Vcb->superblock.sector_size));
1415 
1416  if (!devices[physstripe] || !devices[physstripe]->devobj)
1417  recovered = true;
1418  else
1419  recovered = check_sector_csum(Vcb, sector + (ci->num_stripes * Vcb->superblock.sector_size), ptr);
1420  } else {
1421  for (j = 0; j < ci->num_stripes - 1; j++) {
1422  if (j != stripe) {
1423  raid6_recover2(sector, ci->num_stripes, Vcb->superblock.sector_size, stripe, j, sector + (ci->num_stripes * Vcb->superblock.sector_size));
1424 
1425  if (check_sector_csum(Vcb, sector + (ci->num_stripes * Vcb->superblock.sector_size), ptr)) {
1426  recovered = true;
1427  error_stripe = j;
1428  break;
1429  }
1430  }
1431  }
1432  }
1433  }
1434 
1435  if (recovered) {
1436  uint16_t error_stripe_phys = (parity2 + error_stripe + 1) % ci->num_stripes;
1437 
1438  if (devices[physstripe] && devices[physstripe]->devobj)
1439  ERR("recovering from checksum error at %I64x, device %I64x\n",
1440  addr + UInt32x32To64(i, Vcb->superblock.sector_size), devices[physstripe]->devitem.dev_id);
1441 
1442  RtlCopyMemory(buf + (i * Vcb->superblock.sector_size), sector + (ci->num_stripes * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
1443 
1444  if (!Vcb->readonly && devices[physstripe] && devices[physstripe]->devobj && !devices[physstripe]->readonly) { // write good data over bad
1445  Status = write_data_phys(devices[physstripe]->devobj, devices[physstripe]->fileobj, cis[physstripe].offset + off,
1446  sector + (ci->num_stripes * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
1447  if (!NT_SUCCESS(Status)) {
1448  WARN("write_data_phys returned %08lx\n", Status);
1450  }
1451  }
1452 
1453  if (devices[error_stripe_phys] && devices[error_stripe_phys]->devobj) {
1454  if (error_stripe == ci->num_stripes - 2) {
1455  ERR("recovering from parity error at %I64x, device %I64x\n", addr + UInt32x32To64(i, Vcb->superblock.sector_size),
1456  devices[error_stripe_phys]->devitem.dev_id);
1457 
1459 
1460  RtlZeroMemory(sector + ((ci->num_stripes - 2) * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
1461 
1462  for (j = 0; j < ci->num_stripes - 2; j++) {
1463  if (j == stripe) {
1464  do_xor(sector + ((ci->num_stripes - 2) * Vcb->superblock.sector_size), sector + (ci->num_stripes * Vcb->superblock.sector_size),
1465  Vcb->superblock.sector_size);
1466  } else {
1467  do_xor(sector + ((ci->num_stripes - 2) * Vcb->superblock.sector_size), sector + (j * Vcb->superblock.sector_size),
1468  Vcb->superblock.sector_size);
1469  }
1470  }
1471  } else {
1472  ERR("recovering from checksum error at %I64x, device %I64x\n",
1473  addr + UInt32x32To64(i, Vcb->superblock.sector_size) + ((error_stripe - stripe) * ci->stripe_length),
1474  devices[error_stripe_phys]->devitem.dev_id);
1475 
1477 
1478  RtlCopyMemory(sector + (error_stripe * Vcb->superblock.sector_size),
1479  sector + ((ci->num_stripes + 1) * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
1480  }
1481  }
1482 
1483  if (!Vcb->readonly && devices[error_stripe_phys] && devices[error_stripe_phys]->devobj && !devices[error_stripe_phys]->readonly) { // write good data over bad
1484  Status = write_data_phys(devices[error_stripe_phys]->devobj, devices[error_stripe_phys]->fileobj, cis[error_stripe_phys].offset + off,
1485  sector + (error_stripe * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
1486  if (!NT_SUCCESS(Status)) {
1487  WARN("write_data_phys returned %08lx\n", Status);
1489  }
1490  }
1491  }
1492  }
1493  }
1494 
1495  if (!recovered) {
1496  ERR("unrecoverable checksum error at %I64x\n", addr + UInt32x32To64(i, Vcb->superblock.sector_size));
1497  ExFreePool(sector);
1498  return STATUS_CRC_ERROR;
1499  }
1500  }
1501 
1502  if (ptr)
1503  ptr = (uint8_t*)ptr + Vcb->csum_size;
1504  }
1505 
1506  ExFreePool(sector);
1507  }
1508 
1509  return STATUS_SUCCESS;
1510 }
1511 
1514  _In_ ULONG priority) {
1515  CHUNK_ITEM* ci;
1516  CHUNK_ITEM_STRIPE* cis;
1518  uint64_t type, offset, total_reading = 0;
1519  NTSTATUS Status;
1520  device** devices = NULL;
1521  uint16_t i, startoffstripe, allowed_missing, missing_devices = 0;
1522  uint8_t* dummypage = NULL;
1523  PMDL dummy_mdl = NULL;
1524  bool need_to_wait;
1525  uint64_t lockaddr, locklen;
1526 
1527  if (Vcb->log_to_phys_loaded) {
1528  if (!c) {
1530 
1531  if (!c) {
1532  ERR("get_chunk_from_address failed\n");
1533  return STATUS_INTERNAL_ERROR;
1534  }
1535  }
1536 
1537  ci = c->chunk_item;
1538  offset = c->offset;
1539  devices = c->devices;
1540 
1541  if (pc)
1542  *pc = c;
1543  } else {
1544  LIST_ENTRY* le = Vcb->sys_chunks.Flink;
1545 
1546  ci = NULL;
1547 
1548  c = NULL;
1549  while (le != &Vcb->sys_chunks) {
1551 
1552  if (sc->key.obj_id == 0x100 && sc->key.obj_type == TYPE_CHUNK_ITEM && sc->key.offset <= addr) {
1553  CHUNK_ITEM* chunk_item = sc->data;
1554 
1555  if ((addr - sc->key.offset) < chunk_item->size && chunk_item->num_stripes > 0) {
1556  ci = chunk_item;
1557  offset = sc->key.offset;
1558  cis = (CHUNK_ITEM_STRIPE*)&chunk_item[1];
1559 
1561  if (!devices) {
1562  ERR("out of memory\n");
1564  }
1565 
1566  for (i = 0; i < ci->num_stripes; i++) {
1567  devices[i] = find_device_from_uuid(Vcb, &cis[i].dev_uuid);
1568  }
1569 
1570  break;
1571  }
1572  }
1573 
1574  le = le->Flink;
1575  }
1576 
1577  if (!ci) {
1578  ERR("could not find chunk for %I64x in bootstrap\n", addr);
1579  return STATUS_INTERNAL_ERROR;
1580  }
1581 
1582  if (pc)
1583  *pc = NULL;
1584  }
1585 
1586  if (ci->type & BLOCK_FLAG_DUPLICATE) {
1588  allowed_missing = ci->num_stripes - 1;
1589  } else if (ci->type & BLOCK_FLAG_RAID0) {
1591  allowed_missing = 0;
1592  } else if (ci->type & BLOCK_FLAG_RAID1) {
1594  allowed_missing = 1;
1595  } else if (ci->type & BLOCK_FLAG_RAID10) {
1597  allowed_missing = 1;
1598  } else if (ci->type & BLOCK_FLAG_RAID5) {
1600  allowed_missing = 1;
1601  } else if (ci->type & BLOCK_FLAG_RAID6) {
1603  allowed_missing = 2;
1604  } else if (ci->type & BLOCK_FLAG_RAID1C3) {
1606  allowed_missing = 2;
1607  } else if (ci->type & BLOCK_FLAG_RAID1C4) {
1609  allowed_missing = 3;
1610  } else { // SINGLE
1612  allowed_missing = 0;
1613  }
1614 
1615  cis = (CHUNK_ITEM_STRIPE*)&ci[1];
1616 
1619 
1621  if (!context.stripes) {
1622  ERR("out of memory\n");
1624  }
1625 
1626  if (c && (type == BLOCK_FLAG_RAID5 || type == BLOCK_FLAG_RAID6)) {
1627  get_raid56_lock_range(c, addr, length, &lockaddr, &locklen);
1628  chunk_lock_range(Vcb, c, lockaddr, locklen);
1629  }
1630 
1631  RtlZeroMemory(context.stripes, sizeof(read_data_stripe) * ci->num_stripes);
1632 
1633  context.buflen = length;
1634  context.num_stripes = ci->num_stripes;
1635  context.stripes_left = context.num_stripes;
1636  context.sector_size = Vcb->superblock.sector_size;
1637  context.csum = csum;
1638  context.tree = is_tree;
1639  context.type = type;
1640 
1641  if (type == BLOCK_FLAG_RAID0) {
1642  uint64_t startoff, endoff;
1643  uint16_t endoffstripe, stripe;
1644  uint32_t *stripeoff, pos;
1645  PMDL master_mdl;
1646  PFN_NUMBER* pfns;
1647 
1648  // FIXME - test this still works if page size isn't the same as sector size
1649 
1650  // This relies on the fact that MDLs are followed in memory by the page file numbers,
1651  // so with a bit of jiggery-pokery you can trick your disks into deinterlacing your RAID0
1652  // data for you without doing a memcpy yourself.
1653  // MDLs are officially opaque, so this might very well break in future versions of Windows.
1654 
1655  get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes, &startoff, &startoffstripe);
1656  get_raid0_offset(addr + length - offset - 1, ci->stripe_length, ci->num_stripes, &endoff, &endoffstripe);
1657 
1658  if (file_read) {
1659  // Unfortunately we can't avoid doing at least one memcpy, as Windows can give us an MDL
1660  // with duplicated dummy PFNs, which confuse check_csum. Ah well.
1661  // See https://msdn.microsoft.com/en-us/library/windows/hardware/Dn614012.aspx if you're interested.
1662 
1664 
1665  if (!context.va) {
1666  ERR("out of memory\n");
1668  goto exit;
1669  }
1670  } else
1671  context.va = buf;
1672 
1673  master_mdl = IoAllocateMdl(context.va, length, false, false, NULL);
1674  if (!master_mdl) {
1675  ERR("out of memory\n");
1677  goto exit;
1678  }
1679 
1681 
1682  _SEH2_TRY {
1686  } _SEH2_END;
1687 
1688  if (!NT_SUCCESS(Status)) {
1689  ERR("MmProbeAndLockPages threw exception %08lx\n", Status);
1690  IoFreeMdl(master_mdl);
1691  goto exit;
1692  }
1693 
1694  pfns = (PFN_NUMBER*)(master_mdl + 1);
1695 
1696  for (i = 0; i < ci->num_stripes; i++) {
1697  if (startoffstripe > i)
1698  context.stripes[i].stripestart = startoff - (startoff % ci->stripe_length) + ci->stripe_length;
1699  else if (startoffstripe == i)
1700  context.stripes[i].stripestart = startoff;
1701  else
1702  context.stripes[i].stripestart = startoff - (startoff % ci->stripe_length);
1703 
1704  if (endoffstripe > i)
1705  context.stripes[i].stripeend = endoff - (endoff % ci->stripe_length) + ci->stripe_length;
1706  else if (endoffstripe == i)
1707  context.stripes[i].stripeend = endoff + 1;
1708  else
1709  context.stripes[i].stripeend = endoff - (endoff % ci->stripe_length);
1710 
1711  if (context.stripes[i].stripestart != context.stripes[i].stripeend) {
1712  context.stripes[i].mdl = IoAllocateMdl(context.va, (ULONG)(context.stripes[i].stripeend - context.stripes[i].stripestart), false, false, NULL);
1713 
1714  if (!context.stripes[i].mdl) {
1715  ERR("IoAllocateMdl failed\n");
1716  MmUnlockPages(master_mdl);
1717  IoFreeMdl(master_mdl);
1719  goto exit;
1720  }
1721  }
1722  }
1723 
1724  stripeoff = ExAllocatePoolWithTag(NonPagedPool, sizeof(uint32_t) * ci->num_stripes, ALLOC_TAG);
1725  if (!stripeoff) {
1726  ERR("out of memory\n");
1727  MmUnlockPages(master_mdl);
1728  IoFreeMdl(master_mdl);
1730  goto exit;
1731  }
1732 
1733  RtlZeroMemory(stripeoff, sizeof(uint32_t) * ci->num_stripes);
1734 
1735  pos = 0;
1736  stripe = startoffstripe;
1737  while (pos < length) {
1738  PFN_NUMBER* stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1);
1739 
1740  if (pos == 0) {
1741  uint32_t readlen = (uint32_t)min(context.stripes[stripe].stripeend - context.stripes[stripe].stripestart, ci->stripe_length - (context.stripes[stripe].stripestart % ci->stripe_length));
1742 
1743  RtlCopyMemory(stripe_pfns, pfns, readlen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
1744 
1745  stripeoff[stripe] += readlen;
1746  pos += readlen;
1747  } else if (length - pos < ci->stripe_length) {
1748  RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], (length - pos) * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
1749 
1750  pos = length;
1751  } else {
1752  RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], (ULONG)(ci->stripe_length * sizeof(PFN_NUMBER) >> PAGE_SHIFT));
1753 
1754  stripeoff[stripe] += (uint32_t)ci->stripe_length;
1755  pos += (uint32_t)ci->stripe_length;
1756  }
1757 
1758  stripe = (stripe + 1) % ci->num_stripes;
1759  }
1760 
1761  MmUnlockPages(master_mdl);
1762  IoFreeMdl(master_mdl);
1763 
1764  ExFreePool(stripeoff);
1765  } else if (type == BLOCK_FLAG_RAID10) {
1766  uint64_t startoff, endoff;
1767  uint16_t endoffstripe, j, stripe;
1768  ULONG orig_ls;
1769  PMDL master_mdl;
1770  PFN_NUMBER* pfns;
1771  uint32_t* stripeoff, pos;
1772  read_data_stripe** stripes;
1773 
1774  if (c)
1775  orig_ls = c->last_stripe;
1776  else
1777  orig_ls = 0;
1778 
1779  get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes / ci->sub_stripes, &startoff, &startoffstripe);
1780  get_raid0_offset(addr + length - offset - 1, ci->stripe_length, ci->num_stripes / ci->sub_stripes, &endoff, &endoffstripe);
1781 
1782  if ((ci->num_stripes % ci->sub_stripes) != 0) {
1783  ERR("chunk %I64x: num_stripes %x was not a multiple of sub_stripes %x!\n", offset, ci->num_stripes, ci->sub_stripes);
1785  goto exit;
1786  }
1787 
1788  if (file_read) {
1790 
1791  if (!context.va) {
1792  ERR("out of memory\n");
1794  goto exit;
1795  }
1796  } else
1797  context.va = buf;
1798 
1799  context.firstoff = (uint16_t)((startoff % ci->stripe_length) / Vcb->superblock.sector_size);
1800  context.startoffstripe = startoffstripe;
1801  context.sectors_per_stripe = (uint16_t)(ci->stripe_length / Vcb->superblock.sector_size);
1802 
1803  startoffstripe *= ci->sub_stripes;
1804  endoffstripe *= ci->sub_stripes;
1805 
1806  if (c)
1807  c->last_stripe = (orig_ls + 1) % ci->sub_stripes;
1808 
1809  master_mdl = IoAllocateMdl(context.va, length, false, false, NULL);
1810  if (!master_mdl) {
1811  ERR("out of memory\n");
1813  goto exit;
1814  }
1815 
1817 
1818  _SEH2_TRY {
1822  } _SEH2_END;
1823 
1824  if (!NT_SUCCESS(Status)) {
1825  ERR("MmProbeAndLockPages threw exception %08lx\n", Status);
1826  IoFreeMdl(master_mdl);
1827  goto exit;
1828  }
1829 
1830  pfns = (PFN_NUMBER*)(master_mdl + 1);
1831 
1833  if (!stripes) {
1834  ERR("out of memory\n");
1835  MmUnlockPages(master_mdl);
1836  IoFreeMdl(master_mdl);
1838  goto exit;
1839  }
1840 
1841  RtlZeroMemory(stripes, sizeof(read_data_stripe*) * ci->num_stripes / ci->sub_stripes);
1842 
1843  for (i = 0; i < ci->num_stripes; i += ci->sub_stripes) {
1844  uint64_t sstart, send;
1845  bool stripeset = false;
1846 
1847  if (startoffstripe > i)
1848  sstart = startoff - (startoff % ci->stripe_length) + ci->stripe_length;
1849  else if (startoffstripe == i)
1850  sstart = startoff;
1851  else
1852  sstart = startoff - (startoff % ci->stripe_length);
1853 
1854  if (endoffstripe > i)
1855  send = endoff - (endoff % ci->stripe_length) + ci->stripe_length;
1856  else if (endoffstripe == i)
1857  send = endoff + 1;
1858  else
1859  send = endoff - (endoff % ci->stripe_length);
1860 
1861  for (j = 0; j < ci->sub_stripes; j++) {
1862  if (j == orig_ls && devices[i+j] && devices[i+j]->devobj) {
1863  context.stripes[i+j].stripestart = sstart;
1864  context.stripes[i+j].stripeend = send;
1865  stripes[i / ci->sub_stripes] = &context.stripes[i+j];
1866 
1867  if (sstart != send) {
1868  context.stripes[i+j].mdl = IoAllocateMdl(context.va, (ULONG)(send - sstart), false, false, NULL);
1869 
1870  if (!context.stripes[i+j].mdl) {
1871  ERR("IoAllocateMdl failed\n");
1872  MmUnlockPages(master_mdl);
1873  IoFreeMdl(master_mdl);
1875  goto exit;
1876  }
1877  }
1878 
1879  stripeset = true;
1880  } else
1881  context.stripes[i+j].status = ReadDataStatus_Skip;
1882  }
1883 
1884  if (!stripeset) {
1885  for (j = 0; j < ci->sub_stripes; j++) {
1886  if (devices[i+j] && devices[i+j]->devobj) {
1887  context.stripes[i+j].stripestart = sstart;
1888  context.stripes[i+j].stripeend = send;
1889  context.stripes[i+j].status = ReadDataStatus_Pending;
1890  stripes[i / ci->sub_stripes] = &context.stripes[i+j];
1891 
1892  if (sstart != send) {
1893  context.stripes[i+j].mdl = IoAllocateMdl(context.va, (ULONG)(send - sstart), false, false, NULL);
1894 
1895  if (!context.stripes[i+j].mdl) {
1896  ERR("IoAllocateMdl failed\n");
1897  MmUnlockPages(master_mdl);
1898  IoFreeMdl(master_mdl);
1900  goto exit;
1901  }
1902  }
1903 
1904  stripeset = true;
1905  break;
1906  }
1907  }
1908 
1909  if (!stripeset) {
1910  ERR("could not find stripe to read\n");
1912  goto exit;
1913  }
1914  }
1915  }
1916 
1917  stripeoff = ExAllocatePoolWithTag(NonPagedPool, sizeof(uint32_t) * ci->num_stripes / ci->sub_stripes, ALLOC_TAG);
1918  if (!stripeoff) {
1919  ERR("out of memory\n");
1920  MmUnlockPages(master_mdl);
1921  IoFreeMdl(master_mdl);
1923  goto exit;
1924  }
1925 
1926  RtlZeroMemory(stripeoff, sizeof(uint32_t) * ci->num_stripes / ci->sub_stripes);
1927 
1928  pos = 0;
1929  stripe = startoffstripe / ci->sub_stripes;
1930  while (pos < length) {
1931  PFN_NUMBER* stripe_pfns = (PFN_NUMBER*)(stripes[stripe]->mdl + 1);
1932 
1933  if (pos == 0) {
1934  uint32_t readlen = (uint32_t)min(stripes[stripe]->stripeend - stripes[stripe]->stripestart,
1935  ci->stripe_length - (stripes[stripe]->stripestart % ci->stripe_length));
1936 
1937  RtlCopyMemory(stripe_pfns, pfns, readlen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
1938 
1939  stripeoff[stripe] += readlen;
1940  pos += readlen;
1941  } else if (length - pos < ci->stripe_length) {
1942  RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], (length - pos) * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
1943 
1944  pos = length;
1945  } else {
1946  RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], (ULONG)(ci->stripe_length * sizeof(PFN_NUMBER) >> PAGE_SHIFT));
1947 
1948  stripeoff[stripe] += (ULONG)ci->stripe_length;
1949  pos += (ULONG)ci->stripe_length;
1950  }
1951 
1952  stripe = (stripe + 1) % (ci->num_stripes / ci->sub_stripes);
1953  }
1954 
1955  MmUnlockPages(master_mdl);
1956  IoFreeMdl(master_mdl);
1957 
1958  ExFreePool(stripeoff);
1959  ExFreePool(stripes);
1960  } else if (type == BLOCK_FLAG_DUPLICATE) {
1961  uint64_t orig_ls;
1962 
1963  if (c)
1964  orig_ls = i = c->last_stripe;
1965  else
1966  orig_ls = i = 0;
1967 
1968  while (!devices[i] || !devices[i]->devobj) {
1969  i = (i + 1) % ci->num_stripes;
1970 
1971  if (i == orig_ls) {
1972  ERR("no devices available to service request\n");
1974  goto exit;
1975  }
1976  }
1977 
1978  if (c)
1979  c->last_stripe = (i + 1) % ci->num_stripes;
1980 
1981  context.stripes[i].stripestart = addr - offset;
1982  context.stripes[i].stripeend = context.stripes[i].stripestart + length;
1983 
1984  if (file_read) {
1986 
1987  if (!context.va) {
1988  ERR("out of memory\n");
1990  goto exit;
1991  }
1992 
1993  context.stripes[i].mdl = IoAllocateMdl(context.va, length, false, false, NULL);
1994  if (!context.stripes[i].mdl) {
1995  ERR("IoAllocateMdl failed\n");
1997  goto exit;
1998  }
1999 
2000  MmBuildMdlForNonPagedPool(context.stripes[i].mdl);
2001  } else {
2002  context.stripes[i].mdl = IoAllocateMdl(buf, length, false, false, NULL);
2003 
2004  if (!context.stripes[i].mdl) {
2005  ERR("IoAllocateMdl failed\n");
2007  goto exit;
2008  }
2009 
2011 
2012  _SEH2_TRY {
2016  } _SEH2_END;
2017 
2018  if (!NT_SUCCESS(Status)) {
2019  ERR("MmProbeAndLockPages threw exception %08lx\n", Status);
2020  goto exit;
2021  }
2022  }
2023  } else if (type == BLOCK_FLAG_RAID5) {
2024  uint64_t startoff, endoff;
2025  uint16_t endoffstripe, parity;
2026  uint32_t *stripeoff, pos;
2027  PMDL master_mdl;
2028  PFN_NUMBER *pfns, dummy;
2029  bool need_dummy = false;
2030 
2031  get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes - 1, &startoff, &startoffstripe);
2032  get_raid0_offset(addr + length - offset - 1, ci->stripe_length, ci->num_stripes - 1, &endoff, &endoffstripe);
2033 
2034  if (file_read) {
2036 
2037  if (!context.va) {
2038  ERR("out of memory\n");
2040  goto exit;
2041  }
2042  } else
2043  context.va = buf;
2044 
2045  master_mdl = IoAllocateMdl(context.va, length, false, false, NULL);
2046  if (!master_mdl) {
2047  ERR("out of memory\n");
2049  goto exit;
2050  }
2051 
2053 
2054  _SEH2_TRY {
2058  } _SEH2_END;
2059 
2060  if (!NT_SUCCESS(Status)) {
2061  ERR("MmProbeAndLockPages threw exception %08lx\n", Status);
2062  IoFreeMdl(master_mdl);
2063  goto exit;
2064  }
2065 
2066  pfns = (PFN_NUMBER*)(master_mdl + 1);
2067 
2068  pos = 0;
2069  while (pos < length) {
2070  parity = (((addr - offset + pos) / ((ci->num_stripes - 1) * ci->stripe_length)) + ci->num_stripes - 1) % ci->num_stripes;
2071 
2072  if (pos == 0) {
2073  uint16_t stripe = (parity + startoffstripe + 1) % ci->num_stripes;
2074  ULONG skip, readlen;
2075 
2076  i = startoffstripe;
2077  while (stripe != parity) {
2078  if (i == startoffstripe) {
2079  readlen = min(length, (ULONG)(ci->stripe_length - (startoff % ci->stripe_length)));
2080 
2081  context.stripes[stripe].stripestart = startoff;
2082  context.stripes[stripe].stripeend = startoff + readlen;
2083 
2084  pos += readlen;
2085 
2086  if (pos == length)
2087  break;
2088  } else {
2089  readlen = min(length - pos, (ULONG)ci->stripe_length);
2090 
2091  context.stripes[stripe].stripestart = startoff - (startoff % ci->stripe_length);
2092  context.stripes[stripe].stripeend = context.stripes[stripe].stripestart + readlen;
2093 
2094  pos += readlen;
2095 
2096  if (pos == length)
2097  break;
2098  }
2099 
2100  i++;
2101  stripe = (stripe + 1) % ci->num_stripes;
2102  }
2103 
2104  if (pos == length)
2105  break;
2106 
2107  for (i = 0; i < startoffstripe; i++) {
2108  uint16_t stripe2 = (parity + i + 1) % ci->num_stripes;
2109 
2110  context.stripes[stripe2].stripestart = context.stripes[stripe2].stripeend = startoff - (startoff % ci->stripe_length) + ci->stripe_length;
2111  }
2112 
2113  context.stripes[parity].stripestart = context.stripes[parity].stripeend = startoff - (startoff % ci->stripe_length) + ci->stripe_length;
2114 
2115  if (length - pos > ci->num_stripes * (ci->num_stripes - 1) * ci->stripe_length) {
2116  skip = (ULONG)(((length - pos) / (ci->num_stripes * (ci->num_stripes - 1) * ci->stripe_length)) - 1);
2117 
2118  for (i = 0; i < ci->num_stripes; i++) {
2119  context.stripes[i].stripeend += skip * ci->num_stripes * ci->stripe_length;
2120  }
2121 
2122  pos += (uint32_t)(skip * (ci->num_stripes - 1) * ci->num_stripes * ci->stripe_length);
2123  need_dummy = true;
2124  }
2125  } else if (length - pos >= ci->stripe_length * (ci->num_stripes - 1)) {
2126  for (i = 0; i < ci->num_stripes; i++) {
2127  context.stripes[i].stripeend += ci->stripe_length;
2128  }
2129 
2130  pos += (uint32_t)(ci->stripe_length * (ci->num_stripes - 1));
2131  need_dummy = true;
2132  } else {
2133  uint16_t stripe = (parity + 1) % ci->num_stripes;
2134 
2135  i = 0;
2136  while (stripe != parity) {
2137  if (endoffstripe == i) {
2138  context.stripes[stripe].stripeend = endoff + 1;
2139  break;
2140  } else if (endoffstripe > i)
2141  context.stripes[stripe].stripeend = endoff - (endoff % ci->stripe_length) + ci->stripe_length;
2142 
2143  i++;
2144  stripe = (stripe + 1) % ci->num_stripes;
2145  }
2146 
2147  break;
2148  }
2149  }
2150 
2151  for (i = 0; i < ci->num_stripes; i++) {
2152  if (context.stripes[i].stripestart != context.stripes[i].stripeend) {
2153  context.stripes[i].mdl = IoAllocateMdl(context.va, (ULONG)(context.stripes[i].stripeend - context.stripes[i].stripestart),
2154  false, false, NULL);
2155 
2156  if (!context.stripes[i].mdl) {
2157  ERR("IoAllocateMdl failed\n");
2158  MmUnlockPages(master_mdl);
2159  IoFreeMdl(master_mdl);
2161  goto exit;
2162  }
2163  }
2164  }
2165 
2166  if (need_dummy) {
2168  if (!dummypage) {
2169  ERR("out of memory\n");
2170  MmUnlockPages(master_mdl);
2171  IoFreeMdl(master_mdl);
2173  goto exit;
2174  }
2175 
2176  dummy_mdl = IoAllocateMdl(dummypage, PAGE_SIZE, false, false, NULL);
2177  if (!dummy_mdl) {
2178  ERR("IoAllocateMdl failed\n");
2179  MmUnlockPages(master_mdl);
2180  IoFreeMdl(master_mdl);
2182  goto exit;
2183  }
2184 
2185  MmBuildMdlForNonPagedPool(dummy_mdl);
2186 
2187  dummy = *(PFN_NUMBER*)(dummy_mdl + 1);
2188  }
2189 
2190  stripeoff = ExAllocatePoolWithTag(NonPagedPool, sizeof(uint32_t) * ci->num_stripes, ALLOC_TAG);
2191  if (!stripeoff) {
2192  ERR("out of memory\n");
2193  MmUnlockPages(master_mdl);
2194  IoFreeMdl(master_mdl);
2196  goto exit;
2197  }
2198 
2199  RtlZeroMemory(stripeoff, sizeof(uint32_t) * ci->num_stripes);
2200 
2201  pos = 0;
2202 
2203  while (pos < length) {
2204  PFN_NUMBER* stripe_pfns;
2205 
2206  parity = (((addr - offset + pos) / ((ci->num_stripes - 1) * ci->stripe_length)) + ci->num_stripes - 1) % ci->num_stripes;
2207 
2208  if (pos == 0) {
2209  uint16_t stripe = (parity + startoffstripe + 1) % ci->num_stripes;
2210  uint32_t readlen = min(length - pos, (uint32_t)min(context.stripes[stripe].stripeend - context.stripes[stripe].stripestart,
2211  ci->stripe_length - (context.stripes[stripe].stripestart % ci->stripe_length)));
2212 
2213  stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1);
2214 
2215  RtlCopyMemory(stripe_pfns, pfns, readlen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
2216 
2217  stripeoff[stripe] = readlen;
2218  pos += readlen;
2219 
2220  stripe = (stripe + 1) % ci->num_stripes;
2221 
2222  while (stripe != parity) {
2223  stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1);
2224  readlen = min(length - pos, (uint32_t)min(context.stripes[stripe].stripeend - context.stripes[stripe].stripestart, ci->stripe_length));
2225 
2226  if (readlen == 0)
2227  break;
2228 
2229  RtlCopyMemory(stripe_pfns, &pfns[pos >> PAGE_SHIFT], readlen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
2230 
2231  stripeoff[stripe] = readlen;
2232  pos += readlen;
2233 
2234  stripe = (stripe + 1) % ci->num_stripes;
2235  }
2236  } else if (length - pos >= ci->stripe_length * (ci->num_stripes - 1)) {
2237  uint16_t stripe = (parity + 1) % ci->num_stripes;
2238  ULONG k;
2239 
2240  while (stripe != parity) {
2241  stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1);
2242 
2243  RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], (ULONG)(ci->stripe_length * sizeof(PFN_NUMBER) >> PAGE_SHIFT));
2244 
2245  stripeoff[stripe] += (uint32_t)ci->stripe_length;
2246  pos += (uint32_t)ci->stripe_length;
2247 
2248  stripe = (stripe + 1) % ci->num_stripes;
2249  }
2250 
2251  stripe_pfns = (PFN_NUMBER*)(context.stripes[parity].mdl + 1);
2252 
2253  for (k = 0; k < ci->stripe_length >> PAGE_SHIFT; k++) {
2254  stripe_pfns[stripeoff[parity] >> PAGE_SHIFT] = dummy;
2255  stripeoff[parity] += PAGE_SIZE;
2256  }
2257  } else {
2258  uint16_t stripe = (parity + 1) % ci->num_stripes;
2259  uint32_t readlen;
2260 
2261  while (pos < length) {
2262  stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1);
2263  readlen = min(length - pos, (ULONG)min(context.stripes[stripe].stripeend - context.stripes[stripe].stripestart, ci->stripe_length));
2264 
2265  if (readlen == 0)
2266  break;
2267 
2268  RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], readlen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
2269 
2270  stripeoff[stripe] += readlen;
2271  pos += readlen;
2272 
2273  stripe = (stripe + 1) % ci->num_stripes;
2274  }
2275  }
2276  }
2277 
2278  MmUnlockPages(master_mdl);
2279  IoFreeMdl(master_mdl);
2280 
2281  ExFreePool(stripeoff);
2282  } else if (type == BLOCK_FLAG_RAID6) {
2283  uint64_t startoff, endoff;
2284  uint16_t endoffstripe, parity1;
2285  uint32_t *stripeoff, pos;
2286  PMDL master_mdl;
2287  PFN_NUMBER *pfns, dummy;
2288  bool need_dummy = false;
2289 
2290  get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes - 2, &startoff, &startoffstripe);
2291  get_raid0_offset(addr + length - offset - 1, ci->stripe_length, ci->num_stripes - 2, &endoff, &endoffstripe);
2292 
2293  if (file_read) {
2295 
2296  if (!context.va) {
2297  ERR("out of memory\n");
2299  goto exit;
2300  }
2301  } else
2302  context.va = buf;
2303 
2304  master_mdl = IoAllocateMdl(context.va, length, false, false, NULL);
2305  if (!master_mdl) {
2306  ERR("out of memory\n");
2308  goto exit;
2309  }
2310 
2312 
2313  _SEH2_TRY {
2317  } _SEH2_END;
2318 
2319  if (!NT_SUCCESS(Status)) {
2320  ERR("MmProbeAndLockPages threw exception %08lx\n", Status);
2321  IoFreeMdl(master_mdl);
2322  goto exit;
2323  }
2324 
2325  pfns = (PFN_NUMBER*)(master_mdl + 1);
2326 
2327  pos = 0;
2328  while (pos < length) {
2329  parity1 = (((addr - offset + pos) / ((ci->num_stripes - 2) * ci->stripe_length)) + ci->num_stripes - 2) % ci->num_stripes;
2330 
2331  if (pos == 0) {
2332  uint16_t stripe = (parity1 + startoffstripe + 2) % ci->num_stripes, parity2;
2333  ULONG skip, readlen;
2334 
2335  i = startoffstripe;
2336  while (stripe != parity1) {
2337  if (i == startoffstripe) {
2338  readlen = (ULONG)min(length, ci->stripe_length - (startoff % ci->stripe_length));
2339 
2340  context.stripes[stripe].stripestart = startoff;
2341  context.stripes[stripe].stripeend = startoff + readlen;
2342 
2343  pos += readlen;
2344 
2345  if (pos == length)
2346  break;
2347  } else {
2348  readlen = min(length - pos, (ULONG)ci->stripe_length);
2349 
2350  context.stripes[stripe].stripestart = startoff - (startoff % ci->stripe_length);
2351  context.stripes[stripe].stripeend = context.stripes[stripe].stripestart + readlen;
2352 
2353  pos += readlen;
2354 
2355  if (pos == length)
2356  break;
2357  }
2358 
2359  i++;
2360  stripe = (stripe + 1) % ci->num_stripes;
2361  }
2362 
2363  if (pos == length)
2364  break;
2365 
2366  for (i = 0; i < startoffstripe; i++) {
2367  uint16_t stripe2 = (parity1 + i + 2) % ci->num_stripes;
2368 
2369  context.stripes[stripe2].stripestart = context.stripes[stripe2].stripeend = startoff - (startoff % ci->stripe_length) + ci->stripe_length;
2370  }
2371 
2372  context.stripes[parity1].stripestart = context.stripes[parity1].stripeend = startoff - (startoff % ci->stripe_length) + ci->stripe_length;
2373 
2374  parity2 = (parity1 + 1) % ci->num_stripes;
2375  context.stripes[parity2].stripestart = context.stripes[parity2].stripeend = startoff - (startoff % ci->stripe_length) + ci->stripe_length;
2376 
2377  if (length - pos > ci->num_stripes * (ci->num_stripes - 2) * ci->stripe_length) {
2378  skip = (ULONG)(((length - pos) / (ci->num_stripes * (ci->num_stripes - 2) * ci->stripe_length)) - 1);
2379 
2380  for (i = 0; i < ci->num_stripes; i++) {
2381  context.stripes[i].stripeend += skip * ci->num_stripes * ci->stripe_length;
2382  }
2383 
2384  pos += (uint32_t)(skip * (ci->num_stripes - 2) * ci->num_stripes * ci->stripe_length);
2385  need_dummy = true;
2386  }
2387  } else if (length - pos >= ci->stripe_length * (ci->num_stripes - 2)) {
2388  for (i = 0; i < ci->num_stripes; i++) {
2389  context.stripes[i].stripeend += ci->stripe_length;
2390  }
2391 
2392  pos += (uint32_t)(ci->stripe_length * (ci->num_stripes - 2));
2393  need_dummy = true;
2394  } else {
2395  uint16_t stripe = (parity1 + 2) % ci->num_stripes;
2396 
2397  i = 0;
2398  while (stripe != parity1) {
2399  if (endoffstripe == i) {
2400  context.stripes[stripe].stripeend = endoff + 1;
2401  break;
2402  } else if (endoffstripe > i)
2403  context.stripes[stripe].stripeend = endoff - (endoff % ci->stripe_length) + ci->stripe_length;
2404 
2405  i++;
2406  stripe = (stripe + 1) % ci->num_stripes;
2407  }
2408 
2409  break;
2410  }
2411  }
2412 
2413  for (i = 0; i < ci->num_stripes; i++) {
2414  if (context.stripes[i].stripestart != context.stripes[i].stripeend) {
2415  context.stripes[i].mdl = IoAllocateMdl(context.va, (ULONG)(context.stripes[i].stripeend - context.stripes[i].stripestart), false, false, NULL);
2416 
2417  if (!context.stripes[i].mdl) {
2418  ERR("IoAllocateMdl failed\n");
2419  MmUnlockPages(master_mdl);
2420  IoFreeMdl(master_mdl);
2422  goto exit;
2423  }
2424  }
2425  }
2426 
2427  if (need_dummy) {
2429  if (!dummypage) {
2430  ERR("out of memory\n");
2431  MmUnlockPages(master_mdl);
2432  IoFreeMdl(master_mdl);
2434  goto exit;
2435  }
2436 
2437  dummy_mdl = IoAllocateMdl(dummypage, PAGE_SIZE, false, false, NULL);
2438  if (!dummy_mdl) {
2439  ERR("IoAllocateMdl failed\n");
2440  MmUnlockPages(master_mdl);
2441  IoFreeMdl(master_mdl);
2443  goto exit;
2444  }
2445 
2446  MmBuildMdlForNonPagedPool(dummy_mdl);
2447 
2448  dummy = *(PFN_NUMBER*)(dummy_mdl + 1);
2449  }
2450 
2451  stripeoff = ExAllocatePoolWithTag(NonPagedPool, sizeof(uint32_t) * ci->num_stripes, ALLOC_TAG);
2452  if (!stripeoff) {
2453  ERR("out of memory\n");
2454  MmUnlockPages(master_mdl);
2455  IoFreeMdl(master_mdl);
2457  goto exit;
2458  }
2459 
2460  RtlZeroMemory(stripeoff, sizeof(uint32_t) * ci->num_stripes);
2461 
2462  pos = 0;
2463 
2464  while (pos < length) {
2465  PFN_NUMBER* stripe_pfns;
2466 
2467  parity1 = (((addr - offset + pos) / ((ci->num_stripes - 2) * ci->stripe_length)) + ci->num_stripes - 2) % ci->num_stripes;
2468 
2469  if (pos == 0) {
2470  uint16_t stripe = (parity1 + startoffstripe + 2) % ci->num_stripes;
2471  uint32_t readlen = min(length - pos, (uint32_t)min(context.stripes[stripe].stripeend - context.stripes[stripe].stripestart,
2472  ci->stripe_length - (context.stripes[stripe].stripestart % ci->stripe_length)));
2473 
2474  stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1);
2475 
2476  RtlCopyMemory(stripe_pfns, pfns, readlen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
2477 
2478  stripeoff[stripe] = readlen;
2479  pos += readlen;
2480 
2481  stripe = (stripe + 1) % ci->num_stripes;
2482 
2483  while (stripe != parity1) {
2484  stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1);
2485  readlen = (uint32_t)min(length - pos, min(context.stripes[stripe].stripeend - context.stripes[stripe].stripestart, ci->stripe_length));
2486 
2487  if (readlen == 0)
2488  break;
2489 
2490  RtlCopyMemory(stripe_pfns, &pfns[pos >> PAGE_SHIFT], readlen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
2491 
2492  stripeoff[stripe] = readlen;
2493  pos += readlen;
2494 
2495  stripe = (stripe + 1) % ci->num_stripes;
2496  }
2497  } else if (length - pos >= ci->stripe_length * (ci->num_stripes - 2)) {
2498  uint16_t stripe = (parity1 + 2) % ci->num_stripes;
2499  uint16_t parity2 = (parity1 + 1) % ci->num_stripes;
2500  ULONG k;
2501 
2502  while (stripe != parity1) {
2503  stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1);
2504 
2505  RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], (ULONG)(ci->stripe_length * sizeof(PFN_NUMBER) >> PAGE_SHIFT));
2506 
2507  stripeoff[stripe] += (uint32_t)ci->stripe_length;
2508  pos += (uint32_t)ci->stripe_length;
2509 
2510  stripe = (stripe + 1) % ci->num_stripes;
2511  }
2512 
2513  stripe_pfns = (PFN_NUMBER*)(context.stripes[parity1].mdl + 1);
2514 
2515  for (k = 0; k < ci->stripe_length >> PAGE_SHIFT; k++) {
2516  stripe_pfns[stripeoff[parity1] >> PAGE_SHIFT] = dummy;
2517  stripeoff[parity1] += PAGE_SIZE;
2518  }
2519 
2520  stripe_pfns = (PFN_NUMBER*)(context.stripes[parity2].mdl + 1);
2521 
2522  for (k = 0; k < ci->stripe_length >> PAGE_SHIFT; k++) {
2523  stripe_pfns[stripeoff[parity2] >> PAGE_SHIFT] = dummy;
2524  stripeoff[parity2] += PAGE_SIZE;
2525  }
2526  } else {
2527  uint16_t stripe = (parity1 + 2) % ci->num_stripes;
2528  uint32_t readlen;
2529 
2530  while (pos < length) {
2531  stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1);
2532  readlen = (uint32_t)min(length - pos, min(context.stripes[stripe].stripeend - context.stripes[stripe].stripestart, ci->stripe_length));
2533 
2534  if (readlen == 0)
2535  break;
2536 
2537  RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], readlen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
2538 
2539  stripeoff[stripe] += readlen;
2540  pos += readlen;
2541 
2542  stripe = (stripe + 1) % ci->num_stripes;
2543  }
2544  }
2545  }
2546 
2547  MmUnlockPages(master_mdl);
2548  IoFreeMdl(master_mdl);
2549 
2550  ExFreePool(stripeoff);
2551  }
2552 
2553  context.address = addr;
2554 
2555  for (i = 0; i < ci->num_stripes; i++) {
2556  if (!devices[i] || !devices[i]->devobj || context.stripes[i].stripestart == context.stripes[i].stripeend) {
2557  context.stripes[i].status = ReadDataStatus_MissingDevice;
2558  context.stripes_left--;
2559 
2560  if (!devices[i] || !devices[i]->devobj)
2561  missing_devices++;
2562  }
2563  }
2564 
2565  if (missing_devices > allowed_missing) {
2566  ERR("not enough devices to service request (%u missing)\n", missing_devices);
2568  goto exit;
2569  }
2570 
2571  for (i = 0; i < ci->num_stripes; i++) {
2573 
2574  if (devices[i] && devices[i]->devobj && context.stripes[i].stripestart != context.stripes[i].stripeend && context.stripes[i].status != ReadDataStatus_Skip) {
2575  context.stripes[i].context = (struct read_data_context*)&context;
2576 
2577  if (type == BLOCK_FLAG_RAID10) {
2578  context.stripes[i].stripenum = i / ci->sub_stripes;
2579  }
2580 
2581  if (!Irp) {
2582  context.stripes[i].Irp = IoAllocateIrp(devices[i]->devobj->StackSize, false);
2583 
2584  if (!context.stripes[i].Irp) {
2585  ERR("IoAllocateIrp failed\n");
2587  goto exit;
2588  }
2589  } else {
2590  context.stripes[i].Irp = IoMakeAssociatedIrp(Irp, devices[i]->devobj->StackSize);
2591 
2592  if (!context.stripes[i].Irp) {
2593  ERR("IoMakeAssociatedIrp failed\n");
2595  goto exit;
2596  }
2597  }
2598 
2599  IrpSp = IoGetNextIrpStackLocation(context.stripes[i].Irp);
2602  IrpSp->FileObject = devices[i]->fileobj;
2603 
2604  if (devices[i]->devobj->Flags & DO_BUFFERED_IO) {
2605  context.stripes[i].Irp->AssociatedIrp.SystemBuffer = ExAllocatePoolWithTag(NonPagedPool, (ULONG)(context.stripes[i].stripeend - context.stripes[i].stripestart), ALLOC_TAG);
2606  if (!context.stripes[i].Irp->AssociatedIrp.SystemBuffer) {
2607  ERR("out of memory\n");
2609  goto exit;
2610  }
2611 
2613 
2614  context.stripes[i].Irp->UserBuffer = MmGetSystemAddressForMdlSafe(context.stripes[i].mdl, priority);
2615  } else if (devices[i]->devobj->Flags & DO_DIRECT_IO)
2616  context.stripes[i].Irp->MdlAddress = context.stripes[i].mdl;
2617  else
2618  context.stripes[i].Irp->UserBuffer = MmGetSystemAddressForMdlSafe(context.stripes[i].mdl, priority);
2619 
2620  IrpSp->Parameters.Read.Length = (ULONG)(context.stripes[i].stripeend - context.stripes[i].stripestart);
2621  IrpSp->Parameters.Read.ByteOffset.QuadPart = context.stripes[i].stripestart + cis[i].offset;
2622 
2623  total_reading += IrpSp->Parameters.Read.Length;
2624 
2625  context.stripes[i].Irp->UserIosb = &context.stripes[i].iosb;
2626 
2627  IoSetCompletionRoutine(context.stripes[i].Irp, read_data_completion, &context.stripes[i], true, true, true);
2628 
2629  context.stripes[i].status = ReadDataStatus_Pending;
2630  }
2631  }
2632 
2633  need_to_wait = false;
2634  for (i = 0; i < ci->num_stripes; i++) {
2635  if (context.stripes[i].status != ReadDataStatus_MissingDevice && context.stripes[i].status != ReadDataStatus_Skip) {
2636  IoCallDriver(devices[i]->devobj, context.stripes[i].Irp);
2637  need_to_wait = true;
2638  }
2639  }
2640 
2641  if (need_to_wait)
2643 
2644  if (diskacc)
2645  fFsRtlUpdateDiskCounters(total_reading, 0);
2646 
2647  // check if any of the devices return a "user-induced" error
2648 
2649  for (i = 0; i < ci->num_stripes; i++) {
2650  if (context.stripes[i].status == ReadDataStatus_Error && IoIsErrorUserInduced(context.stripes[i].iosb.Status)) {
2651  Status = context.stripes[i].iosb.Status;
2652  goto exit;
2653  }
2654  }
2655 
2656  if (type == BLOCK_FLAG_RAID0) {
2657  Status = read_data_raid0(Vcb, file_read ? context.va : buf, addr, length, &context, ci, devices, generation, offset);
2658  if (!NT_SUCCESS(Status)) {
2659  ERR("read_data_raid0 returned %08lx\n", Status);
2660 
2661  if (file_read)
2662  ExFreePool(context.va);
2663 
2664  goto exit;
2665  }
2666 
2667  if (file_read) {
2669  ExFreePool(context.va);
2670  }
2671  } else if (type == BLOCK_FLAG_RAID10) {
2672  Status = read_data_raid10(Vcb, file_read ? context.va : buf, addr, length, &context, ci, devices, generation, offset);
2673 
2674  if (!NT_SUCCESS(Status)) {
2675  ERR("read_data_raid10 returned %08lx\n", Status);
2676 
2677  if (file_read)
2678  ExFreePool(context.va);
2679 
2680  goto exit;
2681  }
2682 
2683  if (file_read) {
2685  ExFreePool(context.va);
2686  }
2687  } else if (type == BLOCK_FLAG_DUPLICATE) {
2688  Status = read_data_dup(Vcb, file_read ? context.va : buf, addr, &context, ci, devices, generation);
2689  if (!NT_SUCCESS(Status)) {
2690  ERR("read_data_dup returned %08lx\n", Status);
2691 
2692  if (file_read)
2693  ExFreePool(context.va);
2694 
2695  goto exit;
2696  }
2697 
2698  if (file_read) {
2700  ExFreePool(context.va);
2701  }
2702  } else if (type == BLOCK_FLAG_RAID5) {
2703  Status = read_data_raid5(Vcb, file_read ? context.va : buf, addr, length, &context, ci, devices, offset, generation, c, missing_devices > 0 ? true : false);
2704  if (!NT_SUCCESS(Status)) {
2705  ERR("read_data_raid5 returned %08lx\n", Status);
2706 
2707  if (file_read)
2708  ExFreePool(context.va);
2709 
2710  goto exit;
2711  }
2712 
2713  if (file_read) {
2715  ExFreePool(context.va);
2716  }
2717  } else if (type == BLOCK_FLAG_RAID6) {
2718  Status = read_data_raid6(Vcb, file_read ? context.va : buf, addr, length, &context, ci, devices, offset, generation, c, missing_devices > 0 ? true : false);
2719  if (!NT_SUCCESS(Status)) {
2720  ERR("read_data_raid6 returned %08lx\n", Status);
2721 
2722  if (file_read)
2723  ExFreePool(context.va);
2724 
2725  goto exit;
2726  }
2727 
2728  if (file_read) {
2730  ExFreePool(context.va);
2731  }
2732  }
2733 
2734 exit:
2735  if (c && (type == BLOCK_FLAG_RAID5 || type == BLOCK_FLAG_RAID6))
2736  chunk_unlock_range(Vcb, c, lockaddr, locklen);
2737 
2738  if (dummy_mdl)
2739  IoFreeMdl(dummy_mdl);
2740 
2741  if (dummypage)
2742  ExFreePool(dummypage);
2743 
2744  for (i = 0; i < ci->num_stripes; i++) {
2745  if (context.stripes[i].mdl) {
2746  if (context.stripes[i].mdl->MdlFlags & MDL_PAGES_LOCKED)
2747  MmUnlockPages(context.stripes[i].mdl);
2748 
2749  IoFreeMdl(context.stripes[i].mdl);
2750  }
2751 
2752  if (context.stripes[i].Irp)
2753  IoFreeIrp(context.stripes[i].Irp);
2754  }
2755 
2756  ExFreePool(context.stripes);
2757 
2758  if (!Vcb->log_to_phys_loaded)
2760 
2761  return Status;
2762 }
2763 
/* Reads from a stream whose entire contents are held in memory in
 * fcb->adsdata (presumably an NTFS-style alternate data stream — confirm
 * against the fcb definition, which is outside this view; the function
 * signature line was lost in extraction but the TRACE below shows the
 * parameters: fcb, data (destination), start (stream offset), length
 * (bytes requested), pbr (optional out: bytes actually read).
 *
 * Returns STATUS_END_OF_FILE if start is at or past the end of the stream,
 * otherwise STATUS_SUCCESS with *pbr set to the number of bytes copied
 * (clamped to the remaining stream length). */
2765  ULONG readlen;
2766 
2767  TRACE("(%p, %p, %I64x, %lx, %p)\n", fcb, data, start, length, pbr);
2768 
     /* Report zero bytes read up front so every early-return path is covered. */
2769  if (pbr) *pbr = 0;
2770 
2771  if (start >= fcb->adsdata.Length) {
2772  TRACE("tried to read beyond end of stream\n");
2773  return STATUS_END_OF_FILE;
2774  }
2775 
     /* A zero-length read is not an error — succeed with nothing copied. */
2776  if (length == 0) {
2777  WARN("tried to read zero bytes\n");
2778  return STATUS_SUCCESS;
2779  }
2780 
     /* Clamp the read to the data remaining after `start`. */
2781  if (start + length < fcb->adsdata.Length)
2782  readlen = length;
2783  else
2784  readlen = fcb->adsdata.Length - (ULONG)start;
2785 
2786  if (readlen > 0)
2787  RtlCopyMemory(data, fcb->adsdata.Buffer + start, readlen);
2788 
2789  if (pbr) *pbr = readlen;
2790 
2791  return STATUS_SUCCESS;
2792 }
2793 
2794 typedef struct {
2800 
/* Describes one physical read request covering one or more contiguous file
 * extents; read_file builds a list of these, merges adjacent compressed runs,
 * then issues each via read_data.
 * NOTE(review): several fields were lost in documentation extraction (usage
 * elsewhere in this file also references addr, to_read, read, buf, bumpoff,
 * csum_free, compression, c and list_entry) — consult the original source. */
2801 typedef struct {
     /* Checksum buffer for the run, or NULL when the extent has no csum. */
2807  void* csum;
     /* true when buf was allocated here and must be freed after the read. */
2810  bool buf_free;
     /* true when the read should go through the caller Irp's MDL. */
2812  bool mdl;
     /* Destination within the caller's output buffer. */
2813  void* data;
     /* Number of valid entries in extents[]. */
2815  unsigned int num_extents;
     /* Trailing variable-length array (C89-style [1] flexible member);
      * allocated with offsetof(read_part, extents) + n * sizeof entries. */
2816  read_part_extent extents[1];
2817 } read_part;
2818 
/* Deferred decompression job queued by read_file and executed later via
 * calc_thread_main; carries where the decompressed bytes land and how much
 * of them to copy back to the caller's buffer.
 * NOTE(review): two fields (a LIST_ENTRY and a calc_job pointer, used as
 * ccj->list_entry / ccj->cj later in this file) were lost in documentation
 * extraction — consult the original source. */
2819 typedef struct {
     /* Temporary decompression buffer when the wanted data does not start at
      * offset 0 of the decompressed stream; NULL when decompressing straight
      * into `data`. Freed by the consumer after the copy. */
2822  void* decomp;
     /* Final destination within the caller's output buffer. */
2823  void* data;
     /* Byte offset into `decomp` where the caller-visible data begins. */
2824  unsigned int offset;
     /* Number of bytes to copy from decomp+offset into data. */
2825  size_t length;
2826 } comp_calc_job;
2827 
2829  NTSTATUS Status;
2830  uint32_t bytes_read = 0;
2831  uint64_t last_end;
2832  LIST_ENTRY* le;
2833  POOL_TYPE pool_type;
2834  LIST_ENTRY read_parts, calc_jobs;
2835 
2836  TRACE("(%p, %p, %I64x, %I64x, %p)\n", fcb, data, start, length, pbr);
2837 
2838  if (pbr)
2839  *pbr = 0;
2840 
2841  if (start >= fcb->inode_item.st_size) {
2842  WARN("Tried to read beyond end of file\n");
2843  return STATUS_END_OF_FILE;
2844  }
2845 
2846  InitializeListHead(&read_parts);
2847  InitializeListHead(&calc_jobs);
2848 
2849  pool_type = fcb->Header.Flags2 & FSRTL_FLAG2_IS_PAGING_FILE ? NonPagedPool : PagedPool;
2850 
2851  le = fcb->extents.Flink;
2852 
2853  last_end = start;
2854 
2855  while (le != &fcb->extents) {
2856  uint64_t len;
2858 
2859  if (!ext->ignore) {
2860  EXTENT_DATA* ed = &ext->extent_data;
2862 
2863  len = ed2 ? ed2->num_bytes : ed->decoded_size;
2864 
2865  if (ext->offset + len <= start) {
2866  last_end = ext->offset + len;
2867  goto nextitem;
2868  }
2869 
2870  if (ext->offset > last_end && ext->offset > start + bytes_read) {
2871  uint32_t read = (uint32_t)min(length, ext->offset - max(start, last_end));
2872 
2873  RtlZeroMemory(data + bytes_read, read);
2874  bytes_read += read;
2875  length -= read;
2876  }
2877 
2878  if (length == 0 || ext->offset > start + bytes_read + length)
2879  break;
2880 
2882  WARN("Encryption not supported\n");
2884  goto exit;
2885  }
2886 
2887  if (ed->encoding != BTRFS_ENCODING_NONE) {
2888  WARN("Other encodings not supported\n");
2890  goto exit;
2891  }
2892 
2893  switch (ed->type) {
2894  case EXTENT_TYPE_INLINE:
2895  {
2896  uint64_t off = start + bytes_read - ext->offset;
2897  uint32_t read;
2898 
2900  read = (uint32_t)min(min(len, ext->datalen) - off, length);
2901 
2902  RtlCopyMemory(data + bytes_read, &ed->data[off], read);
2904  uint8_t* decomp;
2905  bool decomp_alloc;
2906  uint16_t inlen = ext->datalen - (uint16_t)offsetof(EXTENT_DATA, data[0]);
2907 
2908  if (ed->decoded_size == 0 || ed->decoded_size > 0xffffffff) {
2909  ERR("ed->decoded_size was invalid (%I64x)\n", ed->decoded_size);
2911  goto exit;
2912  }
2913 
2915 
2916  if (off > 0) {
2918  if (!decomp) {
2919  ERR("out of memory\n");
2921  goto exit;
2922  }
2923 
2924  decomp_alloc = true;
2925  } else {
2926  decomp = data + bytes_read;
2927  decomp_alloc = false;
2928  }
2929 
2931  Status = zlib_decompress(ed->data, inlen, decomp, (uint32_t)(read + off));
2932  if (!NT_SUCCESS(Status)) {
2933  ERR("zlib_decompress returned %08lx\n", Status);
2934  if (decomp_alloc) ExFreePool(decomp);
2935  goto exit;
2936  }
2937  } else if (ed->compression == BTRFS_COMPRESSION_LZO) {
2938  if (inlen < sizeof(uint32_t)) {
2939  ERR("extent data was truncated\n");
2941  if (decomp_alloc) ExFreePool(decomp);
2942  goto exit;
2943  } else
2944  inlen -= sizeof(uint32_t);
2945 
2946  Status = lzo_decompress(ed->data + sizeof(uint32_t), inlen, decomp, (uint32_t)(read + off), sizeof(uint32_t));
2947  if (!NT_SUCCESS(Status)) {
2948  ERR("lzo_decompress returned %08lx\n", Status);
2949  if (decomp_alloc) ExFreePool(decomp);
2950  goto exit;
2951  }
2952  } else if (ed->compression == BTRFS_COMPRESSION_ZSTD) {
2953  Status = zstd_decompress(ed->data, inlen, decomp, (uint32_t)(read + off));
2954  if (!NT_SUCCESS(Status)) {
2955  ERR("zstd_decompress returned %08lx\n", Status);
2956  if (decomp_alloc) ExFreePool(decomp);
2957  goto exit;
2958  }
2959  }
2960 
2961  if (decomp_alloc) {
2962  RtlCopyMemory(data + bytes_read, decomp + off, read);
2963  ExFreePool(decomp);
2964  }
2965  } else {
2966  ERR("unhandled compression type %x\n", ed->compression);
2968  goto exit;
2969  }
2970 
2971  bytes_read += read;
2972  length -= read;
2973 
2974  break;
2975  }
2976 
2977  case EXTENT_TYPE_REGULAR:
2978  {
2979  read_part* rp;
2980 
2981  rp = ExAllocatePoolWithTag(pool_type, sizeof(read_part), ALLOC_TAG);
2982  if (!rp) {
2983  ERR("out of memory\n");
2985  goto exit;
2986  }
2987 
2988  rp->mdl = (Irp && Irp->MdlAddress) ? true : false;
2989  rp->extents[0].off = start + bytes_read - ext->offset;
2990  rp->bumpoff = 0;
2991  rp->num_extents = 1;
2992  rp->csum_free = false;
2993 
2994  rp->read = (uint32_t)(len - rp->extents[0].off);
2995  if (rp->read > length) rp->read = (uint32_t)length;
2996 
2998  rp->addr = ed2->address + ed2->offset + rp->extents[0].off;
2999  rp->to_read = (uint32_t)sector_align(rp->read, fcb->Vcb->superblock.sector_size);
3000 
3001  if (rp->addr % fcb->Vcb->superblock.sector_size > 0) {
3002  rp->bumpoff = rp->addr % fcb->Vcb->superblock.sector_size;
3003  rp->addr -= rp->bumpoff;
3004  rp->to_read = (uint32_t)sector_align(rp->read + rp->bumpoff, fcb->Vcb->superblock.sector_size);
3005  }
3006  } else {
3007  rp->addr = ed2->address;
3008  rp->to_read = (uint32_t)sector_align(ed2->size, fcb->Vcb->superblock.sector_size);
3009  }
3010 
3011  if (ed->compression == BTRFS_COMPRESSION_NONE && start % fcb->Vcb->superblock.sector_size == 0 &&
3012  length % fcb->Vcb->superblock.sector_size == 0) {
3013  rp->buf = data + bytes_read;
3014  rp->buf_free = false;
3015  } else {
3016  rp->buf = ExAllocatePoolWithTag(pool_type, rp->to_read, ALLOC_TAG);
3017  rp->buf_free = true;
3018 
3019  if (!rp->buf) {
3020  ERR("out of memory\n");
3022  ExFreePool(rp);
3023  goto exit;
3024  }
3025 
3026  rp->mdl = false;
3027  }
3028 
3029  rp->c = get_chunk_from_address(fcb->Vcb, rp->addr);
3030 
3031  if (!rp->c) {
3032  ERR("get_chunk_from_address(%I64x) failed\n", rp->addr);
3033 
3034  if (rp->buf_free)
3035  ExFreePool(rp->buf);
3036 
3037  ExFreePool(rp);
3038 
3039  goto exit;
3040  }
3041 
3042  if (ext->csum) {
3044  rp->csum = (uint8_t*)ext->csum + (fcb->Vcb->csum_size * (rp->extents[0].off / fcb->Vcb->superblock.sector_size));
3045  } else
3046  rp->csum = ext->csum;
3047  } else
3048  rp->csum = NULL;
3049 
3050  rp->data = data + bytes_read;
3051  rp->compression = ed->compression;
3052  rp->extents[0].ed_offset = ed2->offset;
3053  rp->extents[0].ed_size = ed2->size;
3054  rp->extents[0].ed_num_bytes = ed2->num_bytes;
3055 
3056  InsertTailList(&read_parts, &rp->list_entry);
3057 
3058  bytes_read += rp->read;
3059  length -= rp->read;
3060 
3061  break;
3062  }
3063 
3064  case EXTENT_TYPE_PREALLOC:
3065  {
3066  uint64_t off = start + bytes_read - ext->offset;
3067  uint32_t read = (uint32_t)(len - off);
3068 
3069  if (read > length) read = (uint32_t)length;
3070 
3071  RtlZeroMemory(data + bytes_read, read);
3072 
3073  bytes_read += read;
3074  length -= read;
3075 
3076  break;
3077  }
3078 
3079  default:
3080  WARN("Unsupported extent data type %u\n", ed->type);
3082  goto exit;
3083  }
3084 
3085  last_end = ext->offset + len;
3086 
3087  if (length == 0)
3088  break;
3089  }
3090 
3091 nextitem:
3092  le = le->Flink;
3093  }
3094 
3095  if (!IsListEmpty(&read_parts) && read_parts.Flink->Flink != &read_parts) { // at least two entries in list
3096  read_part* last_rp = CONTAINING_RECORD(read_parts.Flink, read_part, list_entry);
3097 
3098  le = read_parts.Flink->Flink;
3099  while (le != &read_parts) {
3100  LIST_ENTRY* le2 = le->Flink;
3102 
3103  // merge together runs
3104  if (rp->compression != BTRFS_COMPRESSION_NONE && rp->compression == last_rp->compression && rp->addr == last_rp->addr + last_rp->to_read &&
3105  rp->data == (uint8_t*)last_rp->data + last_rp->read && rp->c == last_rp->c && ((rp->csum && last_rp->csum) || (!rp->csum && !last_rp->csum))) {
3106  read_part* rp2;
3107 
3108  rp2 = ExAllocatePoolWithTag(pool_type, offsetof(read_part, extents) + (sizeof(read_part_extent) * (last_rp->num_extents + 1)), ALLOC_TAG);
3109 
3110  rp2->addr = last_rp->addr;
3111  rp2->c = last_rp->c;
3112  rp2->read = last_rp->read + rp->read;
3113  rp2->to_read = last_rp->to_read + rp->to_read;
3114  rp2->csum_free = false;
3115 
3116  if (last_rp->csum) {
3117  uint32_t sectors = (last_rp->to_read + rp->to_read) / fcb->Vcb->superblock.sector_size;
3118 
3119  rp2->csum = ExAllocatePoolWithTag(pool_type, sectors * fcb->Vcb->csum_size, ALLOC_TAG);
3120  if (!rp2->csum) {
3121  ERR("out of memory\n");
3122  ExFreePool(rp2);
3124  goto exit;
3125  }
3126 
3127  RtlCopyMemory(rp2->csum, last_rp->csum, last_rp->to_read * fcb->Vcb->csum_size / fcb->Vcb->superblock.sector_size);
3128  RtlCopyMemory((uint8_t*)rp2->csum + (last_rp->to_read * fcb->Vcb->csum_size / fcb->Vcb->superblock.sector_size), rp->csum,
3129  rp->to_read * fcb->Vcb->csum_size / fcb->Vcb->superblock.sector_size);
3130 
3131  rp2->csum_free = true;
3132  } else
3133  rp2->csum = NULL;
3134 
3135  rp2->buf = ExAllocatePoolWithTag(pool_type, rp2->to_read, ALLOC_TAG);
3136  if (!rp2->buf) {
3137  ERR("out of memory\n");
3138 
3139  if (rp2->csum)
3140  ExFreePool(rp2->csum);
3141 
3142  ExFreePool(rp2);
3144  goto exit;
3145  }
3146 
3147  rp2->buf_free = true;
3148  rp2->bumpoff = 0;
3149  rp2->mdl = false;
3150  rp2->data = last_rp->data;
3151  rp2->compression = last_rp->compression;
3152  rp2->num_extents = last_rp->num_extents + 1;
3153 
3154  RtlCopyMemory(rp2->extents, last_rp->extents, last_rp->num_extents * sizeof(read_part_extent));
3155  RtlCopyMemory(&rp2->extents[last_rp->num_extents], rp->extents, sizeof(read_part_extent));
3156 
3157  InsertHeadList(le->Blink, &rp2->list_entry);
3158 
3159  if (rp->buf_free)
3160  ExFreePool(rp->buf);
3161 
3162  if (rp->csum_free)
3163  ExFreePool(rp->csum);
3164 
3166 
3167  ExFreePool(rp);
3168 
3169  if (last_rp->buf_free)
3170  ExFreePool(last_rp->buf);
3171 
3172  if (last_rp->csum_free)
3173  ExFreePool(last_rp->csum);
3174 
3175  RemoveEntryList(&last_rp->list_entry);
3176 
3177  ExFreePool(last_rp);
3178 
3179  last_rp = rp2;
3180  } else
3181  last_rp = rp;
3182 
3183  le = le2;
3184  }
3185  }
3186 
3187  le = read_parts.Flink;
3188  while (le != &read_parts) {
3190 
3191  Status = read_data(fcb->Vcb, rp->addr, rp->to_read, rp->csum, false, rp->buf, rp->c, NULL, Irp, 0, rp->mdl,
3193  if (!NT_SUCCESS(Status)) {
3194  ERR("read_data returned %08lx\n", Status);
3195  goto exit;
3196  }
3197 
3198  if (rp->compression == BTRFS_COMPRESSION_NONE) {
3199  if (rp->buf_free)
3200  RtlCopyMemory(rp->data, rp->buf + rp->bumpoff, rp->read);
3201  } else {
3202  uint8_t* buf = rp->buf;
3203 #ifdef __REACTOS__
3204  unsigned int i;
3205  for (i = 0; i < rp->num_extents; i++) {
3206 #else
3207  for (unsigned int i = 0; i < rp->num_extents; i++) {
3208 #endif // __REACTOS__
3209  uint8_t *decomp = NULL, *buf2;
3210  ULONG outlen, inlen, off2;
3211  uint32_t inpageoff = 0;
3212  comp_calc_job* ccj;
3213 
3214  off2 = (ULONG)(rp->extents[i].ed_offset + rp->extents[i].off);
3215  buf2 = buf;
3216  inlen = (ULONG)rp->extents[i].ed_size;
3217 
3219  ULONG inoff = sizeof(uint32_t);
3220 
3221  inlen -= sizeof(uint32_t);
3222 
3223  // If reading a few sectors in, skip to the interesting bit
3224  while (off2 > LZO_PAGE_SIZE) {
3225  uint32_t partlen;
3226 
3227  if (inlen < sizeof(uint32_t))
3228  break;
3229 
3230  partlen = *(uint32_t*)(buf2 + inoff);
3231 
3232  if (partlen < inlen) {
3233  off2 -= LZO_PAGE_SIZE;
3234  inoff += partlen + sizeof(uint32_t);
3235  inlen -= partlen + sizeof(uint32_t);
3236 
3237  if (LZO_PAGE_SIZE - (inoff % LZO_PAGE_SIZE) < sizeof(uint32_t))
3238  inoff = ((inoff / LZO_PAGE_SIZE) + 1) * LZO_PAGE_SIZE;
3239  } else
3240  break;
3241  }
3242 
3243  buf2 = &buf2[inoff];
3244  inpageoff = inoff % LZO_PAGE_SIZE;
3245  }
3246 
3247  if (off2 != 0) {
3248  outlen = off2 + min(rp->read, (uint32_t)(rp->extents[i].ed_num_bytes - rp->extents[i].off));
3249 
3250  decomp = ExAllocatePoolWithTag(pool_type, outlen, ALLOC_TAG);
3251  if (!decomp) {
3252  ERR("out of memory\n");
3254  goto exit;
3255  }
3256  } else
3257  outlen = min(rp->read, (uint32_t)(rp->extents[i].ed_num_bytes - rp->extents[i].off));
3258 
3259  ccj = (comp_calc_job*)ExAllocatePoolWithTag(pool_type, sizeof(comp_calc_job), ALLOC_TAG);
3260  if (!ccj) {
3261  ERR("out of memory\n");
3262 
3263  if (decomp)
3264  ExFreePool(decomp);
3265 
3267  goto exit;
3268  }
3269 
3270  Status = add_calc_job_decomp(fcb->Vcb, rp->compression, buf2, inlen, decomp ? decomp : rp->data, outlen,
3271  inpageoff, &ccj->cj);
3272  if (!NT_SUCCESS(Status)) {
3273  ERR("add_calc_job_decomp returned %08lx\n", Status);
3274 
3275  if (decomp)
3276  ExFreePool(decomp);
3277 
3278  ExFreePool(ccj);
3279 
3280  goto exit;
3281  }
3282 
3283  ccj->data = rp->data;
3284  ccj->decomp = decomp;
3285 
3286  ccj->offset = off2;
3287  ccj->length = (size_t)min(rp->read, rp->extents[i].ed_num_bytes - rp->extents[i].off);
3288 
3289  InsertTailList(&calc_jobs, &ccj->list_entry);
3290 
3291  buf += rp->extents[i].ed_size;
3292  rp->data = (uint8_t*)rp->data + rp->extents[i].ed_num_bytes - rp->extents[i].off;
3293  rp->read -= (uint32_t)(rp->extents[i].ed_num_bytes - rp->extents[i].off);
3294  }
3295  }
3296 
3297  le = le->Flink;
3298  }
3299 
3300  if (length > 0 && start + bytes_read < fcb->inode_item.st_size) {
3301  uint32_t read = (uint32_t)min(fcb->inode_item.st_size - start - bytes_read, length);
3302 
3303  RtlZeroMemory(data + bytes_read, read);
3304 
3305  bytes_read += read;
3306  length -= read;
3307  }
3308 
3310 
3311  while (!IsListEmpty(&calc_jobs)) {
3313 
3314  calc_thread_main(fcb->Vcb, ccj->cj);
3315 
3317 
3318  if (!NT_SUCCESS(ccj->cj->Status))
3319  Status = ccj->cj->Status;
3320 
3321  if (ccj->decomp) {
3322  RtlCopyMemory(ccj->data, (uint8_t*)ccj->decomp + ccj->offset, ccj->length);
3323  ExFreePool(ccj->decomp);
3324  }
3325 
3326  ExFreePool(ccj);
3327  }
3328 
3329  if (pbr)
3330  *pbr = bytes_read;
3331 
3332 exit:
3333  while (!IsListEmpty(&read_parts)) {
3335 
3336  if (rp->buf_free)
3337  ExFreePool(rp->buf);
3338 
3339  if (rp->csum_free)
3340  ExFreePool(rp->csum);
3341 
3342  ExFreePool(rp);
3343  }
3344 
3345  while (!IsListEmpty(&calc_jobs)) {
3347 
3349 
3350  if (ccj->decomp)
3351  ExFreePool(ccj->decomp);
3352 
3353  ExFreePool(ccj->cj);
3354 
3355  ExFreePool(ccj);
3356  }
3357 
3358  return Status;
3359 }
3360 
// do_read() — top-level IRP_MJ_READ handler.
//
// Dispatches a read either through the NT cache manager (cached path:
// CcMdlRead / CcCopyRead(Ex)) or, for IRP_NOCACHE requests, directly to
// read_stream()/read_file().
//
//   Irp        - the read IRP being serviced
//   wait       - false if the caller may not block; where blocking would be
//                required, STATUS_PENDING is returned instead
//   bytes_read - out: bytes transferred, including the zero-filled tail
//                beyond ValidDataLength (tracked in "addon")
//
// NOTE(review): this listing is a doxygen HTML rendering of read.c; original
// source lines that contained hyperlinks (e.g. 3362-3363, 3379, 3383, 3399,
// 3403, 3408, 3421, 3444, 3454, 3460-3461, 3476, 3503-3505) were dropped by
// the extraction. Declarations such as IrpSp/FileObject/Status and several
// return/assignment statements referenced below are therefore not visible
// here — consult the original read.c before relying on this text.
3361 NTSTATUS do_read(PIRP Irp, bool wait, ULONG* bytes_read) {
// NOTE(review): lines 3362-3363 (presumably the IrpSp and FileObject
// declarations, given their use below) are missing from this extraction.
3364  fcb* fcb = FileObject->FsContext;
3365  uint8_t* data = NULL;
3366  ULONG length = IrpSp->Parameters.Read.Length, addon = 0;
3367  uint64_t start = IrpSp->Parameters.Read.ByteOffset.QuadPart;
3368 
3369  *bytes_read = 0;
3370 
// Sanity check: refuse to read if the FCB or its volume/subvolume is gone.
3371  if (!fcb || !fcb->Vcb || !fcb->subvol)
3372  return STATUS_INTERNAL_ERROR;
3373 
3374  TRACE("fcb = %p\n", fcb);
3375  TRACE("offset = %I64x, length = %lx\n", start, length);
3376  TRACE("paging_io = %s, no cache = %s\n", Irp->Flags & IRP_PAGING_IO ? "true" : "false", Irp->Flags & IRP_NOCACHE ? "true" : "false");
3377 
// Directories (that are not alternate data streams) cannot be read as files.
3378  if (!fcb->ads && fcb->type == BTRFS_TYPE_DIRECTORY)
// NOTE(review): line 3379 (the return statement for the directory case) is
// missing from this extraction.
3380 
// Honour byte-range locks for non-paging reads.
3381  if (!(Irp->Flags & IRP_PAGING_IO) && !FsRtlCheckLockForReadAccess(&fcb->lock, Irp)) {
3382  WARN("tried to read locked region\n");
// NOTE(review): line 3383 (the return statement for the locked-region case)
// is missing from this extraction.
3384  }
3385 
// Zero-length reads succeed trivially.
3386  if (length == 0) {
3387  TRACE("tried to read zero bytes\n");
3388  return STATUS_SUCCESS;
3389  }
3390 
// Reads starting at or past EOF report end-of-file.
3391  if (start >= (uint64_t)fcb->Header.FileSize.QuadPart) {
3392  TRACE("tried to read with offset after file end (%I64x >= %I64x)\n", start, fcb->Header.FileSize.QuadPart);
3393  return STATUS_END_OF_FILE;
3394  }
3395 
3396  TRACE("FileObject %p fcb %p FileSize = %I64x st_size = %I64x (%p)\n", FileObject, fcb, fcb->Header.FileSize.QuadPart, fcb->inode_item.st_size, &fcb->inode_item.st_size);
3397 
// For non-cached or non-MDL reads, map the caller's buffer and handle the
// region beyond ValidDataLength, which must read back as zeroes.
3398  if (Irp->Flags & IRP_NOCACHE || !(IrpSp->MinorFunction & IRP_MN_MDL)) {
// NOTE(review): line 3399 (presumably the call that maps the user buffer
// into "data" — the ERR below names MmGetSystemAddressForMdlSafe) is
// missing from this extraction.
3400 
3401  if (Irp->MdlAddress && !data) {
3402  ERR("MmGetSystemAddressForMdlSafe returned NULL\n");
// NOTE(review): line 3403 (the error return for the failed mapping) is
// missing from this extraction.
3404  }
3405 
// Entire read lies beyond ValidDataLength: satisfy it with zeroes only and
// return without touching the disk.
3406  if (start >= (uint64_t)fcb->Header.ValidDataLength.QuadPart) {
3407  length = (ULONG)min(length, min(start + length, (uint64_t)fcb->Header.FileSize.QuadPart) - fcb->Header.ValidDataLength.QuadPart);
// NOTE(review): line 3408 (presumably RtlZeroMemory of "data" over "length"
// bytes) is missing from this extraction.
3409  Irp->IoStatus.Information = *bytes_read = length;
3410  return STATUS_SUCCESS;
3411  }
3412 
// Read straddles ValidDataLength: zero-fill the tail now ("addon" bytes)
// and shrink "length" to the valid portion that needs real I/O.
3413  if (length + start > (uint64_t)fcb->Header.ValidDataLength.QuadPart) {
3414  addon = (ULONG)(min(start + length, (uint64_t)fcb->Header.FileSize.QuadPart) - fcb->Header.ValidDataLength.QuadPart);
3415  RtlZeroMemory(data + (fcb->Header.ValidDataLength.QuadPart - start), addon);
3416  length = (ULONG)(fcb->Header.ValidDataLength.QuadPart - start);
3417  }
3418  }
3419 
// Cached path: let the cache manager satisfy the read.
3420  if (!(Irp->Flags & IRP_NOCACHE)) {
// NOTE(review): line 3421 (presumably the declaration of the local Status
// used after _SEH2_END) is missing from this extraction.
3422 
3423  _SEH2_TRY {
// Initialise caching on the first cached access to this file object.
3424  if (!FileObject->PrivateCacheMap) {
3425  CC_FILE_SIZES ccfs;
3426 
3427  ccfs.AllocationSize = fcb->Header.AllocationSize;
3428  ccfs.FileSize = fcb->Header.FileSize;
3429  ccfs.ValidDataLength = fcb->Header.ValidDataLength;
3430 
3431  init_file_cache(FileObject, &ccfs);
3432  }
3433 
3434  if (IrpSp->MinorFunction & IRP_MN_MDL) {
3435  CcMdlRead(FileObject,&IrpSp->Parameters.Read.ByteOffset, length, &Irp->MdlAddress, &Irp->IoStatus);
3436  } else {
// Prefer CcCopyReadEx (which also takes the issuing thread) when the
// running kernel provides it; otherwise fall back to CcCopyRead.
3437  if (fCcCopyReadEx) {
3438  TRACE("CcCopyReadEx(%p, %I64x, %lx, %u, %p, %p, %p)\n", FileObject, IrpSp->Parameters.Read.ByteOffset.QuadPart,
3439  length, wait, data, &Irp->IoStatus, Irp->Tail.Overlay.Thread);
3440  TRACE("sizes = %I64x, %I64x, %I64x\n", fcb->Header.AllocationSize.QuadPart, fcb->Header.FileSize.QuadPart, fcb->Header.ValidDataLength.QuadPart);
3441  if (!fCcCopyReadEx(FileObject, &IrpSp->Parameters.Read.ByteOffset, length, wait, data, &Irp->IoStatus, Irp->Tail.Overlay.Thread)) {
3442  TRACE("CcCopyReadEx could not wait\n");
3443 
// NOTE(review): line 3444 (presumably IoMarkIrpPending(Irp) before the
// pending return) is missing from this extraction.
3445  return STATUS_PENDING;
3446  }
3447  TRACE("CcCopyReadEx finished\n");
3448  } else {
3449  TRACE("CcCopyRead(%p, %I64x, %lx, %u, %p, %p)\n", FileObject, IrpSp->Parameters.Read.ByteOffset.QuadPart, length, wait, data, &Irp->IoStatus);
3450  TRACE("sizes = %I64x, %I64x, %I64x\n", fcb->Header.AllocationSize.QuadPart, fcb->Header.FileSize.QuadPart, fcb->Header.ValidDataLength.QuadPart);
3451  if (!CcCopyRead(FileObject, &IrpSp->Parameters.Read.ByteOffset, length, wait, data, &Irp->IoStatus)) {
3452  TRACE("CcCopyRead could not wait\n");
3453 
// NOTE(review): line 3454 (presumably IoMarkIrpPending(Irp) before the
// pending return) is missing from this extraction.
3455  return STATUS_PENDING;
3456  }
3457  TRACE("CcCopyRead finished\n");
3458  }
3459  }
// NOTE(review): lines 3460-3461 (the _SEH2_EXCEPT filter that captures the
// exception code into Status, judging by the NT_SUCCESS(Status) test and
// the "EXCEPTION" ERR below) are missing from this extraction.
3462  } _SEH2_END;
3463 
// On success, report the cache manager's result plus the pre-zeroed tail;
// otherwise log the exception code captured by the SEH filter.
3464  if (NT_SUCCESS(Status)) {
3465  Status = Irp->IoStatus.Status;
3466  Irp->IoStatus.Information += addon;
3467  *bytes_read = (ULONG)Irp->IoStatus.Information;
3468  } else
3469  ERR("EXCEPTION - %08lx\n", Status);
3470 
3471  return Status;
// Non-cached path: read directly via read_stream()/read_file().
3472  } else {
3473  NTSTATUS Status;
3474 
// Direct reads must be able to block; if the caller can't wait, go pending.
3475  if (!wait) {
// NOTE(review): line 3476 (presumably IoMarkIrpPending(Irp) before the
// pending return) is missing from this extraction.
3477  return STATUS_PENDING;
3478  }
3479 
// Alternate data streams are read via read_stream(); regular file data via
// read_file().
3480  if (fcb->ads) {
3481  Status = read_stream(fcb, data, start, length, bytes_read);
3482 
3483  if (!NT_SUCCESS(Status))
3484  ERR("read_stream returned %08lx\n", Status);
3485  } else {
3486  Status = read_file(fcb, data, start, length, bytes_read, Irp);
3487 
3488  if (!NT_SUCCESS(Status))
3489  ERR("read_file returned %08lx\n", Status);
3490  }
3491 
// Include the zero-filled tail in the byte count reported to the caller.
3492  *bytes_read += addon;
3493  TRACE("read %lu bytes\n", *bytes_read);
3494 
3495  Irp->IoStatus.Information = *bytes_read;
3496 
// When PsUpdateDiskCounters is available (diskacc), charge the read to the
// issuing non-system thread's process for per-process disk accounting.
3497  if (diskacc && Status != STATUS_PENDING) {
3498  PETHREAD thread = NULL;
3499 
3500  if (Irp->Tail.Overlay.Thread && !IoIsSystemThread(Irp->Tail.Overlay.Thread))
3501  thread = Irp->Tail.Overlay.Thread;
3502  else if (!IoIsSystemThread(PsGetCurrentThread()))
// NOTE(review): lines 3503-3505 (presumably the assignment of
// PsGetCurrentThread() to "thread" for this branch) are missing from this
// extraction.
3506 
3507  if (thread)
3508  fPsUpdateDiskCounters(PsGetThreadProcess(thread), *bytes_read, 0, 1, 0, 0);
3509  }
3510 
3511  return Status;
3512  }
3513 }
3514