read.c
1 /* Copyright (c) Mark Harmstone 2016-17
2  *
3  * This file is part of WinBtrfs.
4  *
5  * WinBtrfs is free software: you can redistribute it and/or modify
6  * it under the terms of the GNU Lesser General Public Licence as published by
7  * the Free Software Foundation, either version 3 of the Licence, or
8  * (at your option) any later version.
9  *
10  * WinBtrfs is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  * GNU Lesser General Public Licence for more details.
14  *
15  * You should have received a copy of the GNU Lesser General Public Licence
16  * along with WinBtrfs. If not, see <http://www.gnu.org/licenses/>. */
17 
18 #include "btrfs_drv.h"
19 #include "xxhash.h"
20 #include "crc32c.h"
21 
28 };
29 
30 struct read_data_context;
31 
32 typedef struct {
35  bool rewrite;
43 
44 typedef struct {
47  chunk* c;
54  void* csum;
55  bool tree;
59 
60 extern bool diskacc;
64 
65 #define LZO_PAGE_SIZE 4096
66 
67 _Function_class_(IO_COMPLETION_ROUTINE)
68 static NTSTATUS __stdcall read_data_completion(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID conptr) {
69  read_data_stripe* stripe = conptr;
70  read_data_context* context = stripe->context;
71 
72  UNUSED(DeviceObject);
73 
74  stripe->iosb = Irp->IoStatus;
75 
76  if (NT_SUCCESS(Irp->IoStatus.Status))
77  stripe->status = ReadDataStatus_Success;
78  else
79  stripe->status = ReadDataStatus_Error;
80 
81  if (InterlockedDecrement(&context->stripes_left) == 0)
82  KeSetEvent(&context->Event, 0, false);
83 
84  return STATUS_MORE_PROCESSING_REQUIRED;
85 }
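/*
 * Illustrative sketch (not part of the original driver code) of the caller-side
 * pattern this completion routine is written for: the reader initialises
 * context->Event and context->stripes_left before issuing one IRP per stripe with
 * read_data_completion attached, then blocks until the last completion fires.
 * The helper name below is hypothetical.
 */
static void wait_for_read_stripes_example(read_data_context* context) {
    // Each completion decrements stripes_left; the last one signals the event,
    // and STATUS_MORE_PROCESSING_REQUIRED stops the I/O manager from freeing
    // the IRPs before the reader has inspected their IoStatus blocks.
    KeWaitForSingleObject(&context->Event, Executive, KernelMode, false, NULL);
}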
86 
87 NTSTATUS check_csum(device_extension* Vcb, uint8_t* data, ULONG sectors, void* csum) {
88  void* csum2;
89 
90  csum2 = ExAllocatePoolWithTag(PagedPool, Vcb->csum_size * sectors, ALLOC_TAG);
91  if (!csum2) {
92  ERR("out of memory\n");
93  return STATUS_INSUFFICIENT_RESOURCES;
94  }
95 
96  do_calc_job(Vcb, data, sectors, csum2);
97 
98  if (RtlCompareMemory(csum2, csum, sectors * Vcb->csum_size) != sectors * Vcb->csum_size) {
99  ExFreePool(csum2);
100  return STATUS_CRC_ERROR;
101  }
102 
103  ExFreePool(csum2);
104 
105  return STATUS_SUCCESS;
106 }
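/*
 * A sizing note (an assumption drawn from the code above, not a documented API
 * contract): check_csum() expects `csum` to hold one checksum per sector,
 * csum_size bytes each — 4 for CRC32C, 8 for xxhash64, 32 for SHA-256 and
 * BLAKE2b — so a caller reading `length` bytes needs the amount of checksum
 * data computed below. The helper name is hypothetical.
 */
static ULONG csum_bytes_needed_example(device_extension* Vcb, uint32_t length) {
    return (length >> Vcb->sector_shift) * Vcb->csum_size; // sectors * bytes per checksum
}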
107 
109  switch (Vcb->superblock.csum_type) {
110  case CSUM_TYPE_CRC32C:
111  *(uint32_t*)csum = ~calc_crc32c(0xffffffff, (uint8_t*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum));
112  break;
113 
114  case CSUM_TYPE_XXHASH:
115  *(uint64_t*)csum = XXH64((uint8_t*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum), 0);
116  break;
117 
118  case CSUM_TYPE_SHA256:
119  calc_sha256(csum, &th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum));
120  break;
121 
122  case CSUM_TYPE_BLAKE2:
123  blake2b(csum, BLAKE2_HASH_SIZE, (uint8_t*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum));
124  break;
125  }
126 }
127 
128 bool check_tree_checksum(device_extension* Vcb, tree_header* th) {
129  switch (Vcb->superblock.csum_type) {
130  case CSUM_TYPE_CRC32C: {
131  uint32_t crc32 = ~calc_crc32c(0xffffffff, (uint8_t*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum));
132 
133  if (crc32 == *((uint32_t*)th->csum))
134  return true;
135 
136  WARN("hash was %08x, expected %08x\n", crc32, *((uint32_t*)th->csum));
137 
138  break;
139  }
140 
141  case CSUM_TYPE_XXHASH: {
142  uint64_t hash = XXH64((uint8_t*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum), 0);
143 
144  if (hash == *((uint64_t*)th->csum))
145  return true;
146 
147  WARN("hash was %I64x, expected %I64x\n", hash, *((uint64_t*)th->csum));
148 
149  break;
150  }
151 
152  case CSUM_TYPE_SHA256: {
153  uint8_t hash[SHA256_HASH_SIZE];
154 
155  calc_sha256(hash, (uint8_t*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum));
156 
157  if (RtlCompareMemory(hash, th->csum, SHA256_HASH_SIZE) == SHA256_HASH_SIZE)
158  return true;
159 
160  WARN("hash was invalid\n");
161 
162  break;
163  }
164 
165  case CSUM_TYPE_BLAKE2: {
166  uint8_t hash[BLAKE2_HASH_SIZE];
167 
168  blake2b(hash, sizeof(hash), (uint8_t*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum));
169 
170  if (RtlCompareMemory(hash, th->csum, BLAKE2_HASH_SIZE) == BLAKE2_HASH_SIZE)
171  return true;
172 
173  WARN("hash was invalid\n");
174 
175  break;
176  }
177  }
178 
179  return false;
180 }
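/*
 * Illustrative note: a btrfs tree block starts with a 32-byte csum field, so the
 * checksum above covers everything *after* it — node_size - sizeof(th->csum)
 * bytes starting at th->fs_uuid. A hypothetical helper making that span explicit:
 */
static uint8_t* tree_csum_region_example(device_extension* Vcb, tree_header* th, uint32_t* len) {
    *len = Vcb->superblock.node_size - sizeof(th->csum); // everything past the csum field
    return (uint8_t*)&th->fs_uuid;                       // i.e. offset sizeof(th->csum) into the node
}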
181 
183  switch (Vcb->superblock.csum_type) {
184  case CSUM_TYPE_CRC32C:
185  *(uint32_t*)csum = ~calc_crc32c(0xffffffff, buf, Vcb->superblock.sector_size);
186  break;
187 
188  case CSUM_TYPE_XXHASH:
189  *(uint64_t*)csum = XXH64(buf, Vcb->superblock.sector_size, 0);
190  break;
191 
192  case CSUM_TYPE_SHA256:
193  calc_sha256(csum, buf, Vcb->superblock.sector_size);
194  break;
195 
196  case CSUM_TYPE_BLAKE2:
197  blake2b(csum, BLAKE2_HASH_SIZE, buf, Vcb->superblock.sector_size);
198  break;
199  }
200 }
201 
202 bool check_sector_csum(device_extension* Vcb, void* buf, void* csum) {
203  switch (Vcb->superblock.csum_type) {
204  case CSUM_TYPE_CRC32C: {
205  uint32_t crc32 = ~calc_crc32c(0xffffffff, buf, Vcb->superblock.sector_size);
206 
207  return *(uint32_t*)csum == crc32;
208  }
209 
210  case CSUM_TYPE_XXHASH: {
211  uint64_t hash = XXH64(buf, Vcb->superblock.sector_size, 0);
212 
213  return *(uint64_t*)csum == hash;
214  }
215 
216  case CSUM_TYPE_SHA256: {
217  uint8_t hash[SHA256_HASH_SIZE];
218 
219  calc_sha256(hash, buf, Vcb->superblock.sector_size);
220 
221  return RtlCompareMemory(hash, csum, SHA256_HASH_SIZE) == SHA256_HASH_SIZE;
222  }
223 
224  case CSUM_TYPE_BLAKE2: {
225  uint8_t hash[BLAKE2_HASH_SIZE];
226 
227  blake2b(hash, sizeof(hash), buf, Vcb->superblock.sector_size);
228 
229  return RtlCompareMemory(hash, csum, BLAKE2_HASH_SIZE) == BLAKE2_HASH_SIZE;
230  }
231  }
232 
233  return false;
234 }
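/*
 * Sketch of how the per-sector checksum array is walked by the callers below
 * (inferred from their loops; the helper name is hypothetical): `csum` points at
 * one entry per sector and the cursor advances by Vcb->csum_size each time.
 */
static bool check_sector_run_example(device_extension* Vcb, uint8_t* buf, ULONG sectors, void* csum) {
    void* ptr = csum;

    for (ULONG i = 0; i < sectors; i++) {
        if (!check_sector_csum(Vcb, buf + (i << Vcb->sector_shift), ptr))
            return false;                     // sector i does not match its checksum

        ptr = (uint8_t*)ptr + Vcb->csum_size; // next per-sector checksum entry
    }

    return true;
}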
235 
238  bool checksum_error = false;
239  uint16_t j, stripe = 0;
241  CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&ci[1];
242 
243  for (j = 0; j < ci->num_stripes; j++) {
244  if (context->stripes[j].status == ReadDataStatus_Error) {
245  WARN("stripe %u returned error %08lx\n", j, context->stripes[j].iosb.Status);
247  return context->stripes[j].iosb.Status;
248  } else if (context->stripes[j].status == ReadDataStatus_Success) {
249  stripe = j;
250  break;
251  }
252  }
253 
254  if (context->stripes[stripe].status != ReadDataStatus_Success)
255  return STATUS_INTERNAL_ERROR;
256 
257  if (context->tree) {
258  tree_header* th = (tree_header*)buf;
259 
260  if (th->address != context->address || !check_tree_checksum(Vcb, th)) {
261  checksum_error = true;
263  } else if (generation != 0 && th->generation != generation) {
264  checksum_error = true;
266  }
267  } else if (context->csum) {
268  Status = check_csum(Vcb, buf, (ULONG)context->stripes[stripe].Irp->IoStatus.Information / context->sector_size, context->csum);
269 
270  if (Status == STATUS_CRC_ERROR) {
271  checksum_error = true;
273  } else if (!NT_SUCCESS(Status)) {
274  ERR("check_csum returned %08lx\n", Status);
275  return Status;
276  }
277  }
278 
279  if (!checksum_error)
280  return STATUS_SUCCESS;
281 
282  if (ci->num_stripes == 1)
283  return STATUS_CRC_ERROR;
284 
285  if (context->tree) {
286  tree_header* t2;
287  bool recovered = false;
288 
289  t2 = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.node_size, ALLOC_TAG);
290  if (!t2) {
291  ERR("out of memory\n");
292  return STATUS_INSUFFICIENT_RESOURCES;
293  }
294 
295  for (j = 0; j < ci->num_stripes; j++) {
296  if (j != stripe && devices[j] && devices[j]->devobj) {
297  Status = sync_read_phys(devices[j]->devobj, devices[j]->fileobj, cis[j].offset + context->stripes[stripe].stripestart,
298  Vcb->superblock.node_size, (uint8_t*)t2, false);
299  if (!NT_SUCCESS(Status)) {
300  WARN("sync_read_phys returned %08lx\n", Status);
302  } else {
303  bool checksum_error = !check_tree_checksum(Vcb, t2);
304 
305  if (t2->address == addr && !checksum_error && (generation == 0 || t2->generation == generation)) {
306  RtlCopyMemory(buf, t2, Vcb->superblock.node_size);
307  ERR("recovering from checksum error at %I64x, device %I64x\n", addr, devices[stripe]->devitem.dev_id);
308  recovered = true;
309 
310  if (!Vcb->readonly && !devices[stripe]->readonly) { // write good data over bad
311  Status = write_data_phys(devices[stripe]->devobj, devices[stripe]->fileobj, cis[stripe].offset + context->stripes[stripe].stripestart,
312  t2, Vcb->superblock.node_size);
313  if (!NT_SUCCESS(Status)) {
314  WARN("write_data_phys returned %08lx\n", Status);
316  }
317  }
318 
319  break;
320  } else if (t2->address != addr || checksum_error)
321  log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
322  else
323  log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_GENERATION_ERRORS);
324  }
325  }
326  }
327 
328  if (!recovered) {
329  ERR("unrecoverable checksum error at %I64x\n", addr);
330  ExFreePool(t2);
331  return STATUS_CRC_ERROR;
332  }
333 
334  ExFreePool(t2);
335  } else {
336  ULONG sectors = (ULONG)context->stripes[stripe].Irp->IoStatus.Information >> Vcb->sector_shift;
337  uint8_t* sector;
338  void* ptr = context->csum;
339 
340  sector = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.sector_size, ALLOC_TAG);
341  if (!sector) {
342  ERR("out of memory\n");
343  return STATUS_INSUFFICIENT_RESOURCES;
344  }
345 
346  for (ULONG i = 0; i < sectors; i++) {
347  if (!check_sector_csum(Vcb, buf + (i << Vcb->sector_shift), ptr)) {
348  bool recovered = false;
349 
350  for (j = 0; j < ci->num_stripes; j++) {
351  if (j != stripe && devices[j] && devices[j]->devobj) {
352  Status = sync_read_phys(devices[j]->devobj, devices[j]->fileobj,
353  cis[j].offset + context->stripes[stripe].stripestart + ((uint64_t)i << Vcb->sector_shift),
354  Vcb->superblock.sector_size, sector, false);
355  if (!NT_SUCCESS(Status)) {
356  WARN("sync_read_phys returned %08lx\n", Status);
358  } else {
359  if (check_sector_csum(Vcb, sector, ptr)) {
360  RtlCopyMemory(buf + (i << Vcb->sector_shift), sector, Vcb->superblock.sector_size);
361  ERR("recovering from checksum error at %I64x, device %I64x\n", addr + ((uint64_t)i << Vcb->sector_shift), devices[stripe]->devitem.dev_id);
362  recovered = true;
363 
364  if (!Vcb->readonly && !devices[stripe]->readonly) { // write good data over bad
365  Status = write_data_phys(devices[stripe]->devobj, devices[stripe]->fileobj,
366  cis[stripe].offset + context->stripes[stripe].stripestart + ((uint64_t)i << Vcb->sector_shift),
367  sector, Vcb->superblock.sector_size);
368  if (!NT_SUCCESS(Status)) {
369  WARN("write_data_phys returned %08lx\n", Status);
371  }
372  }
373 
374  break;
375  } else
376  log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
377  }
378  }
379  }
380 
381  if (!recovered) {
382  ERR("unrecoverable checksum error at %I64x\n", addr + ((uint64_t)i << Vcb->sector_shift));
384  return STATUS_CRC_ERROR;
385  }
386  }
387 
388  ptr = (uint8_t*)ptr + Vcb->csum_size;
389  }
390 
392  }
393 
394  return STATUS_SUCCESS;
395 }
396 
399  for (uint16_t i = 0; i < ci->num_stripes; i++) {
400  if (context->stripes[i].status == ReadDataStatus_Error) {
401  WARN("stripe %u returned error %08lx\n", i, context->stripes[i].iosb.Status);
403  return context->stripes[i].iosb.Status;
404  }
405  }
406 
407  if (context->tree) { // shouldn't happen, as trees shouldn't cross stripe boundaries
408  tree_header* th = (tree_header*)buf;
409  bool checksum_error = !check_tree_checksum(Vcb, th);
410 
411  if (checksum_error || addr != th->address || (generation != 0 && generation != th->generation)) {
412  uint64_t off;
414 
416 
417  ERR("unrecoverable checksum error at %I64x, device %I64x\n", addr, devices[stripe]->devitem.dev_id);
418 
419  if (checksum_error) {
421  return STATUS_CRC_ERROR;
422  } else if (addr != th->address) {
423  WARN("address of tree was %I64x, not %I64x as expected\n", th->address, addr);
425  return STATUS_CRC_ERROR;
426  } else if (generation != 0 && generation != th->generation) {
427  WARN("generation of tree was %I64x, not %I64x as expected\n", th->generation, generation);
429  return STATUS_CRC_ERROR;
430  }
431  }
432  } else if (context->csum) {
434 
435  Status = check_csum(Vcb, buf, length >> Vcb->sector_shift, context->csum);
436 
437  if (Status == STATUS_CRC_ERROR) {
438  void* ptr = context->csum;
439 
440  for (uint32_t i = 0; i < length >> Vcb->sector_shift; i++) {
441  if (!check_sector_csum(Vcb, buf + (i << Vcb->sector_shift), ptr)) {
442  uint64_t off;
444 
445  get_raid0_offset(addr - offset + ((uint64_t)i << Vcb->sector_shift), ci->stripe_length, ci->num_stripes, &off, &stripe);
446 
447  ERR("unrecoverable checksum error at %I64x, device %I64x\n", addr, devices[stripe]->devitem.dev_id);
448 
450 
451  return Status;
452  }
453 
454  ptr = (uint8_t*)ptr + Vcb->csum_size;
455  }
456 
457  return Status;
458  } else if (!NT_SUCCESS(Status)) {
459  ERR("check_csum returned %08lx\n", Status);
460  return Status;
461  }
462  }
463 
464  return STATUS_SUCCESS;
465 }
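/*
 * The stripe arithmetic used throughout (a re-derivation for illustration;
 * get_raid0_offset() itself is defined elsewhere in the driver): logical offset
 * `off` within the chunk maps to stripe (off / stripe_length) % num_stripes, at
 * byte (off / (num_stripes * stripe_length)) * stripe_length + off % stripe_length
 * within that stripe. The helper name below is hypothetical.
 */
static void raid0_map_example(uint64_t off, uint64_t stripe_length, uint16_t num_stripes,
                              uint64_t* stripe_offset, uint16_t* stripe) {
    uint64_t row = off / (num_stripes * stripe_length);   // complete rows of num_stripes stripes before this offset

    *stripe = (uint16_t)((off / stripe_length) % num_stripes);
    *stripe_offset = (row * stripe_length) + (off % stripe_length);
}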
466 
469  uint16_t stripe = 0;
471  bool checksum_error = false;
472  CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&ci[1];
473 
474  for (uint16_t j = 0; j < ci->num_stripes; j++) {
475  if (context->stripes[j].status == ReadDataStatus_Error) {
476  WARN("stripe %u returned error %08lx\n", j, context->stripes[j].iosb.Status);
478  return context->stripes[j].iosb.Status;
479  } else if (context->stripes[j].status == ReadDataStatus_Success)
480  stripe = j;
481  }
482 
483  if (context->tree) {
484  tree_header* th = (tree_header*)buf;
485 
486  if (!check_tree_checksum(Vcb, th)) {
487  checksum_error = true;
489  } else if (addr != th->address) {
490  WARN("address of tree was %I64x, not %I64x as expected\n", th->address, addr);
491  checksum_error = true;
493  } else if (generation != 0 && generation != th->generation) {
494  WARN("generation of tree was %I64x, not %I64x as expected\n", th->generation, generation);
495  checksum_error = true;
497  }
498  } else if (context->csum) {
499  Status = check_csum(Vcb, buf, length >> Vcb->sector_shift, context->csum);
500 
501  if (Status == STATUS_CRC_ERROR)
502  checksum_error = true;
503  else if (!NT_SUCCESS(Status)) {
504  ERR("check_csum returned %08lx\n", Status);
505  return Status;
506  }
507  }
508 
509  if (!checksum_error)
510  return STATUS_SUCCESS;
511 
512  if (context->tree) {
513  tree_header* t2;
514  uint64_t off;
515  uint16_t badsubstripe = 0;
516  bool recovered = false;
517 
518  t2 = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.node_size, ALLOC_TAG);
519  if (!t2) {
520  ERR("out of memory\n");
521  return STATUS_INSUFFICIENT_RESOURCES;
522  }
523 
524  get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes / ci->sub_stripes, &off, &stripe);
525 
526  stripe *= ci->sub_stripes;
527 
528  for (uint16_t j = 0; j < ci->sub_stripes; j++) {
529  if (context->stripes[stripe + j].status == ReadDataStatus_Success) {
530  badsubstripe = j;
531  break;
532  }
533  }
534 
535  for (uint16_t j = 0; j < ci->sub_stripes; j++) {
536  if (context->stripes[stripe + j].status != ReadDataStatus_Success && devices[stripe + j] && devices[stripe + j]->devobj) {
537  Status = sync_read_phys(devices[stripe + j]->devobj, devices[stripe + j]->fileobj, cis[stripe + j].offset + off,
538  Vcb->superblock.node_size, (uint8_t*)t2, false);
539  if (!NT_SUCCESS(Status)) {
540  WARN("sync_read_phys returned %08lx\n", Status);
542  } else {
543  bool checksum_error = !check_tree_checksum(Vcb, t2);
544 
545  if (t2->address == addr && !checksum_error && (generation == 0 || t2->generation == generation)) {
546  RtlCopyMemory(buf, t2, Vcb->superblock.node_size);
547  ERR("recovering from checksum error at %I64x, device %I64x\n", addr, devices[stripe + j]->devitem.dev_id);
548  recovered = true;
549 
550  if (!Vcb->readonly && !devices[stripe + badsubstripe]->readonly && devices[stripe + badsubstripe]->devobj) { // write good data over bad
551  Status = write_data_phys(devices[stripe + badsubstripe]->devobj, devices[stripe + badsubstripe]->fileobj,
552  cis[stripe + badsubstripe].offset + off, t2, Vcb->superblock.node_size);
553  if (!NT_SUCCESS(Status)) {
554  WARN("write_data_phys returned %08lx\n", Status);
556  }
557  }
558 
559  break;
560  } else if (t2->address != addr || checksum_error)
561  log_device_error(Vcb, devices[stripe + j], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
562  else
563  log_device_error(Vcb, devices[stripe + j], BTRFS_DEV_STAT_GENERATION_ERRORS);
564  }
565  }
566  }
567 
568  if (!recovered) {
569  ERR("unrecoverable checksum error at %I64x\n", addr);
570  ExFreePool(t2);
571  return STATUS_CRC_ERROR;
572  }
573 
574  ExFreePool(t2);
575  } else {
576  ULONG sectors = length >> Vcb->sector_shift;
577  uint8_t* sector;
578  void* ptr = context->csum;
579 
580  sector = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.sector_size, ALLOC_TAG);
581  if (!sector) {
582  ERR("out of memory\n");
583  return STATUS_INSUFFICIENT_RESOURCES;
584  }
585 
586  for (ULONG i = 0; i < sectors; i++) {
587  if (!check_sector_csum(Vcb, buf + (i << Vcb->sector_shift), ptr)) {
588  uint64_t off;
589  uint16_t stripe2, badsubstripe = 0;
590  bool recovered = false;
591 
592  get_raid0_offset(addr - offset + ((uint64_t)i << Vcb->sector_shift), ci->stripe_length,
593  ci->num_stripes / ci->sub_stripes, &off, &stripe2);
594 
595  stripe2 *= ci->sub_stripes;
596 
597  for (uint16_t j = 0; j < ci->sub_stripes; j++) {
598  if (context->stripes[stripe2 + j].status == ReadDataStatus_Success) {
599  badsubstripe = j;
600  break;
601  }
602  }
603 
605 
606  for (uint16_t j = 0; j < ci->sub_stripes; j++) {
607  if (context->stripes[stripe2 + j].status != ReadDataStatus_Success && devices[stripe2 + j] && devices[stripe2 + j]->devobj) {
608  Status = sync_read_phys(devices[stripe2 + j]->devobj, devices[stripe2 + j]->fileobj, cis[stripe2 + j].offset + off,
609  Vcb->superblock.sector_size, sector, false);
610  if (!NT_SUCCESS(Status)) {
611  WARN("sync_read_phys returned %08lx\n", Status);
613  } else {
614  if (check_sector_csum(Vcb, sector, ptr)) {
615  RtlCopyMemory(buf + (i << Vcb->sector_shift), sector, Vcb->superblock.sector_size);
616  ERR("recovering from checksum error at %I64x, device %I64x\n", addr + ((uint64_t)i << Vcb->sector_shift), devices[stripe2 + j]->devitem.dev_id);
617  recovered = true;
618 
619  if (!Vcb->readonly && !devices[stripe2 + badsubstripe]->readonly && devices[stripe2 + badsubstripe]->devobj) { // write good data over bad
620  Status = write_data_phys(devices[stripe2 + badsubstripe]->devobj, devices[stripe2 + badsubstripe]->fileobj,
621  cis[stripe2 + badsubstripe].offset + off, sector, Vcb->superblock.sector_size);
622  if (!NT_SUCCESS(Status)) {
623  WARN("write_data_phys returned %08lx\n", Status);
624  log_device_error(Vcb, devices[stripe2 + badsubstripe], BTRFS_DEV_STAT_READ_ERRORS);
625  }
626  }
627 
628  break;
629  } else
631  }
632  }
633  }
634 
635  if (!recovered) {
636  ERR("unrecoverable checksum error at %I64x\n", addr + ((uint64_t)i << Vcb->sector_shift));
638  return STATUS_CRC_ERROR;
639  }
640  }
641 
642  ptr = (uint8_t*)ptr + Vcb->csum_size;
643  }
644 
646  }
647 
648  return STATUS_SUCCESS;
649 }
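/*
 * RAID10 addressing, as used above (an illustrative restatement): the chunk is
 * striped RAID0-fashion across num_stripes / sub_stripes groups, and each group
 * is sub_stripes mirrored devices, so logical stripe n, mirror copy m sits on
 * physical stripe index n * sub_stripes + m — hence the `stripe *= ci->sub_stripes`
 * and `stripe2 + j` indexing in the recovery paths. Hypothetical helper:
 */
static uint16_t raid10_physical_stripe_example(uint16_t logical_stripe, uint16_t mirror, uint16_t sub_stripes) {
    return (uint16_t)(logical_stripe * sub_stripes + mirror);
}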
650 
652  device** devices, uint64_t offset, uint64_t generation, chunk* c, bool degraded) {
654  bool checksum_error = false;
655  CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&ci[1];
656  uint16_t j, stripe = 0;
657  bool no_success = true;
658 
659  for (j = 0; j < ci->num_stripes; j++) {
660  if (context->stripes[j].status == ReadDataStatus_Error) {
661  WARN("stripe %u returned error %08lx\n", j, context->stripes[j].iosb.Status);
663  return context->stripes[j].iosb.Status;
664  } else if (context->stripes[j].status == ReadDataStatus_Success) {
665  stripe = j;
666  no_success = false;
667  }
668  }
669 
670  if (c) { // check partial stripes
671  LIST_ENTRY* le;
672  uint64_t ps_length = (ci->num_stripes - 1) * ci->stripe_length;
673 
674  ExAcquireResourceSharedLite(&c->partial_stripes_lock, true);
675 
676  le = c->partial_stripes.Flink;
677  while (le != &c->partial_stripes) {
678  partial_stripe* ps = CONTAINING_RECORD(le, partial_stripe, list_entry);
679 
680  if (ps->address + ps_length > addr && ps->address < addr + length) {
681  ULONG runlength, index;
682 
683  runlength = RtlFindFirstRunClear(&ps->bmp, &index);
684 
685  while (runlength != 0) {
686  if (index >= ps->bmplen)
687  break;
688 
689  if (index + runlength >= ps->bmplen) {
690  runlength = ps->bmplen - index;
691 
692  if (runlength == 0)
693  break;
694  }
695 
696  uint64_t runstart = ps->address + (index << Vcb->sector_shift);
697  uint64_t runend = runstart + (runlength << Vcb->sector_shift);
698  uint64_t start = max(runstart, addr);
699  uint64_t end = min(runend, addr + length);
700 
701  if (end > start)
702  RtlCopyMemory(buf + start - addr, &ps->data[start - ps->address], (ULONG)(end - start));
703 
704  runlength = RtlFindNextForwardRunClear(&ps->bmp, index + runlength, &index);
705  }
706  } else if (ps->address >= addr + length)
707  break;
708 
709  le = le->Flink;
710  }
711 
712  ExReleaseResourceLite(&c->partial_stripes_lock);
713  }
714 
715  if (context->tree) {
716  tree_header* th = (tree_header*)buf;
717 
718  if (addr != th->address || !check_tree_checksum(Vcb, th)) {
719  checksum_error = true;
720  if (!no_success && !degraded)
722  } else if (generation != 0 && generation != th->generation) {
723  checksum_error = true;
724  if (!no_success && !degraded)
726  }
727  } else if (context->csum) {
728  Status = check_csum(Vcb, buf, length >> Vcb->sector_shift, context->csum);
729 
730  if (Status == STATUS_CRC_ERROR) {
731  if (!degraded)
732  WARN("checksum error\n");
733  checksum_error = true;
734  } else if (!NT_SUCCESS(Status)) {
735  ERR("check_csum returned %08lx\n", Status);
736  return Status;
737  }
738  } else if (degraded)
739  checksum_error = true;
740 
741  if (!checksum_error)
742  return STATUS_SUCCESS;
743 
744  if (context->tree) {
745  uint16_t parity;
746  uint64_t off;
747  bool recovered = false, first = true, failed = false;
748  uint8_t* t2;
749 
750  t2 = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.node_size * 2, ALLOC_TAG);
751  if (!t2) {
752  ERR("out of memory\n");
753  return STATUS_INSUFFICIENT_RESOURCES;
754  }
755 
756  get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes - 1, &off, &stripe);
757 
758  parity = (((addr - offset) / ((ci->num_stripes - 1) * ci->stripe_length)) + ci->num_stripes - 1) % ci->num_stripes;
759 
760  stripe = (parity + stripe + 1) % ci->num_stripes;
761 
762  for (j = 0; j < ci->num_stripes; j++) {
763  if (j != stripe) {
764  if (devices[j] && devices[j]->devobj) {
765  if (first) {
766  Status = sync_read_phys(devices[j]->devobj, devices[j]->fileobj, cis[j].offset + off, Vcb->superblock.node_size, t2, false);
767  if (!NT_SUCCESS(Status)) {
768  ERR("sync_read_phys returned %08lx\n", Status);
770  failed = true;
771  break;
772  }
773 
774  first = false;
775  } else {
776  Status = sync_read_phys(devices[j]->devobj, devices[j]->fileobj, cis[j].offset + off, Vcb->superblock.node_size, t2 + Vcb->superblock.node_size, false);
777  if (!NT_SUCCESS(Status)) {
778  ERR("sync_read_phys returned %08lx\n", Status);
780  failed = true;
781  break;
782  }
783 
784  do_xor(t2, t2 + Vcb->superblock.node_size, Vcb->superblock.node_size);
785  }
786  } else {
787  failed = true;
788  break;
789  }
790  }
791  }
792 
793  if (!failed) {
794  tree_header* t3 = (tree_header*)t2;
795 
796  if (t3->address == addr && check_tree_checksum(Vcb, t3) && (generation == 0 || t3->generation == generation)) {
797  RtlCopyMemory(buf, t2, Vcb->superblock.node_size);
798 
799  if (!degraded)
800  ERR("recovering from checksum error at %I64x, device %I64x\n", addr, devices[stripe]->devitem.dev_id);
801 
802  recovered = true;
803 
804  if (!Vcb->readonly && devices[stripe] && !devices[stripe]->readonly && devices[stripe]->devobj) { // write good data over bad
805  Status = write_data_phys(devices[stripe]->devobj, devices[stripe]->fileobj, cis[stripe].offset + off, t2, Vcb->superblock.node_size);
806  if (!NT_SUCCESS(Status)) {
807  WARN("write_data_phys returned %08lx\n", Status);
809  }
810  }
811  }
812  }
813 
814  if (!recovered) {
815  ERR("unrecoverable checksum error at %I64x\n", addr);
816  ExFreePool(t2);
817  return STATUS_CRC_ERROR;
818  }
819 
820  ExFreePool(t2);
821  } else {
822  ULONG sectors = length >> Vcb->sector_shift;
823  uint8_t* sector;
824  void* ptr = context->csum;
825 
826  sector = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.sector_size * 2, ALLOC_TAG);
827  if (!sector) {
828  ERR("out of memory\n");
829  return STATUS_INSUFFICIENT_RESOURCES;
830  }
831 
832  for (ULONG i = 0; i < sectors; i++) {
833  uint16_t parity;
834  uint64_t off;
835 
836  get_raid0_offset(addr - offset + ((uint64_t)i << Vcb->sector_shift), ci->stripe_length,
837  ci->num_stripes - 1, &off, &stripe);
838 
839  parity = (((addr - offset + ((uint64_t)i << Vcb->sector_shift)) / ((ci->num_stripes - 1) * ci->stripe_length)) + ci->num_stripes - 1) % ci->num_stripes;
840 
841  stripe = (parity + stripe + 1) % ci->num_stripes;
842 
843  if (!devices[stripe] || !devices[stripe]->devobj || (ptr && !check_sector_csum(Vcb, buf + (i << Vcb->sector_shift), ptr))) {
844  bool recovered = false, first = true, failed = false;
845 
846  if (devices[stripe] && devices[stripe]->devobj)
848 
849  for (j = 0; j < ci->num_stripes; j++) {
850  if (j != stripe) {
851  if (devices[j] && devices[j]->devobj) {
852  if (first) {
853  Status = sync_read_phys(devices[j]->devobj, devices[j]->fileobj, cis[j].offset + off, Vcb->superblock.sector_size, sector, false);
854  if (!NT_SUCCESS(Status)) {
855  ERR("sync_read_phys returned %08lx\n", Status);
856  failed = true;
858  break;
859  }
860 
861  first = false;
862  } else {
863  Status = sync_read_phys(devices[j]->devobj, devices[j]->fileobj, cis[j].offset + off, Vcb->superblock.sector_size,
864  sector + Vcb->superblock.sector_size, false);
865  if (!NT_SUCCESS(Status)) {
866  ERR("sync_read_phys returned %08lx\n", Status);
867  failed = true;
869  break;
870  }
871 
872  do_xor(sector, sector + Vcb->superblock.sector_size, Vcb->superblock.sector_size);
873  }
874  } else {
875  failed = true;
876  break;
877  }
878  }
879  }
880 
881  if (!failed) {
882  if (!ptr || check_sector_csum(Vcb, sector, ptr)) {
883  RtlCopyMemory(buf + (i << Vcb->sector_shift), sector, Vcb->superblock.sector_size);
884 
885  if (!degraded)
886  ERR("recovering from checksum error at %I64x, device %I64x\n", addr + ((uint64_t)i << Vcb->sector_shift), devices[stripe]->devitem.dev_id);
887 
888  recovered = true;
889 
890  if (!Vcb->readonly && devices[stripe] && !devices[stripe]->readonly && devices[stripe]->devobj) { // write good data over bad
891  Status = write_data_phys(devices[stripe]->devobj, devices[stripe]->fileobj, cis[stripe].offset + off,
892  sector, Vcb->superblock.sector_size);
893  if (!NT_SUCCESS(Status)) {
894  WARN("write_data_phys returned %08lx\n", Status);
896  }
897  }
898  }
899  }
900 
901  if (!recovered) {
902  ERR("unrecoverable checksum error at %I64x\n", addr + ((uint64_t)i << Vcb->sector_shift));
904  return STATUS_CRC_ERROR;
905  }
906  }
907 
908  if (ptr)
909  ptr = (uint8_t*)ptr + Vcb->csum_size;
910  }
911 
913  }
914 
915  return STATUS_SUCCESS;
916 }
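/*
 * The two facts the RAID5 path above relies on, restated as a standalone sketch
 * (illustration only; the helper name is hypothetical): parity rotates one device
 * to the right every (num_stripes - 1) * stripe_length bytes — the `parity = ...`
 * line above — and any single missing block in a row is the XOR of the parity
 * block with every surviving data block in that row.
 */
static void raid5_rebuild_example(uint8_t** blocks, uint16_t num_stripes, ULONG block_size,
                                  uint16_t missing, uint8_t* out) {
    RtlZeroMemory(out, block_size);

    for (uint16_t i = 0; i < num_stripes; i++) {
        if (i == missing)
            continue;

        for (ULONG j = 0; j < block_size; j++)
            out[j] ^= blocks[i][j];           // XOR of parity and all surviving blocks
    }
}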
917 
918 void raid6_recover2(uint8_t* sectors, uint16_t num_stripes, ULONG sector_size, uint16_t missing1, uint16_t missing2, uint8_t* out) {
919  if (missing1 == num_stripes - 2 || missing2 == num_stripes - 2) { // reconstruct from q and data
920  uint16_t missing = missing1 == (num_stripes - 2) ? missing2 : missing1;
921  uint16_t stripe;
922 
923  stripe = num_stripes - 3;
924 
925  if (stripe == missing)
926  RtlZeroMemory(out, sector_size);
927  else
928  RtlCopyMemory(out, sectors + (stripe * sector_size), sector_size);
929 
930  do {
931  stripe--;
932 
933  galois_double(out, sector_size);
934 
935  if (stripe != missing)
936  do_xor(out, sectors + (stripe * sector_size), sector_size);
937  } while (stripe > 0);
938 
939  do_xor(out, sectors + ((num_stripes - 1) * sector_size), sector_size);
940 
941  if (missing != 0)
942  galois_divpower(out, (uint8_t)missing, sector_size);
943  } else { // reconstruct from p and q
944  uint16_t x = missing1, y = missing2, stripe;
945  uint8_t gyx, gx, denom, a, b, *p, *q, *pxy, *qxy;
946  uint32_t j;
947 
948  stripe = num_stripes - 3;
949 
950  pxy = out + sector_size;
951  qxy = out;
952 
953  if (stripe == missing1 || stripe == missing2) {
954  RtlZeroMemory(pxy, sector_size);
955  RtlZeroMemory(qxy, sector_size);
956  } else {
957  RtlCopyMemory(pxy, sectors + (stripe * sector_size), sector_size);
958  RtlCopyMemory(qxy, sectors + (stripe * sector_size), sector_size);
959  }
960 
961  do {
962  stripe--;
963 
964  galois_double(qxy, sector_size);
965 
966  if (stripe != missing1 && stripe != missing2) {
967  do_xor(pxy, sectors + (stripe * sector_size), sector_size);
968  do_xor(qxy, sectors + (stripe * sector_size), sector_size);
969  }
970  } while (stripe > 0);
971 
972  gyx = gpow2(y > x ? (y-x) : (255-x+y));
973  gx = gpow2(255-x);
974 
975  denom = gdiv(1, gyx ^ 1);
976  a = gmul(gyx, denom);
977  b = gmul(gx, denom);
978 
979  p = sectors + ((num_stripes - 2) * sector_size);
980  q = sectors + ((num_stripes - 1) * sector_size);
981 
982  for (j = 0; j < sector_size; j++) {
983  *qxy = gmul(a, *p ^ *pxy) ^ gmul(b, *q ^ *qxy);
984 
985  p++;
986  q++;
987  pxy++;
988  qxy++;
989  }
990 
991  do_xor(out + sector_size, out, sector_size);
992  do_xor(out + sector_size, sectors + ((num_stripes - 2) * sector_size), sector_size);
993  }
994 }
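/*
 * The algebra behind raid6_recover2(), summarised for reference (illustration;
 * only the helper at the end is new and its name is hypothetical). btrfs RAID6
 * keeps P = D_0 ^ D_1 ^ ... and Q = sum_i g^i * D_i over GF(2^8) with generator
 * g = 2. With data blocks x and y both missing, the partial sums Pxy and Qxy over
 * the surviving blocks give
 *     D_x ^ D_y         = P ^ Pxy
 *     g^x D_x ^ g^y D_y = Q ^ Qxy
 * whose solution is the a/b form computed above:
 *     D_x = a (P ^ Pxy) ^ b (Q ^ Qxy),  with  a = g^(y-x) / (g^(y-x) ^ 1),
 *                                             b = g^(-x)  / (g^(y-x) ^ 1),
 *     D_y = D_x ^ P ^ Pxy.
 * When one data block plus P is lost, the first branch instead rebuilds the data
 * block from Q alone: D_x = g^(-x) (Q ^ Qxy), i.e. per byte:
 */
static uint8_t raid6_byte_from_q_example(uint8_t q, uint8_t qxy, uint16_t x) {
    // g^255 == 1 in GF(2^8), so g^(-x) == g^(255 - x); gmul()/gpow2() are the
    // driver's existing Galois-field helpers used above.
    return gmul(gpow2((uint8_t)(255 - x)), (uint8_t)(q ^ qxy));
}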
995 
997  device** devices, uint64_t offset, uint64_t generation, chunk* c, bool degraded) {
999  bool checksum_error = false;
1000  CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&ci[1];
1001  uint16_t stripe = 0, j;
1002  bool no_success = true;
1003 
1004  for (j = 0; j < ci->num_stripes; j++) {
1005  if (context->stripes[j].status == ReadDataStatus_Error) {
1006  WARN("stripe %u returned error %08lx\n", j, context->stripes[j].iosb.Status);
1007 
1008  if (devices[j])
1010  return context->stripes[j].iosb.Status;
1011  } else if (context->stripes[j].status == ReadDataStatus_Success) {
1012  stripe = j;
1013  no_success = false;
1014  }
1015  }
1016 
1017  if (c) { // check partial stripes
1018  LIST_ENTRY* le;
1019  uint64_t ps_length = (ci->num_stripes - 2) * ci->stripe_length;
1020 
1021  ExAcquireResourceSharedLite(&c->partial_stripes_lock, true);
1022 
1023  le = c->partial_stripes.Flink;
1024  while (le != &c->partial_stripes) {
1025  partial_stripe* ps = CONTAINING_RECORD(le, partial_stripe, list_entry);
1026 
1027  if (ps->address + ps_length > addr && ps->address < addr + length) {
1028  ULONG runlength, index;
1029 
1030  runlength = RtlFindFirstRunClear(&ps->bmp, &index);
1031 
1032  while (runlength != 0) {
1033  if (index >= ps->bmplen)
1034  break;
1035 
1036  if (index + runlength >= ps->bmplen) {
1037  runlength = ps->bmplen - index;
1038 
1039  if (runlength == 0)
1040  break;
1041  }
1042 
1043  uint64_t runstart = ps->address + (index << Vcb->sector_shift);
1044  uint64_t runend = runstart + (runlength << Vcb->sector_shift);
1045  uint64_t start = max(runstart, addr);
1046  uint64_t end = min(runend, addr + length);
1047 
1048  if (end > start)
1049  RtlCopyMemory(buf + start - addr, &ps->data[start - ps->address], (ULONG)(end - start));
1050 
1051  runlength = RtlFindNextForwardRunClear(&ps->bmp, index + runlength, &index);
1052  }
1053  } else if (ps->address >= addr + length)
1054  break;
1055 
1056  le = le->Flink;
1057  }
1058 
1059  ExReleaseResourceLite(&c->partial_stripes_lock);
1060  }
1061 
1062  if (context->tree) {
1063  tree_header* th = (tree_header*)buf;
1064 
1065  if (addr != th->address || !check_tree_checksum(Vcb, th)) {
1066  checksum_error = true;
1067  if (!no_success && !degraded && devices[stripe])
1069  } else if (generation != 0 && generation != th->generation) {
1070  checksum_error = true;
1071  if (!no_success && !degraded && devices[stripe])
1073  }
1074  } else if (context->csum) {
1075  Status = check_csum(Vcb, buf, length >> Vcb->sector_shift, context->csum);
1076 
1077  if (Status == STATUS_CRC_ERROR) {
1078  if (!degraded)
1079  WARN("checksum error\n");
1080  checksum_error = true;
1081  } else if (!NT_SUCCESS(Status)) {
1082  ERR("check_csum returned %08lx\n", Status);
1083  return Status;
1084  }
1085  } else if (degraded)
1086  checksum_error = true;
1087 
1088  if (!checksum_error)
1089  return STATUS_SUCCESS;
1090 
1091  if (context->tree) {
1092  uint8_t* sector;
1093  uint16_t k, physstripe, parity1, parity2, error_stripe = 0;
1094  uint64_t off;
1095  bool recovered = false, failed = false;
1096  ULONG num_errors = 0;
1097 
1098  sector = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.node_size * (ci->num_stripes + 2), ALLOC_TAG);
1099  if (!sector) {
1100  ERR("out of memory\n");
1101  return STATUS_INSUFFICIENT_RESOURCES;
1102  }
1103 
1104  get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes - 2, &off, &stripe);
1105 
1106  parity1 = (((addr - offset) / ((ci->num_stripes - 2) * ci->stripe_length)) + ci->num_stripes - 2) % ci->num_stripes;
1107  parity2 = (parity1 + 1) % ci->num_stripes;
1108 
1109  physstripe = (parity2 + stripe + 1) % ci->num_stripes;
1110 
1111  j = (parity2 + 1) % ci->num_stripes;
1112 
1113  for (k = 0; k < ci->num_stripes - 1; k++) {
1114  if (j != physstripe) {
1115  if (devices[j] && devices[j]->devobj) {
1116  Status = sync_read_phys(devices[j]->devobj, devices[j]->fileobj, cis[j].offset + off, Vcb->superblock.node_size,
1117  sector + (k * Vcb->superblock.node_size), false);
1118  if (!NT_SUCCESS(Status)) {
1119  ERR("sync_read_phys returned %08lx\n", Status);
1121  num_errors++;
1122  error_stripe = k;
1123 
1124  if (num_errors > 1) {
1125  failed = true;
1126  break;
1127  }
1128  }
1129  } else {
1130  num_errors++;
1131  error_stripe = k;
1132 
1133  if (num_errors > 1) {
1134  failed = true;
1135  break;
1136  }
1137  }
1138  }
1139 
1140  j = (j + 1) % ci->num_stripes;
1141  }
1142 
1143  if (!failed) {
1144  if (num_errors == 0) {
1145  tree_header* th = (tree_header*)(sector + (stripe * Vcb->superblock.node_size));
1146 
1147  RtlCopyMemory(sector + (stripe * Vcb->superblock.node_size), sector + ((ci->num_stripes - 2) * Vcb->superblock.node_size),
1148  Vcb->superblock.node_size);
1149 
1150  for (j = 0; j < ci->num_stripes - 2; j++) {
1151  if (j != stripe)
1152  do_xor(sector + (stripe * Vcb->superblock.node_size), sector + (j * Vcb->superblock.node_size), Vcb->superblock.node_size);
1153  }
1154 
1155  if (th->address == addr && check_tree_checksum(Vcb, th) && (generation == 0 || th->generation == generation)) {
1156  RtlCopyMemory(buf, sector + (stripe * Vcb->superblock.node_size), Vcb->superblock.node_size);
1157 
1158  if (devices[physstripe] && devices[physstripe]->devobj)
1159  ERR("recovering from checksum error at %I64x, device %I64x\n", addr, devices[physstripe]->devitem.dev_id);
1160 
1161  recovered = true;
1162 
1163  if (!Vcb->readonly && devices[physstripe] && devices[physstripe]->devobj && !devices[physstripe]->readonly) { // write good data over bad
1164  Status = write_data_phys(devices[physstripe]->devobj, devices[physstripe]->fileobj, cis[physstripe].offset + off,
1165  sector + (stripe * Vcb->superblock.node_size), Vcb->superblock.node_size);
1166  if (!NT_SUCCESS(Status)) {
1167  WARN("write_data_phys returned %08lx\n", Status);
1169  }
1170  }
1171  }
1172  }
1173 
1174  if (!recovered) {
1175  tree_header* th = (tree_header*)(sector + (ci->num_stripes * Vcb->superblock.node_size));
1176  bool read_q = false;
1177 
1178  if (devices[parity2] && devices[parity2]->devobj) {
1179  Status = sync_read_phys(devices[parity2]->devobj, devices[parity2]->fileobj, cis[parity2].offset + off,
1180  Vcb->superblock.node_size, sector + ((ci->num_stripes - 1) * Vcb->superblock.node_size), false);
1181  if (!NT_SUCCESS(Status)) {
1182  ERR("sync_read_phys returned %08lx\n", Status);
1184  } else
1185  read_q = true;
1186  }
1187 
1188  if (read_q) {
1189  if (num_errors == 1) {
1190  raid6_recover2(sector, ci->num_stripes, Vcb->superblock.node_size, stripe, error_stripe, sector + (ci->num_stripes * Vcb->superblock.node_size));
1191 
1192  if (th->address == addr && check_tree_checksum(Vcb, th) && (generation == 0 || th->generation == generation))
1193  recovered = true;
1194  } else {
1195  for (j = 0; j < ci->num_stripes - 1; j++) {
1196  if (j != stripe) {
1197  raid6_recover2(sector, ci->num_stripes, Vcb->superblock.node_size, stripe, j, sector + (ci->num_stripes * Vcb->superblock.node_size));
1198 
1199  if (th->address == addr && check_tree_checksum(Vcb, th) && (generation == 0 || th->generation == generation)) {
1200  recovered = true;
1201  error_stripe = j;
1202  break;
1203  }
1204  }
1205  }
1206  }
1207  }
1208 
1209  if (recovered) {
1210  uint16_t error_stripe_phys = (parity2 + error_stripe + 1) % ci->num_stripes;
1211 
1212  if (devices[physstripe] && devices[physstripe]->devobj)
1213  ERR("recovering from checksum error at %I64x, device %I64x\n", addr, devices[physstripe]->devitem.dev_id);
1214 
1215  RtlCopyMemory(buf, sector + (ci->num_stripes * Vcb->superblock.node_size), Vcb->superblock.node_size);
1216 
1217  if (!Vcb->readonly && devices[physstripe] && devices[physstripe]->devobj && !devices[physstripe]->readonly) { // write good data over bad
1218  Status = write_data_phys(devices[physstripe]->devobj, devices[physstripe]->fileobj, cis[physstripe].offset + off,
1219  sector + (ci->num_stripes * Vcb->superblock.node_size), Vcb->superblock.node_size);
1220  if (!NT_SUCCESS(Status)) {
1221  WARN("write_data_phys returned %08lx\n", Status);
1223  }
1224  }
1225 
1226  if (devices[error_stripe_phys] && devices[error_stripe_phys]->devobj) {
1227  if (error_stripe == ci->num_stripes - 2) {
1228  ERR("recovering from parity error at %I64x, device %I64x\n", addr, devices[error_stripe_phys]->devitem.dev_id);
1229 
1231 
1232  RtlZeroMemory(sector + ((ci->num_stripes - 2) * Vcb->superblock.node_size), Vcb->superblock.node_size);
1233 
1234  for (j = 0; j < ci->num_stripes - 2; j++) {
1235  if (j == stripe) {
1236  do_xor(sector + ((ci->num_stripes - 2) * Vcb->superblock.node_size), sector + (ci->num_stripes * Vcb->superblock.node_size),
1237  Vcb->superblock.node_size);
1238  } else {
1239  do_xor(sector + ((ci->num_stripes - 2) * Vcb->superblock.node_size), sector + (j * Vcb->superblock.node_size),
1240  Vcb->superblock.node_size);
1241  }
1242  }
1243  } else {
1244  ERR("recovering from checksum error at %I64x, device %I64x\n", addr + ((error_stripe - stripe) * ci->stripe_length),
1245  devices[error_stripe_phys]->devitem.dev_id);
1246 
1248 
1249  RtlCopyMemory(sector + (error_stripe * Vcb->superblock.node_size),
1250  sector + ((ci->num_stripes + 1) * Vcb->superblock.node_size), Vcb->superblock.node_size);
1251  }
1252  }
1253 
1254  if (!Vcb->readonly && devices[error_stripe_phys] && devices[error_stripe_phys]->devobj && !devices[error_stripe_phys]->readonly) { // write good data over bad
1255  Status = write_data_phys(devices[error_stripe_phys]->devobj, devices[error_stripe_phys]->fileobj, cis[error_stripe_phys].offset + off,
1256  sector + (error_stripe * Vcb->superblock.node_size), Vcb->superblock.node_size);
1257  if (!NT_SUCCESS(Status)) {
1258  WARN("write_data_phys returned %08lx\n", Status);
1260  }
1261  }
1262  }
1263  }
1264  }
1265 
1266  if (!recovered) {
1267  ERR("unrecoverable checksum error at %I64x\n", addr);
1268  ExFreePool(sector);
1269  return STATUS_CRC_ERROR;
1270  }
1271 
1272  ExFreePool(sector);
1273  } else {
1274  ULONG sectors = length >> Vcb->sector_shift;
1275  uint8_t* sector;
1276  void* ptr = context->csum;
1277 
1278  sector = ExAllocatePoolWithTag(NonPagedPool, (ci->num_stripes + 2) << Vcb->sector_shift, ALLOC_TAG);
1279  if (!sector) {
1280  ERR("out of memory\n");
1281  return STATUS_INSUFFICIENT_RESOURCES;
1282  }
1283 
1284  for (ULONG i = 0; i < sectors; i++) {
1285  uint64_t off;
1286  uint16_t physstripe, parity1, parity2;
1287 
1288  get_raid0_offset(addr - offset + ((uint64_t)i << Vcb->sector_shift), ci->stripe_length,
1289  ci->num_stripes - 2, &off, &stripe);
1290 
1291  parity1 = (((addr - offset + ((uint64_t)i << Vcb->sector_shift)) / ((ci->num_stripes - 2) * ci->stripe_length)) + ci->num_stripes - 2) % ci->num_stripes;
1292  parity2 = (parity1 + 1) % ci->num_stripes;
1293 
1294  physstripe = (parity2 + stripe + 1) % ci->num_stripes;
1295 
1296  if (!devices[physstripe] || !devices[physstripe]->devobj || (context->csum && !check_sector_csum(Vcb, buf + (i << Vcb->sector_shift), ptr))) {
1297  uint16_t error_stripe = 0;
1298  bool recovered = false, failed = false;
1299  ULONG num_errors = 0;
1300 
1301  if (devices[physstripe] && devices[physstripe]->devobj)
1303 
1304  j = (parity2 + 1) % ci->num_stripes;
1305 
1306  for (uint16_t k = 0; k < ci->num_stripes - 1; k++) {
1307  if (j != physstripe) {
1308  if (devices[j] && devices[j]->devobj) {
1309  Status = sync_read_phys(devices[j]->devobj, devices[j]->fileobj, cis[j].offset + off, Vcb->superblock.sector_size,
1310  sector + ((ULONG)k << Vcb->sector_shift), false);
1311  if (!NT_SUCCESS(Status)) {
1312  ERR("sync_read_phys returned %08lx\n", Status);
1314  num_errors++;
1315  error_stripe = k;
1316 
1317  if (num_errors > 1) {
1318  failed = true;
1319  break;
1320  }
1321  }
1322  } else {
1323  num_errors++;
1324  error_stripe = k;
1325 
1326  if (num_errors > 1) {
1327  failed = true;
1328  break;
1329  }
1330  }
1331  }
1332 
1333  j = (j + 1) % ci->num_stripes;
1334  }
1335 
1336  if (!failed) {
1337  if (num_errors == 0) {
1338  RtlCopyMemory(sector + ((unsigned int)stripe << Vcb->sector_shift), sector + ((unsigned int)(ci->num_stripes - 2) << Vcb->sector_shift), Vcb->superblock.sector_size);
1339 
1340  for (j = 0; j < ci->num_stripes - 2; j++) {
1341  if (j != stripe)
1342  do_xor(sector + ((unsigned int)stripe << Vcb->sector_shift), sector + ((unsigned int)j << Vcb->sector_shift), Vcb->superblock.sector_size);
1343  }
1344 
1345  if (!ptr || check_sector_csum(Vcb, sector + ((unsigned int)stripe << Vcb->sector_shift), ptr)) {
1346  RtlCopyMemory(buf + (i << Vcb->sector_shift), sector + ((unsigned int)stripe << Vcb->sector_shift), Vcb->superblock.sector_size);
1347 
1348  if (devices[physstripe] && devices[physstripe]->devobj)
1349  ERR("recovering from checksum error at %I64x, device %I64x\n", addr + ((uint64_t)i << Vcb->sector_shift),
1350  devices[physstripe]->devitem.dev_id);
1351 
1352  recovered = true;
1353 
1354  if (!Vcb->readonly && devices[physstripe] && devices[physstripe]->devobj && !devices[physstripe]->readonly) { // write good data over bad
1355  Status = write_data_phys(devices[physstripe]->devobj, devices[physstripe]->fileobj, cis[physstripe].offset + off,
1356  sector + ((unsigned int)stripe << Vcb->sector_shift), Vcb->superblock.sector_size);
1357  if (!NT_SUCCESS(Status)) {
1358  WARN("write_data_phys returned %08lx\n", Status);
1360  }
1361  }
1362  }
1363  }
1364 
1365  if (!recovered) {
1366  bool read_q = false;
1367 
1368  if (devices[parity2] && devices[parity2]->devobj) {
1369  Status = sync_read_phys(devices[parity2]->devobj, devices[parity2]->fileobj, cis[parity2].offset + off,
1370  Vcb->superblock.sector_size, sector + ((unsigned int)(ci->num_stripes - 1) << Vcb->sector_shift), false);
1371  if (!NT_SUCCESS(Status)) {
1372  ERR("sync_read_phys returned %08lx\n", Status);
1374  } else
1375  read_q = true;
1376  }
1377 
1378  if (read_q) {
1379  if (num_errors == 1) {
1380  raid6_recover2(sector, ci->num_stripes, Vcb->superblock.sector_size, stripe, error_stripe, sector + ((unsigned int)ci->num_stripes << Vcb->sector_shift));
1381 
1382  if (!devices[physstripe] || !devices[physstripe]->devobj)
1383  recovered = true;
1384  else
1385  recovered = check_sector_csum(Vcb, sector + ((unsigned int)ci->num_stripes << Vcb->sector_shift), ptr);
1386  } else {
1387  for (j = 0; j < ci->num_stripes - 1; j++) {
1388  if (j != stripe) {
1389  raid6_recover2(sector, ci->num_stripes, Vcb->superblock.sector_size, stripe, j, sector + ((unsigned int)ci->num_stripes << Vcb->sector_shift));
1390 
1391  if (check_sector_csum(Vcb, sector + ((unsigned int)ci->num_stripes << Vcb->sector_shift), ptr)) {
1392  recovered = true;
1393  error_stripe = j;
1394  break;
1395  }
1396  }
1397  }
1398  }
1399  }
1400 
1401  if (recovered) {
1402  uint16_t error_stripe_phys = (parity2 + error_stripe + 1) % ci->num_stripes;
1403 
1404  if (devices[physstripe] && devices[physstripe]->devobj)
1405  ERR("recovering from checksum error at %I64x, device %I64x\n",
1406  addr + ((uint64_t)i << Vcb->sector_shift), devices[physstripe]->devitem.dev_id);
1407 
1408  RtlCopyMemory(buf + (i << Vcb->sector_shift), sector + ((unsigned int)ci->num_stripes << Vcb->sector_shift), Vcb->superblock.sector_size);
1409 
1410  if (!Vcb->readonly && devices[physstripe] && devices[physstripe]->devobj && !devices[physstripe]->readonly) { // write good data over bad
1411  Status = write_data_phys(devices[physstripe]->devobj, devices[physstripe]->fileobj, cis[physstripe].offset + off,
1412  sector + ((unsigned int)ci->num_stripes << Vcb->sector_shift), Vcb->superblock.sector_size);
1413  if (!NT_SUCCESS(Status)) {
1414  WARN("write_data_phys returned %08lx\n", Status);
1416  }
1417  }
1418 
1419  if (devices[error_stripe_phys] && devices[error_stripe_phys]->devobj) {
1420  if (error_stripe == ci->num_stripes - 2) {
1421  ERR("recovering from parity error at %I64x, device %I64x\n", addr + ((uint64_t)i << Vcb->sector_shift),
1422  devices[error_stripe_phys]->devitem.dev_id);
1423 
1425 
1426  RtlZeroMemory(sector + ((unsigned int)(ci->num_stripes - 2) << Vcb->sector_shift), Vcb->superblock.sector_size);
1427 
1428  for (j = 0; j < ci->num_stripes - 2; j++) {
1429  if (j == stripe) {
1430  do_xor(sector + ((unsigned int)(ci->num_stripes - 2) << Vcb->sector_shift), sector + ((unsigned int)ci->num_stripes << Vcb->sector_shift),
1431  Vcb->superblock.sector_size);
1432  } else {
1433  do_xor(sector + ((unsigned int)(ci->num_stripes - 2) << Vcb->sector_shift), sector + ((unsigned int)j << Vcb->sector_shift),
1434  Vcb->superblock.sector_size);
1435  }
1436  }
1437  } else {
1438  ERR("recovering from checksum error at %I64x, device %I64x\n",
1439  addr + ((uint64_t)i << Vcb->sector_shift) + ((error_stripe - stripe) * ci->stripe_length),
1440  devices[error_stripe_phys]->devitem.dev_id);
1441 
1443 
1444  RtlCopyMemory(sector + ((unsigned int)error_stripe << Vcb->sector_shift),
1445  sector + ((unsigned int)(ci->num_stripes + 1) << Vcb->sector_shift), Vcb->superblock.sector_size);
1446  }
1447  }
1448 
1449  if (!Vcb->readonly && devices[error_stripe_phys] && devices[error_stripe_phys]->devobj && !devices[error_stripe_phys]->readonly) { // write good data over bad
1450  Status = write_data_phys(devices[error_stripe_phys]->devobj, devices[error_stripe_phys]->fileobj, cis[error_stripe_phys].offset + off,
1451  sector + ((unsigned int)error_stripe << Vcb->sector_shift), Vcb->superblock.sector_size);
1452  if (!NT_SUCCESS(Status)) {
1453  WARN("write_data_phys returned %08lx\n", Status);
1455  }
1456  }
1457  }
1458  }
1459  }
1460 
1461  if (!recovered) {
1462  ERR("unrecoverable checksum error at %I64x\n", addr + ((uint64_t)i << Vcb->sector_shift));
1463  ExFreePool(sector);
1464  return STATUS_CRC_ERROR;
1465  }
1466  }
1467 
1468  if (ptr)
1469  ptr = (uint8_t*)ptr + Vcb->csum_size;
1470  }
1471 
1472  ExFreePool(sector);
1473  }
1474 
1475  return STATUS_SUCCESS;
1476 }
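/*
 * Layout note for the rotation used above (illustration; helper name hypothetical):
 * P and Q occupy two consecutive device slots, parity1 and parity2 = parity1 + 1
 * (mod num_stripes), and data stripe n of the row lives on physical device
 * (parity2 + n + 1) % num_stripes. For example, with num_stripes == 4 and
 * parity1 == 2: P is on device 2, Q on device 3, D0 on device 0 and D1 on device 1.
 */
static uint16_t raid6_data_device_example(uint16_t parity2, uint16_t data_index, uint16_t num_stripes) {
    return (uint16_t)((parity2 + data_index + 1) % num_stripes);
}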
1477 
1480  _In_ ULONG priority) {
1481  CHUNK_ITEM* ci;
1482  CHUNK_ITEM_STRIPE* cis;
1483  read_data_context context;
1484  uint64_t type, offset, total_reading = 0;
1485  NTSTATUS Status;
1486  device** devices = NULL;
1487  uint16_t i, startoffstripe, allowed_missing, missing_devices = 0;
1488  uint8_t* dummypage = NULL;
1489  PMDL dummy_mdl = NULL;
1490  bool need_to_wait;
1491  uint64_t lockaddr, locklen;
1492 
1493  if (Vcb->log_to_phys_loaded) {
1494  if (!c) {
1495  c = get_chunk_from_address(Vcb, addr);
1496 
1497  if (!c) {
1498  ERR("get_chunk_from_address failed\n");
1499  return STATUS_INTERNAL_ERROR;
1500  }
1501  }
1502 
1503  ci = c->chunk_item;
1504  offset = c->offset;
1505  devices = c->devices;
1506 
1507  if (pc)
1508  *pc = c;
1509  } else {
1510  LIST_ENTRY* le = Vcb->sys_chunks.Flink;
1511 
1512  ci = NULL;
1513 
1514  c = NULL;
1515  while (le != &Vcb->sys_chunks) {
1516  sys_chunk* sc = CONTAINING_RECORD(le, sys_chunk, list_entry);
1517 
1518  if (sc->key.obj_id == 0x100 && sc->key.obj_type == TYPE_CHUNK_ITEM && sc->key.offset <= addr) {
1519  CHUNK_ITEM* chunk_item = sc->data;
1520 
1521  if ((addr - sc->key.offset) < chunk_item->size && chunk_item->num_stripes > 0) {
1522  ci = chunk_item;
1523  offset = sc->key.offset;
1524  cis = (CHUNK_ITEM_STRIPE*)&chunk_item[1];
1525 
1526  devices = ExAllocatePoolWithTag(NonPagedPool, sizeof(device*) * ci->num_stripes, ALLOC_TAG);
1527  if (!devices) {
1528  ERR("out of memory\n");
1529  return STATUS_INSUFFICIENT_RESOURCES;
1530  }
1531 
1532  for (i = 0; i < ci->num_stripes; i++) {
1533  devices[i] = find_device_from_uuid(Vcb, &cis[i].dev_uuid);
1534  }
1535 
1536  break;
1537  }
1538  }
1539 
1540  le = le->Flink;
1541  }
1542 
1543  if (!ci) {
1544  ERR("could not find chunk for %I64x in bootstrap\n", addr);
1545  return STATUS_INTERNAL_ERROR;
1546  }
1547 
1548  if (pc)
1549  *pc = NULL;
1550  }
1551 
1552  if (ci->type & BLOCK_FLAG_DUPLICATE) {
1553  type = BLOCK_FLAG_DUPLICATE;
1554  allowed_missing = ci->num_stripes - 1;
1555  } else if (ci->type & BLOCK_FLAG_RAID0) {
1556  type = BLOCK_FLAG_RAID0;
1557  allowed_missing = 0;
1558  } else if (ci->type & BLOCK_FLAG_RAID1) {
1559  type = BLOCK_FLAG_DUPLICATE;
1560  allowed_missing = 1;
1561  } else if (ci->type & BLOCK_FLAG_RAID10) {
1562  type = BLOCK_FLAG_RAID10;
1563  allowed_missing = 1;
1564  } else if (ci->type & BLOCK_FLAG_RAID5) {
1565  type = BLOCK_FLAG_RAID5;
1566  allowed_missing = 1;
1567  } else if (ci->type & BLOCK_FLAG_RAID6) {
1568  type = BLOCK_FLAG_RAID6;
1569  allowed_missing = 2;
1570  } else if (ci->type & BLOCK_FLAG_RAID1C3) {
1571  type = BLOCK_FLAG_DUPLICATE;
1572  allowed_missing = 2;
1573  } else if (ci->type & BLOCK_FLAG_RAID1C4) {
1574  type = BLOCK_FLAG_DUPLICATE;
1575  allowed_missing = 3;
1576  } else { // SINGLE
1577  type = BLOCK_FLAG_DUPLICATE;
1578  allowed_missing = 0;
1579  }
1580 
1581  cis = (CHUNK_ITEM_STRIPE*)&ci[1];
1582 
1583  RtlZeroMemory(&context, sizeof(read_data_context));
1584  KeInitializeEvent(&context.Event, NotificationEvent, false);
1585 
1586  context.stripes = ExAllocatePoolWithTag(NonPagedPool, sizeof(read_data_stripe) * ci->num_stripes, ALLOC_TAG);
1587  if (!context.stripes) {
1588  ERR("out of memory\n");
1589  return STATUS_INSUFFICIENT_RESOURCES;
1590  }
1591 
1592  if (c && (type == BLOCK_FLAG_RAID5 || type == BLOCK_FLAG_RAID6)) {
1593  get_raid56_lock_range(c, addr, length, &lockaddr, &locklen);
1594  chunk_lock_range(Vcb, c, lockaddr, locklen);
1595  }
1596 
1597  RtlZeroMemory(context.stripes, sizeof(read_data_stripe) * ci->num_stripes);
1598 
1599  context.buflen = length;
1600  context.num_stripes = ci->num_stripes;
1601  context.stripes_left = context.num_stripes;
1602  context.sector_size = Vcb->superblock.sector_size;
1603  context.csum = csum;
1604  context.tree = is_tree;
1605  context.type = type;
1606 
1607  if (type == BLOCK_FLAG_RAID0) {
1608  uint64_t startoff, endoff;
1609  uint16_t endoffstripe, stripe;
1610  uint32_t *stripeoff, pos;
1611  PMDL master_mdl;
1612  PFN_NUMBER* pfns;
1613 
1614  // FIXME - test this still works if page size isn't the same as sector size
1615 
1616  // This relies on the fact that MDLs are followed in memory by the page file numbers,
1617  // so with a bit of jiggery-pokery you can trick your disks into deinterlacing your RAID0
1618  // data for you without doing a memcpy yourself.
1619  // MDLs are officially opaque, so this might very well break in future versions of Windows.
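 // A worked example of the PFN copies below, assuming the btrfs default 64 KiB
 // stripe_length and 4 KiB pages: each stripe-sized run contributes
 // stripe_length >> PAGE_SHIFT = 16 page-frame numbers, and the expressions of the
 // form `readlen * sizeof(PFN_NUMBER) >> PAGE_SHIFT` copy exactly readlen bytes'
 // worth of PFN entries into a stripe's MDL, so each disk ends up transferring its
 // part of the stripe directly into the right pages of the caller's buffer.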
1620 
1621  get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes, &startoff, &startoffstripe);
1622  get_raid0_offset(addr + length - offset - 1, ci->stripe_length, ci->num_stripes, &endoff, &endoffstripe);
1623 
1624  if (file_read) {
1625  // Unfortunately we can't avoid doing at least one memcpy, as Windows can give us an MDL
1626  // with duplicated dummy PFNs, which confuse check_csum. Ah well.
1627  // See https://msdn.microsoft.com/en-us/library/windows/hardware/Dn614012.aspx if you're interested.
1628 
1630 
1631  if (!context.va) {
1632  ERR("out of memory\n");
1634  goto exit;
1635  }
1636  } else
1637  context.va = buf;
1638 
1639  master_mdl = IoAllocateMdl(context.va, length, false, false, NULL);
1640  if (!master_mdl) {
1641  ERR("out of memory\n");
1643  goto exit;
1644  }
1645 
1647 
1648  _SEH2_TRY {
1652  } _SEH2_END;
1653 
1654  if (!NT_SUCCESS(Status)) {
1655  ERR("MmProbeAndLockPages threw exception %08lx\n", Status);
1656  IoFreeMdl(master_mdl);
1657  goto exit;
1658  }
1659 
1660  pfns = (PFN_NUMBER*)(master_mdl + 1);
1661 
1662  for (i = 0; i < ci->num_stripes; i++) {
1663  if (startoffstripe > i)
1664  context.stripes[i].stripestart = startoff - (startoff % ci->stripe_length) + ci->stripe_length;
1665  else if (startoffstripe == i)
1666  context.stripes[i].stripestart = startoff;
1667  else
1668  context.stripes[i].stripestart = startoff - (startoff % ci->stripe_length);
1669 
1670  if (endoffstripe > i)
1671  context.stripes[i].stripeend = endoff - (endoff % ci->stripe_length) + ci->stripe_length;
1672  else if (endoffstripe == i)
1673  context.stripes[i].stripeend = endoff + 1;
1674  else
1675  context.stripes[i].stripeend = endoff - (endoff % ci->stripe_length);
1676 
1677  if (context.stripes[i].stripestart != context.stripes[i].stripeend) {
1678  context.stripes[i].mdl = IoAllocateMdl(context.va, (ULONG)(context.stripes[i].stripeend - context.stripes[i].stripestart), false, false, NULL);
1679 
1680  if (!context.stripes[i].mdl) {
1681  ERR("IoAllocateMdl failed\n");
1682  MmUnlockPages(master_mdl);
1683  IoFreeMdl(master_mdl);
1685  goto exit;
1686  }
1687  }
1688  }
1689 
1690  stripeoff = ExAllocatePoolWithTag(NonPagedPool, sizeof(uint32_t) * ci->num_stripes, ALLOC_TAG);
1691  if (!stripeoff) {
1692  ERR("out of memory\n");
1693  MmUnlockPages(master_mdl);
1694  IoFreeMdl(master_mdl);
1696  goto exit;
1697  }
1698 
1699  RtlZeroMemory(stripeoff, sizeof(uint32_t) * ci->num_stripes);
1700 
1701  pos = 0;
1702  stripe = startoffstripe;
1703  while (pos < length) {
1704  PFN_NUMBER* stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1);
1705 
1706  if (pos == 0) {
1707  uint32_t readlen = (uint32_t)min(context.stripes[stripe].stripeend - context.stripes[stripe].stripestart, ci->stripe_length - (context.stripes[stripe].stripestart % ci->stripe_length));
1708 
1709  RtlCopyMemory(stripe_pfns, pfns, readlen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
1710 
1711  stripeoff[stripe] += readlen;
1712  pos += readlen;
1713  } else if (length - pos < ci->stripe_length) {
1714  RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], (length - pos) * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
1715 
1716  pos = length;
1717  } else {
1718  RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], (ULONG)(ci->stripe_length * sizeof(PFN_NUMBER) >> PAGE_SHIFT));
1719 
1720  stripeoff[stripe] += (uint32_t)ci->stripe_length;
1721  pos += (uint32_t)ci->stripe_length;
1722  }
1723 
1724  stripe = (stripe + 1) % ci->num_stripes;
1725  }
1726 
1727  MmUnlockPages(master_mdl);
1728  IoFreeMdl(master_mdl);
1729 
1730  ExFreePool(stripeoff);
1731  } else if (type == BLOCK_FLAG_RAID10) {
1732  uint64_t startoff, endoff;
1733  uint16_t endoffstripe, j, stripe;
1734  ULONG orig_ls;
1735  PMDL master_mdl;
1736  PFN_NUMBER* pfns;
1737  uint32_t* stripeoff, pos;
1738  read_data_stripe** stripes;
1739 
1740  if (c)
1741  orig_ls = c->last_stripe;
1742  else
1743  orig_ls = 0;
1744 
1745  get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes / ci->sub_stripes, &startoff, &startoffstripe);
1746  get_raid0_offset(addr + length - offset - 1, ci->stripe_length, ci->num_stripes / ci->sub_stripes, &endoff, &endoffstripe);
1747 
1748  if ((ci->num_stripes % ci->sub_stripes) != 0) {
1749  ERR("chunk %I64x: num_stripes %x was not a multiple of sub_stripes %x!\n", offset, ci->num_stripes, ci->sub_stripes);
1751  goto exit;
1752  }
1753 
1754  if (file_read) {
1756 
1757  if (!context.va) {
1758  ERR("out of memory\n");
1760  goto exit;
1761  }
1762  } else
1763  context.va = buf;
1764 
1765  context.firstoff = (uint16_t)((startoff % ci->stripe_length) >> Vcb->sector_shift);
1766  context.startoffstripe = startoffstripe;
1767  context.sectors_per_stripe = (uint16_t)(ci->stripe_length >> Vcb->sector_shift);
1768 
1769  startoffstripe *= ci->sub_stripes;
1770  endoffstripe *= ci->sub_stripes;
1771 
1772  if (c)
1773  c->last_stripe = (orig_ls + 1) % ci->sub_stripes;
1774 
1775  master_mdl = IoAllocateMdl(context.va, length, false, false, NULL);
1776  if (!master_mdl) {
1777  ERR("out of memory\n");
1779  goto exit;
1780  }
1781 
1783 
1784  _SEH2_TRY {
1788  } _SEH2_END;
1789 
1790  if (!NT_SUCCESS(Status)) {
1791  ERR("MmProbeAndLockPages threw exception %08lx\n", Status);
1792  IoFreeMdl(master_mdl);
1793  goto exit;
1794  }
1795 
1796  pfns = (PFN_NUMBER*)(master_mdl + 1);
1797 
1799  if (!stripes) {
1800  ERR("out of memory\n");
1801  MmUnlockPages(master_mdl);
1802  IoFreeMdl(master_mdl);
1804  goto exit;
1805  }
1806 
1807  RtlZeroMemory(stripes, sizeof(read_data_stripe*) * ci->num_stripes / ci->sub_stripes);
1808 
1809  for (i = 0; i < ci->num_stripes; i += ci->sub_stripes) {
1810  uint64_t sstart, send;
1811  bool stripeset = false;
1812 
1813  if (startoffstripe > i)
1814  sstart = startoff - (startoff % ci->stripe_length) + ci->stripe_length;
1815  else if (startoffstripe == i)
1816  sstart = startoff;
1817  else
1818  sstart = startoff - (startoff % ci->stripe_length);
1819 
1820  if (endoffstripe > i)
1821  send = endoff - (endoff % ci->stripe_length) + ci->stripe_length;
1822  else if (endoffstripe == i)
1823  send = endoff + 1;
1824  else
1825  send = endoff - (endoff % ci->stripe_length);
1826 
1827  for (j = 0; j < ci->sub_stripes; j++) {
1828  if (j == orig_ls && devices[i+j] && devices[i+j]->devobj) {
1829  context.stripes[i+j].stripestart = sstart;
1830  context.stripes[i+j].stripeend = send;
1831  stripes[i / ci->sub_stripes] = &context.stripes[i+j];
1832 
1833  if (sstart != send) {
1834  context.stripes[i+j].mdl = IoAllocateMdl(context.va, (ULONG)(send - sstart), false, false, NULL);
1835 
1836  if (!context.stripes[i+j].mdl) {
1837  ERR("IoAllocateMdl failed\n");
1838  MmUnlockPages(master_mdl);
1839  IoFreeMdl(master_mdl);
1841  goto exit;
1842  }
1843  }
1844 
1845  stripeset = true;
1846  } else
1847  context.stripes[i+j].status = ReadDataStatus_Skip;
1848  }
1849 
1850  if (!stripeset) {
1851  for (j = 0; j < ci->sub_stripes; j++) {
1852  if (devices[i+j] && devices[i+j]->devobj) {
1853  context.stripes[i+j].stripestart = sstart;
1854  context.stripes[i+j].stripeend = send;
1855  context.stripes[i+j].status = ReadDataStatus_Pending;
1856  stripes[i / ci->sub_stripes] = &context.stripes[i+j];
1857 
1858  if (sstart != send) {
1859  context.stripes[i+j].mdl = IoAllocateMdl(context.va, (ULONG)(send - sstart), false, false, NULL);
1860 
1861  if (!context.stripes[i+j].mdl) {
1862  ERR("IoAllocateMdl failed\n");
1863  MmUnlockPages(master_mdl);
1864  IoFreeMdl(master_mdl);
1866  goto exit;
1867  }
1868  }
1869 
1870  stripeset = true;
1871  break;
1872  }
1873  }
1874 
1875  if (!stripeset) {
1876  ERR("could not find stripe to read\n");
1878  goto exit;
1879  }
1880  }
1881  }
1882 
1883  stripeoff = ExAllocatePoolWithTag(NonPagedPool, sizeof(uint32_t) * ci->num_stripes / ci->sub_stripes, ALLOC_TAG);
1884  if (!stripeoff) {
1885  ERR("out of memory\n");
1886  MmUnlockPages(master_mdl);
1887  IoFreeMdl(master_mdl);
1889  goto exit;
1890  }
1891 
1892  RtlZeroMemory(stripeoff, sizeof(uint32_t) * ci->num_stripes / ci->sub_stripes);
1893 
1894  pos = 0;
1895  stripe = startoffstripe / ci->sub_stripes;
1896  while (pos < length) {
1897  PFN_NUMBER* stripe_pfns = (PFN_NUMBER*)(stripes[stripe]->mdl + 1);
1898 
1899  if (pos == 0) {
1900  uint32_t readlen = (uint32_t)min(stripes[stripe]->stripeend - stripes[stripe]->stripestart,
1901  ci->stripe_length - (stripes[stripe]->stripestart % ci->stripe_length));
1902 
1903  RtlCopyMemory(stripe_pfns, pfns, readlen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
1904 
1905  stripeoff[stripe] += readlen;
1906  pos += readlen;
1907  } else if (length - pos < ci->stripe_length) {
1908  RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], (length - pos) * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
1909 
1910  pos = length;
1911  } else {
1912  RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], (ULONG)(ci->stripe_length * sizeof(PFN_NUMBER) >> PAGE_SHIFT));
1913 
1914  stripeoff[stripe] += (ULONG)ci->stripe_length;
1915  pos += (ULONG)ci->stripe_length;
1916  }
1917 
1918  stripe = (stripe + 1) % (ci->num_stripes / ci->sub_stripes);
1919  }
1920 
1921  MmUnlockPages(master_mdl);
1922  IoFreeMdl(master_mdl);
1923 
1924  ExFreePool(stripeoff);
1925  ExFreePool(stripes);
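/* A minimal sketch, not taken from read.c, of the PFN-splitting technique used
 * above for striped profiles: the caller's buffer is locked once behind a
 * single master MDL, and the page frame numbers that follow the MDL header are
 * dealt out round-robin, one stripe_length at a time, into the smaller
 * per-stripe MDLs, so every disk transfers straight into the right pages of
 * the one buffer with no intermediate copy. The helper below is hypothetical
 * and assumes the buffer and stripe_length are page-aligned; the real loop
 * above also handles a shorter, unaligned first chunk. */
static void split_pfns_round_robin(const PFN_NUMBER* master_pfns, uint32_t length,
                                   uint32_t stripe_length, uint16_t num_stripes,
                                   PFN_NUMBER** stripe_pfns, uint32_t* stripe_off) {
    uint32_t pos = 0;
    uint16_t stripe = 0;

    while (pos < length) {
        uint32_t run = min(stripe_length, length - pos);

        /* copy one stripe's worth of page frame numbers into this stripe's MDL */
        RtlCopyMemory(&stripe_pfns[stripe][stripe_off[stripe] >> PAGE_SHIFT],
                      &master_pfns[pos >> PAGE_SHIFT],
                      (run >> PAGE_SHIFT) * sizeof(PFN_NUMBER));

        stripe_off[stripe] += run;
        pos += run;
        stripe = (stripe + 1) % num_stripes;
    }
}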
1926  } else if (type == BLOCK_FLAG_DUPLICATE) {
1927  uint64_t orig_ls;
1928 
1929  if (c)
1930  orig_ls = i = c->last_stripe;
1931  else
1932  orig_ls = i = 0;
1933 
1934  while (!devices[i] || !devices[i]->devobj) {
1935  i = (i + 1) % ci->num_stripes;
1936 
1937  if (i == orig_ls) {
1938  ERR("no devices available to service request\n");
1940  goto exit;
1941  }
1942  }
1943 
1944  if (c)
1945  c->last_stripe = (i + 1) % ci->num_stripes;
1946 
1947  context.stripes[i].stripestart = addr - offset;
1948  context.stripes[i].stripeend = context.stripes[i].stripestart + length;
1949 
1950  if (file_read) {
1951  context.va = ExAllocatePoolWithTag(NonPagedPool, length, ALLOC_TAG);
1952 
1953  if (!context.va) {
1954  ERR("out of memory\n");
1955  Status = STATUS_INSUFFICIENT_RESOURCES;
1956  goto exit;
1957  }
1958 
1959  context.stripes[i].mdl = IoAllocateMdl(context.va, length, false, false, NULL);
1960  if (!context.stripes[i].mdl) {
1961  ERR("IoAllocateMdl failed\n");
1963  goto exit;
1964  }
1965 
1966  MmBuildMdlForNonPagedPool(context.stripes[i].mdl);
1967  } else {
1968  context.stripes[i].mdl = IoAllocateMdl(buf, length, false, false, NULL);
1969 
1970  if (!context.stripes[i].mdl) {
1971  ERR("IoAllocateMdl failed\n");
1973  goto exit;
1974  }
1975 
1976  Status = STATUS_SUCCESS;
1977 
1978  _SEH2_TRY {
1979  MmProbeAndLockPages(context.stripes[i].mdl, KernelMode, IoWriteAccess);
1980  } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) {
1981  Status = _SEH2_GetExceptionCode();
1982  } _SEH2_END;
1983 
1984  if (!NT_SUCCESS(Status)) {
1985  ERR("MmProbeAndLockPages threw exception %08lx\n", Status);
1986  goto exit;
1987  }
1988  }
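/* A minimal sketch, not taken from read.c, of the mirror-selection loop above:
 * for duplicated profiles the data exists on every stripe, so the reader starts
 * at the chunk's last-used copy, walks the copies round-robin until one has a
 * usable device object, and stores the next position so consecutive reads are
 * spread across the mirrors. device_usable() is a hypothetical stand-in for the
 * devices[i] && devices[i]->devobj test. */
static NTSTATUS pick_mirror(uint16_t num_stripes, uint16_t last_stripe,
                            bool (*device_usable)(uint16_t index), uint16_t* chosen) {
    uint16_t i = last_stripe;

    do {
        if (device_usable(i)) {
            *chosen = i;
            return STATUS_SUCCESS;
        }

        i = (i + 1) % num_stripes;   /* try the next copy */
    } while (i != last_stripe);

    return STATUS_DEVICE_NOT_READY;  /* every copy is missing or offline */
}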
1989  } else if (type == BLOCK_FLAG_RAID5) {
1990  uint64_t startoff, endoff;
1991  uint16_t endoffstripe, parity;
1992  uint32_t *stripeoff, pos;
1993  PMDL master_mdl;
1994  PFN_NUMBER *pfns, dummy = 0;
1995  bool need_dummy = false;
1996 
1997  get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes - 1, &startoff, &startoffstripe);
1998  get_raid0_offset(addr + length - offset - 1, ci->stripe_length, ci->num_stripes - 1, &endoff, &endoffstripe);
1999 
2000  if (file_read) {
2001  context.va = ExAllocatePoolWithTag(NonPagedPool, length, ALLOC_TAG);
2002 
2003  if (!context.va) {
2004  ERR("out of memory\n");
2005  Status = STATUS_INSUFFICIENT_RESOURCES;
2006  goto exit;
2007  }
2008  } else
2009  context.va = buf;
2010 
2011  master_mdl = IoAllocateMdl(context.va, length, false, false, NULL);
2012  if (!master_mdl) {
2013  ERR("out of memory\n");
2015  goto exit;
2016  }
2017 
2018  Status = STATUS_SUCCESS;
2019 
2020  _SEH2_TRY {
2021  MmProbeAndLockPages(master_mdl, KernelMode, IoWriteAccess);
2022  } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) {
2023  Status = _SEH2_GetExceptionCode();
2024  } _SEH2_END;
2025 
2026  if (!NT_SUCCESS(Status)) {
2027  ERR("MmProbeAndLockPages threw exception %08lx\n", Status);
2028  IoFreeMdl(master_mdl);
2029  goto exit;
2030  }
2031 
2032  pfns = (PFN_NUMBER*)(master_mdl + 1);
2033 
2034  pos = 0;
2035  while (pos < length) {
2036  parity = (((addr - offset + pos) / ((ci->num_stripes - 1) * ci->stripe_length)) + ci->num_stripes - 1) % ci->num_stripes;
2037 
2038  if (pos == 0) {
2039  uint16_t stripe = (parity + startoffstripe + 1) % ci->num_stripes;
2040  ULONG skip, readlen;
2041 
2042  i = startoffstripe;
2043  while (stripe != parity) {
2044  if (i == startoffstripe) {
2045  readlen = min(length, (ULONG)(ci->stripe_length - (startoff % ci->stripe_length)));
2046 
2047  context.stripes[stripe].stripestart = startoff;
2048  context.stripes[stripe].stripeend = startoff + readlen;
2049 
2050  pos += readlen;
2051 
2052  if (pos == length)
2053  break;
2054  } else {
2055  readlen = min(length - pos, (ULONG)ci->stripe_length);
2056 
2057  context.stripes[stripe].stripestart = startoff - (startoff % ci->stripe_length);
2058  context.stripes[stripe].stripeend = context.stripes[stripe].stripestart + readlen;
2059 
2060  pos += readlen;
2061 
2062  if (pos == length)
2063  break;
2064  }
2065 
2066  i++;
2067  stripe = (stripe + 1) % ci->num_stripes;
2068  }
2069 
2070  if (pos == length)
2071  break;
2072 
2073  for (i = 0; i < startoffstripe; i++) {
2074  uint16_t stripe2 = (parity + i + 1) % ci->num_stripes;
2075 
2076  context.stripes[stripe2].stripestart = context.stripes[stripe2].stripeend = startoff - (startoff % ci->stripe_length) + ci->stripe_length;
2077  }
2078 
2079  context.stripes[parity].stripestart = context.stripes[parity].stripeend = startoff - (startoff % ci->stripe_length) + ci->stripe_length;
2080 
2081  if (length - pos > ci->num_stripes * (ci->num_stripes - 1) * ci->stripe_length) {
2082  skip = (ULONG)(((length - pos) / (ci->num_stripes * (ci->num_stripes - 1) * ci->stripe_length)) - 1);
2083 
2084  for (i = 0; i < ci->num_stripes; i++) {
2085  context.stripes[i].stripeend += skip * ci->num_stripes * ci->stripe_length;
2086  }
2087 
2088  pos += (uint32_t)(skip * (ci->num_stripes - 1) * ci->num_stripes * ci->stripe_length);
2089  need_dummy = true;
2090  }
2091  } else if (length - pos >= ci->stripe_length * (ci->num_stripes - 1)) {
2092  for (i = 0; i < ci->num_stripes; i++) {
2093  context.stripes[i].stripeend += ci->stripe_length;
2094  }
2095 
2096  pos += (uint32_t)(ci->stripe_length * (ci->num_stripes - 1));
2097  need_dummy = true;
2098  } else {
2099  uint16_t stripe = (parity + 1) % ci->num_stripes;
2100 
2101  i = 0;
2102  while (stripe != parity) {
2103  if (endoffstripe == i) {
2104  context.stripes[stripe].stripeend = endoff + 1;
2105  break;
2106  } else if (endoffstripe > i)
2107  context.stripes[stripe].stripeend = endoff - (endoff % ci->stripe_length) + ci->stripe_length;
2108 
2109  i++;
2110  stripe = (stripe + 1) % ci->num_stripes;
2111  }
2112 
2113  break;
2114  }
2115  }
2116 
2117  for (i = 0; i < ci->num_stripes; i++) {
2118  if (context.stripes[i].stripestart != context.stripes[i].stripeend) {
2119  context.stripes[i].mdl = IoAllocateMdl(context.va, (ULONG)(context.stripes[i].stripeend - context.stripes[i].stripestart),
2120  false, false, NULL);
2121 
2122  if (!context.stripes[i].mdl) {
2123  ERR("IoAllocateMdl failed\n");
2124  MmUnlockPages(master_mdl);
2125  IoFreeMdl(master_mdl);
2127  goto exit;
2128  }
2129  }
2130  }
2131 
2132  if (need_dummy) {
2133  dummypage = ExAllocatePoolWithTag(NonPagedPool, PAGE_SIZE, ALLOC_TAG);
2134  if (!dummypage) {
2135  ERR("out of memory\n");
2136  MmUnlockPages(master_mdl);
2137  IoFreeMdl(master_mdl);
2139  goto exit;
2140  }
2141 
2142  dummy_mdl = IoAllocateMdl(dummypage, PAGE_SIZE, false, false, NULL);
2143  if (!dummy_mdl) {
2144  ERR("IoAllocateMdl failed\n");
2145  MmUnlockPages(master_mdl);
2146  IoFreeMdl(master_mdl);
2148  goto exit;
2149  }
2150 
2151  MmBuildMdlForNonPagedPool(dummy_mdl);
2152 
2153  dummy = *(PFN_NUMBER*)(dummy_mdl + 1);
2154  }
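/* A minimal sketch, not taken from read.c, of why the dummy page exists: when a
 * stripe's MDL spans whole RAID5/6 rows, the parity blocks inside that range
 * still need page frames so the device can read the row contiguously, but their
 * contents are not wanted, so every parity-sized run of PFNs is pointed at one
 * throwaway page instead of at the caller's buffer. */
static void point_parity_at_dummy(PFN_NUMBER* stripe_pfns, uint32_t* stripe_off,
                                  uint32_t stripe_length, PFN_NUMBER dummy_pfn) {
    uint32_t k;

    for (k = 0; k < stripe_length >> PAGE_SHIFT; k++) {
        stripe_pfns[*stripe_off >> PAGE_SHIFT] = dummy_pfn;  /* discard this page's data */
        *stripe_off += PAGE_SIZE;
    }
}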
2155 
2156  stripeoff = ExAllocatePoolWithTag(NonPagedPool, sizeof(uint32_t) * ci->num_stripes, ALLOC_TAG);
2157  if (!stripeoff) {
2158  ERR("out of memory\n");
2159  MmUnlockPages(master_mdl);
2160  IoFreeMdl(master_mdl);
2162  goto exit;
2163  }
2164 
2165  RtlZeroMemory(stripeoff, sizeof(uint32_t) * ci->num_stripes);
2166 
2167  pos = 0;
2168 
2169  while (pos < length) {
2170  PFN_NUMBER* stripe_pfns;
2171 
2172  parity = (((addr - offset + pos) / ((ci->num_stripes - 1) * ci->stripe_length)) + ci->num_stripes - 1) % ci->num_stripes;
2173 
2174  if (pos == 0) {
2175  uint16_t stripe = (parity + startoffstripe + 1) % ci->num_stripes;
2176  uint32_t readlen = min(length - pos, (uint32_t)min(context.stripes[stripe].stripeend - context.stripes[stripe].stripestart,
2177  ci->stripe_length - (context.stripes[stripe].stripestart % ci->stripe_length)));
2178 
2179  stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1);
2180 
2181  RtlCopyMemory(stripe_pfns, pfns, readlen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
2182 
2183  stripeoff[stripe] = readlen;
2184  pos += readlen;
2185 
2186  stripe = (stripe + 1) % ci->num_stripes;
2187 
2188  while (stripe != parity) {
2189  stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1);
2190  readlen = min(length - pos, (uint32_t)min(context.stripes[stripe].stripeend - context.stripes[stripe].stripestart, ci->stripe_length));
2191 
2192  if (readlen == 0)
2193  break;
2194 
2195  RtlCopyMemory(stripe_pfns, &pfns[pos >> PAGE_SHIFT], readlen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
2196 
2197  stripeoff[stripe] = readlen;
2198  pos += readlen;
2199 
2200  stripe = (stripe + 1) % ci->num_stripes;
2201  }
2202  } else if (length - pos >= ci->stripe_length * (ci->num_stripes - 1)) {
2203  uint16_t stripe = (parity + 1) % ci->num_stripes;
2204  ULONG k;
2205 
2206  while (stripe != parity) {
2207  stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1);
2208 
2209  RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], (ULONG)(ci->stripe_length * sizeof(PFN_NUMBER) >> PAGE_SHIFT));
2210 
2211  stripeoff[stripe] += (uint32_t)ci->stripe_length;
2212  pos += (uint32_t)ci->stripe_length;
2213 
2214  stripe = (stripe + 1) % ci->num_stripes;
2215  }
2216 
2217  stripe_pfns = (PFN_NUMBER*)(context.stripes[parity].mdl + 1);
2218 
2219  for (k = 0; k < ci->stripe_length >> PAGE_SHIFT; k++) {
2220  stripe_pfns[stripeoff[parity] >> PAGE_SHIFT] = dummy;
2221  stripeoff[parity] += PAGE_SIZE;
2222  }
2223  } else {
2224  uint16_t stripe = (parity + 1) % ci->num_stripes;
2225  uint32_t readlen;
2226 
2227  while (pos < length) {
2228  stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1);
2229  readlen = min(length - pos, (ULONG)min(context.stripes[stripe].stripeend - context.stripes[stripe].stripestart, ci->stripe_length));
2230 
2231  if (readlen == 0)
2232  break;
2233 
2234  RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], readlen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
2235 
2236  stripeoff[stripe] += readlen;
2237  pos += readlen;
2238 
2239  stripe = (stripe + 1) % ci->num_stripes;
2240  }
2241  }
2242  }
2243 
2244  MmUnlockPages(master_mdl);
2245  IoFreeMdl(master_mdl);
2246 
2247  ExFreePool(stripeoff);
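/* A minimal sketch, not taken from read.c, restating the parity rotation used
 * above: each RAID5 row holds (num_stripes - 1) data stripes plus one parity
 * stripe, the parity device advances by one for every row, and the data stripes
 * start on the device immediately after the parity device, wrapping around.
 * For example, with three devices, row 0 puts parity on device 2 and data on
 * devices 0 and 1. */
static uint16_t raid5_parity_stripe(uint64_t chunk_off, uint64_t stripe_length,
                                    uint16_t num_stripes) {
    uint64_t row = chunk_off / ((num_stripes - 1) * stripe_length);

    return (uint16_t)((row + num_stripes - 1) % num_stripes);
}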
2248  } else if (type == BLOCK_FLAG_RAID6) {
2249  uint64_t startoff, endoff;
2250  uint16_t endoffstripe, parity1;
2251  uint32_t *stripeoff, pos;
2252  PMDL master_mdl;
2253  PFN_NUMBER *pfns, dummy = 0;
2254  bool need_dummy = false;
2255 
2256  get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes - 2, &startoff, &startoffstripe);
2257  get_raid0_offset(addr + length - offset - 1, ci->stripe_length, ci->num_stripes - 2, &endoff, &endoffstripe);
2258 
2259  if (file_read) {
2260  context.va = ExAllocatePoolWithTag(NonPagedPool, length, ALLOC_TAG);
2261 
2262  if (!context.va) {
2263  ERR("out of memory\n");
2264  Status = STATUS_INSUFFICIENT_RESOURCES;
2265  goto exit;
2266  }
2267  } else
2268  context.va = buf;
2269 
2270  master_mdl = IoAllocateMdl(context.va, length, false, false, NULL);
2271  if (!master_mdl) {
2272  ERR("out of memory\n");
2274  goto exit;
2275  }
2276 
2277  Status = STATUS_SUCCESS;
2278 
2279  _SEH2_TRY {
2280  MmProbeAndLockPages(master_mdl, KernelMode, IoWriteAccess);
2281  } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) {
2282  Status = _SEH2_GetExceptionCode();
2283  } _SEH2_END;
2284 
2285  if (!NT_SUCCESS(Status)) {
2286  ERR("MmProbeAndLockPages threw exception %08lx\n", Status);
2287  IoFreeMdl(master_mdl);
2288  goto exit;
2289  }
2290 
2291  pfns = (PFN_NUMBER*)(master_mdl + 1);
2292 
2293  pos = 0;
2294  while (pos < length) {
2295  parity1 = (((addr - offset + pos) / ((ci->num_stripes - 2) * ci->stripe_length)) + ci->num_stripes - 2) % ci->num_stripes;
2296 
2297  if (pos == 0) {
2298  uint16_t stripe = (parity1 + startoffstripe + 2) % ci->num_stripes, parity2;
2299  ULONG skip, readlen;
2300 
2301  i = startoffstripe;
2302  while (stripe != parity1) {
2303  if (i == startoffstripe) {
2304  readlen = (ULONG)min(length, ci->stripe_length - (startoff % ci->stripe_length));
2305 
2306  context.stripes[stripe].stripestart = startoff;
2307  context.stripes[stripe].stripeend = startoff + readlen;
2308 
2309  pos += readlen;
2310 
2311  if (pos == length)
2312  break;
2313  } else {
2314  readlen = min(length - pos, (ULONG)ci->stripe_length);
2315 
2316  context.stripes[stripe].stripestart = startoff - (startoff % ci->stripe_length);
2317  context.stripes[stripe].stripeend = context.stripes[stripe].stripestart + readlen;
2318 
2319  pos += readlen;
2320 
2321  if (pos == length)
2322  break;
2323  }
2324 
2325  i++;
2326  stripe = (stripe + 1) % ci->num_stripes;
2327  }
2328 
2329  if (pos == length)
2330  break;
2331 
2332  for (i = 0; i < startoffstripe; i++) {
2333  uint16_t stripe2 = (parity1 + i + 2) % ci->num_stripes;
2334 
2335  context.stripes[stripe2].stripestart = context.stripes[stripe2].stripeend = startoff - (startoff % ci->stripe_length) + ci->stripe_length;
2336  }
2337 
2338  context.stripes[parity1].stripestart = context.stripes[parity1].stripeend = startoff - (startoff % ci->stripe_length) + ci->stripe_length;
2339 
2340  parity2 = (parity1 + 1) % ci->num_stripes;
2341  context.stripes[parity2].stripestart = context.stripes[parity2].stripeend = startoff - (startoff % ci->stripe_length) + ci->stripe_length;
2342 
2343  if (length - pos > ci->num_stripes * (ci->num_stripes - 2) * ci->stripe_length) {
2344  skip = (ULONG)(((length - pos) / (ci->num_stripes * (ci->num_stripes - 2) * ci->stripe_length)) - 1);
2345 
2346  for (i = 0; i < ci->num_stripes; i++) {
2347  context.stripes[i].stripeend += skip * ci->num_stripes * ci->stripe_length;
2348  }
2349 
2350  pos += (uint32_t)(skip * (ci->num_stripes - 2) * ci->num_stripes * ci->stripe_length);
2351  need_dummy = true;
2352  }
2353  } else if (length - pos >= ci->stripe_length * (ci->num_stripes - 2)) {
2354  for (i = 0; i < ci->num_stripes; i++) {
2355  context.stripes[i].stripeend += ci->stripe_length;
2356  }
2357 
2358  pos += (uint32_t)(ci->stripe_length * (ci->num_stripes - 2));
2359  need_dummy = true;
2360  } else {
2361  uint16_t stripe = (parity1 + 2) % ci->num_stripes;
2362 
2363  i = 0;
2364  while (stripe != parity1) {
2365  if (endoffstripe == i) {
2366  context.stripes[stripe].stripeend = endoff + 1;
2367  break;
2368  } else if (endoffstripe > i)
2369  context.stripes[stripe].stripeend = endoff - (endoff % ci->stripe_length) + ci->stripe_length;
2370 
2371  i++;
2372  stripe = (stripe + 1) % ci->num_stripes;
2373  }
2374 
2375  break;
2376  }
2377  }
2378 
2379  for (i = 0; i < ci->num_stripes; i++) {
2380  if (context.stripes[i].stripestart != context.stripes[i].stripeend) {
2381  context.stripes[i].mdl = IoAllocateMdl(context.va, (ULONG)(context.stripes[i].stripeend - context.stripes[i].stripestart), false, false, NULL);
2382 
2383  if (!context.stripes[i].mdl) {
2384  ERR("IoAllocateMdl failed\n");
2385  MmUnlockPages(master_mdl);
2386  IoFreeMdl(master_mdl);
2388  goto exit;
2389  }
2390  }
2391  }
2392 
2393  if (need_dummy) {
2394  dummypage = ExAllocatePoolWithTag(NonPagedPool, PAGE_SIZE, ALLOC_TAG);
2395  if (!dummypage) {
2396  ERR("out of memory\n");
2397  MmUnlockPages(master_mdl);
2398  IoFreeMdl(master_mdl);
2400  goto exit;
2401  }
2402 
2403  dummy_mdl = IoAllocateMdl(dummypage, PAGE_SIZE, false, false, NULL);
2404  if (!dummy_mdl) {
2405  ERR("IoAllocateMdl failed\n");
2406  MmUnlockPages(master_mdl);
2407  IoFreeMdl(master_mdl);
2409  goto exit;
2410  }
2411 
2412  MmBuildMdlForNonPagedPool(dummy_mdl);
2413 
2414  dummy = *(PFN_NUMBER*)(dummy_mdl + 1);
2415  }
2416 
2417  stripeoff = ExAllocatePoolWithTag(NonPagedPool, sizeof(uint32_t) * ci->num_stripes, ALLOC_TAG);
2418  if (!stripeoff) {
2419  ERR("out of memory\n");
2420  MmUnlockPages(master_mdl);
2421  IoFreeMdl(master_mdl);
2423  goto exit;
2424  }
2425 
2426  RtlZeroMemory(stripeoff, sizeof(uint32_t) * ci->num_stripes);
2427 
2428  pos = 0;
2429 
2430  while (pos < length) {
2431  PFN_NUMBER* stripe_pfns;
2432 
2433  parity1 = (((addr - offset + pos) / ((ci->num_stripes - 2) * ci->stripe_length)) + ci->num_stripes - 2) % ci->num_stripes;
2434 
2435  if (pos == 0) {
2436  uint16_t stripe = (parity1 + startoffstripe + 2) % ci->num_stripes;
2437  uint32_t readlen = min(length - pos, (uint32_t)min(context.stripes[stripe].stripeend - context.stripes[stripe].stripestart,
2438  ci->stripe_length - (context.stripes[stripe].stripestart % ci->stripe_length)));
2439 
2440  stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1);
2441 
2442  RtlCopyMemory(stripe_pfns, pfns, readlen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
2443 
2444  stripeoff[stripe] = readlen;
2445  pos += readlen;
2446 
2447  stripe = (stripe + 1) % ci->num_stripes;
2448 
2449  while (stripe != parity1) {
2450  stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1);
2451  readlen = (uint32_t)min(length - pos, min(context.stripes[stripe].stripeend - context.stripes[stripe].stripestart, ci->stripe_length));
2452 
2453  if (readlen == 0)
2454  break;
2455 
2456  RtlCopyMemory(stripe_pfns, &pfns[pos >> PAGE_SHIFT], readlen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
2457 
2458  stripeoff[stripe] = readlen;
2459  pos += readlen;
2460 
2461  stripe = (stripe + 1) % ci->num_stripes;
2462  }
2463  } else if (length - pos >= ci->stripe_length * (ci->num_stripes - 2)) {
2464  uint16_t stripe = (parity1 + 2) % ci->num_stripes;
2465  uint16_t parity2 = (parity1 + 1) % ci->num_stripes;
2466  ULONG k;
2467 
2468  while (stripe != parity1) {
2469  stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1);
2470 
2471  RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], (ULONG)(ci->stripe_length * sizeof(PFN_NUMBER) >> PAGE_SHIFT));
2472 
2473  stripeoff[stripe] += (uint32_t)ci->stripe_length;
2474  pos += (uint32_t)ci->stripe_length;
2475 
2476  stripe = (stripe + 1) % ci->num_stripes;
2477  }
2478 
2479  stripe_pfns = (PFN_NUMBER*)(context.stripes[parity1].mdl + 1);
2480 
2481  for (k = 0; k < ci->stripe_length >> PAGE_SHIFT; k++) {
2482  stripe_pfns[stripeoff[parity1] >> PAGE_SHIFT] = dummy;
2483  stripeoff[parity1] += PAGE_SIZE;
2484  }
2485 
2486  stripe_pfns = (PFN_NUMBER*)(context.stripes[parity2].mdl + 1);
2487 
2488  for (k = 0; k < ci->stripe_length >> PAGE_SHIFT; k++) {
2489  stripe_pfns[stripeoff[parity2] >> PAGE_SHIFT] = dummy;
2490  stripeoff[parity2] += PAGE_SIZE;
2491  }
2492  } else {
2493  uint16_t stripe = (parity1 + 2) % ci->num_stripes;
2494  uint32_t readlen;
2495 
2496  while (pos < length) {
2497  stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1);
2498  readlen = (uint32_t)min(length - pos, min(context.stripes[stripe].stripeend - context.stripes[stripe].stripestart, ci->stripe_length));
2499 
2500  if (readlen == 0)
2501  break;
2502 
2503  RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], readlen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
2504 
2505  stripeoff[stripe] += readlen;
2506  pos += readlen;
2507 
2508  stripe = (stripe + 1) % ci->num_stripes;
2509  }
2510  }
2511  }
2512 
2513  MmUnlockPages(master_mdl);
2514  IoFreeMdl(master_mdl);
2515 
2516  ExFreePool(stripeoff);
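/* A minimal sketch, not taken from read.c, of the RAID6 layout assumed above:
 * each row carries two parity stripes, P (parity1) immediately followed by Q
 * (parity2), and the (num_stripes - 2) data stripes begin two devices after P,
 * wrapping around the device list. */
static void raid6_row_layout(uint64_t chunk_off, uint64_t stripe_length, uint16_t num_stripes,
                             uint16_t* p, uint16_t* q, uint16_t* first_data) {
    uint64_t row = chunk_off / ((num_stripes - 2) * stripe_length);

    *p = (uint16_t)((row + num_stripes - 2) % num_stripes);
    *q = (uint16_t)((*p + 1) % num_stripes);
    *first_data = (uint16_t)((*p + 2) % num_stripes);
}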
2517  }
2518 
2519  context.address = addr;
2520 
2521  for (i = 0; i < ci->num_stripes; i++) {
2522  if (!devices[i] || !devices[i]->devobj || context.stripes[i].stripestart == context.stripes[i].stripeend) {
2523  context.stripes[i].status = ReadDataStatus_MissingDevice;
2524  context.stripes_left--;
2525 
2526  if (!devices[i] || !devices[i]->devobj)
2527  missing_devices++;
2528  }
2529  }
2530 
2531  if (missing_devices > allowed_missing) {
2532  ERR("not enough devices to service request (%u missing)\n", missing_devices);
2534  goto exit;
2535  }
2536 
2537  for (i = 0; i < ci->num_stripes; i++) {
2538  PIO_STACK_LOCATION IrpSp;
2539 
2540  if (devices[i] && devices[i]->devobj && context.stripes[i].stripestart != context.stripes[i].stripeend && context.stripes[i].status != ReadDataStatus_Skip) {
2541  context.stripes[i].context = (struct read_data_context*)&context;
2542 
2543  if (type == BLOCK_FLAG_RAID10) {
2544  context.stripes[i].stripenum = i / ci->sub_stripes;
2545  }
2546 
2547  if (!Irp) {
2548  context.stripes[i].Irp = IoAllocateIrp(devices[i]->devobj->StackSize, false);
2549 
2550  if (!context.stripes[i].Irp) {
2551  ERR("IoAllocateIrp failed\n");
2553  goto exit;
2554  }
2555  } else {
2556  context.stripes[i].Irp = IoMakeAssociatedIrp(Irp, devices[i]->devobj->StackSize);
2557 
2558  if (!context.stripes[i].Irp) {
2559  ERR("IoMakeAssociatedIrp failed\n");
2561  goto exit;
2562  }
2563  }
2564 
2565  IrpSp = IoGetNextIrpStackLocation(context.stripes[i].Irp);
2566  IrpSp->MajorFunction = IRP_MJ_READ;
2568  IrpSp->FileObject = devices[i]->fileobj;
2569 
2570  if (devices[i]->devobj->Flags & DO_BUFFERED_IO) {
2571  context.stripes[i].Irp->AssociatedIrp.SystemBuffer = ExAllocatePoolWithTag(NonPagedPool, (ULONG)(context.stripes[i].stripeend - context.stripes[i].stripestart), ALLOC_TAG);
2572  if (!context.stripes[i].Irp->AssociatedIrp.SystemBuffer) {
2573  ERR("out of memory\n");
2575  goto exit;
2576  }
2577 
2579 
2580  context.stripes[i].Irp->UserBuffer = MmGetSystemAddressForMdlSafe(context.stripes[i].mdl, priority);
2581  } else if (devices[i]->devobj->Flags & DO_DIRECT_IO)
2582  context.stripes[i].Irp->MdlAddress = context.stripes[i].mdl;
2583  else
2584  context.stripes[i].Irp->UserBuffer = MmGetSystemAddressForMdlSafe(context.stripes[i].mdl, priority);
2585 
2586  IrpSp->Parameters.Read.Length = (ULONG)(context.stripes[i].stripeend - context.stripes[i].stripestart);
2587  IrpSp->Parameters.Read.ByteOffset.QuadPart = context.stripes[i].stripestart + cis[i].offset;
2588 
2589  total_reading += IrpSp->Parameters.Read.Length;
2590 
2591  context.stripes[i].Irp->UserIosb = &context.stripes[i].iosb;
2592 
2593  IoSetCompletionRoutine(context.stripes[i].Irp, read_data_completion, &context.stripes[i], true, true, true);
2594 
2595  context.stripes[i].status = ReadDataStatus_Pending;
2596  }
2597  }
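/* A minimal sketch, not taken from read.c, of the per-device read IRP pattern
 * used in the loop above: allocate an IRP sized for the target device's stack,
 * describe the read in the next stack location, attach the locked MDL, register
 * a completion routine, and hand the IRP to the lower driver. The names below
 * follow the generic WDM pattern; freeing the IRP is left to the caller's exit
 * path, as above. */
static NTSTATUS send_stripe_read(PDEVICE_OBJECT dev, PFILE_OBJECT fileobj, PMDL mdl,
                                 uint64_t dev_offset, ULONG length,
                                 PIO_COMPLETION_ROUTINE completion, void* context) {
    PIRP irp = IoAllocateIrp(dev->StackSize, false);
    PIO_STACK_LOCATION sp;

    if (!irp)
        return STATUS_INSUFFICIENT_RESOURCES;

    sp = IoGetNextIrpStackLocation(irp);
    sp->MajorFunction = IRP_MJ_READ;
    sp->FileObject = fileobj;
    sp->Parameters.Read.Length = length;
    sp->Parameters.Read.ByteOffset.QuadPart = (LONGLONG)dev_offset;

    irp->MdlAddress = mdl;   /* assumes the target device does direct I/O */

    IoSetCompletionRoutine(irp, completion, context, true, true, true);
    IoCallDriver(dev, irp);

    return STATUS_SUCCESS;
}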
2598 
2599  need_to_wait = false;
2600  for (i = 0; i < ci->num_stripes; i++) {
2601  if (context.stripes[i].status != ReadDataStatus_MissingDevice && context.stripes[i].status != ReadDataStatus_Skip) {
2602  IoCallDriver(devices[i]->devobj, context.stripes[i].Irp);
2603  need_to_wait = true;
2604  }
2605  }
2606 
2607  if (need_to_wait)
2608  KeWaitForSingleObject(&context.Event, Executive, KernelMode, false, NULL);
2609 
2610  if (diskacc)
2611  fFsRtlUpdateDiskCounters(total_reading, 0);
2612 
2613  // check whether any of the devices returned a "user-induced" error
2614 
2615  for (i = 0; i < ci->num_stripes; i++) {
2616  if (context.stripes[i].status == ReadDataStatus_Error && IoIsErrorUserInduced(context.stripes[i].iosb.Status)) {
2617  Status = context.stripes[i].iosb.Status;
2618  goto exit;
2619  }
2620  }
2621 
2622  if (type == BLOCK_FLAG_RAID0) {
2623  Status = read_data_raid0(Vcb, file_read ? context.va : buf, addr, length, &context, ci, devices, generation, offset);
2624  if (!NT_SUCCESS(Status)) {
2625  ERR("read_data_raid0 returned %08lx\n", Status);
2626 
2627  if (file_read)
2628  ExFreePool(context.va);
2629 
2630  goto exit;
2631  }
2632 
2633  if (file_read) {
2635  ExFreePool(context.va);
2636  }
2637  } else if (type == BLOCK_FLAG_RAID10) {
2638  Status = read_data_raid10(Vcb, file_read ? context.va : buf, addr, length, &context, ci, devices, generation, offset);
2639 
2640  if (!NT_SUCCESS(Status)) {
2641  ERR("read_data_raid10 returned %08lx\n", Status);
2642 
2643  if (file_read)
2644  ExFreePool(context.va);
2645 
2646  goto exit;
2647  }
2648 
2649  if (file_read) {
2651  ExFreePool(context.va);
2652  }
2653  } else if (type == BLOCK_FLAG_DUPLICATE) {
2654  Status = read_data_dup(Vcb, file_read ? context.va : buf, addr, &context, ci, devices, generation);
2655  if (!NT_SUCCESS(Status)) {
2656  ERR("read_data_dup returned %08lx\n", Status);
2657 
2658  if (file_read)
2659  ExFreePool(context.va);
2660 
2661  goto exit;
2662  }
2663 
2664  if (file_read) {
2666  ExFreePool(context.va);
2667  }
2668  } else if (type == BLOCK_FLAG_RAID5) {
2669  Status = read_data_raid5(Vcb, file_read ? context.va : buf, addr, length, &context, ci, devices, offset, generation, c, missing_devices > 0 ? true : false);
2670  if (!NT_SUCCESS(Status)) {
2671  ERR("read_data_raid5 returned %08lx\n", Status);
2672 
2673  if (file_read)
2674  ExFreePool(context.va);
2675 
2676  goto exit;
2677  }
2678 
2679  if (file_read) {
2681  ExFreePool(context.va);
2682  }
2683  } else if (type == BLOCK_FLAG_RAID6) {
2684  Status = read_data_raid6(Vcb, file_read ? context.va : buf, addr, length, &context, ci, devices, offset, generation, c, missing_devices > 0 ? true : false);
2685  if (!NT_SUCCESS(Status)) {
2686  ERR("read_data_raid6 returned %08lx\n", Status);
2687 
2688  if (file_read)
2689  ExFreePool(context.va);
2690 
2691  goto exit;
2692  }
2693 
2694  if (file_read) {
2696  ExFreePool(context.va);
2697  }
2698  }
2699 
2700 exit:
2701  if (c && (type == BLOCK_FLAG_RAID5 || type == BLOCK_FLAG_RAID6))
2702  chunk_unlock_range(Vcb, c, lockaddr, locklen);
2703 
2704  if (dummy_mdl)
2705  IoFreeMdl(dummy_mdl);
2706 
2707  if (dummypage)
2708  ExFreePool(dummypage);
2709 
2710  for (i = 0; i < ci->num_stripes; i++) {
2711  if (context.stripes[i].mdl) {
2712  if (context.stripes[i].mdl->MdlFlags & MDL_PAGES_LOCKED)
2713  MmUnlockPages(context.stripes[i].mdl);
2714 
2715  IoFreeMdl(context.stripes[i].mdl);
2716  }
2717 
2718  if (context.stripes[i].Irp)
2719  IoFreeIrp(context.stripes[i].Irp);
2720  }
2721 
2722  ExFreePool(context.stripes);
2723 
2724  if (!Vcb->log_to_phys_loaded)
2726 
2727  return Status;
2728 }
2729 
2730 __attribute__((nonnull(1, 2)))
2731 NTSTATUS read_stream(fcb* fcb, uint8_t* data, uint64_t start, ULONG length, ULONG* pbr) {
2732  ULONG readlen;
2733 
2734  TRACE("(%p, %p, %I64x, %lx, %p)\n", fcb, data, start, length, pbr);
2735 
2736  if (pbr) *pbr = 0;
2737 
2738  if (start >= fcb->adsdata.Length) {
2739  TRACE("tried to read beyond end of stream\n");
2740  return STATUS_END_OF_FILE;
2741  }
2742 
2743  if (length == 0) {
2744  WARN("tried to read zero bytes\n");
2745  return STATUS_SUCCESS;
2746  }
2747 
2748  if (start + length < fcb->adsdata.Length)
2749  readlen = length;
2750  else
2751  readlen = fcb->adsdata.Length - (ULONG)start;
2752 
2753  if (readlen > 0)
2754  RtlCopyMemory(data, fcb->adsdata.Buffer + start, readlen);
2755 
2756  if (pbr) *pbr = readlen;
2757 
2758  return STATUS_SUCCESS;
2759 }
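/* A minimal sketch, not taken from read.c, of the clamping read_stream does
 * above: an alternate data stream is held entirely in fcb->adsdata, so a read
 * is just a bounds check followed by a copy out of that buffer. */
static ULONG clamp_stream_read(ULONG stream_length, uint64_t start, ULONG length) {
    if (start >= stream_length)
        return 0;                               /* starts past the end: nothing to copy */

    if (start + length < stream_length)
        return length;                          /* the whole request fits */

    return stream_length - (ULONG)start;        /* truncate at the end of the stream */
}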
2760 
2761 typedef struct {
2762  uint64_t off;
2763  uint64_t ed_offset;
2764  uint64_t ed_size;
2765  uint64_t ed_num_bytes;
2766 } read_part_extent;
2767 
2768 typedef struct {
2769  LIST_ENTRY list_entry;
2770  chunk* c;
2771  uint64_t addr;
2772  uint32_t read;
2773  uint32_t to_read;
2774  void* csum;
2775  bool csum_free;
2776  uint8_t* buf;
2777  bool buf_free;
2778  uint32_t bumpoff;
2779  bool mdl;
2780  void* data;
2781  uint8_t compression;
2782  unsigned int num_extents;
2783  read_part_extent extents[1];
2784 } read_part;
2785 
2786 typedef struct {
2787  LIST_ENTRY list_entry;
2788  calc_job* cj;
2789  void* decomp;
2790  void* data;
2791  unsigned int offset;
2792  size_t length;
2793 } comp_calc_job;
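/* read_file below works in passes rather than issuing one disk read per extent:
 * it first walks the extent list and records each on-disk range to fetch as a
 * read_part, then merges adjacent compressed parts into single reads, then
 * calls read_data once per part, queuing a comp_calc_job for every compressed
 * extent, and finally drains the job list, copying each decompressed result
 * into the caller's buffer. */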
2794 
2795 __attribute__((nonnull(1, 2)))
2796 NTSTATUS read_file(fcb* fcb, uint8_t* data, uint64_t start, uint64_t length, ULONG* pbr, PIRP Irp) {
2797  NTSTATUS Status;
2798  uint32_t bytes_read = 0;
2799  uint64_t last_end;
2800  LIST_ENTRY* le;
2801  POOL_TYPE pool_type;
2802  LIST_ENTRY read_parts, calc_jobs;
2803 
2804  TRACE("(%p, %p, %I64x, %I64x, %p)\n", fcb, data, start, length, pbr);
2805 
2806  if (pbr)
2807  *pbr = 0;
2808 
2809  if (start >= fcb->inode_item.st_size) {
2810  WARN("Tried to read beyond end of file\n");
2811  return STATUS_END_OF_FILE;
2812  }
2813 
2814  InitializeListHead(&read_parts);
2815  InitializeListHead(&calc_jobs);
2816 
2817  pool_type = fcb->Header.Flags2 & FSRTL_FLAG2_IS_PAGING_FILE ? NonPagedPool : PagedPool;
2818 
2819  le = fcb->extents.Flink;
2820 
2821  last_end = start;
2822 
2823  while (le != &fcb->extents) {
2824  extent* ext = CONTAINING_RECORD(le, extent, list_entry);
2825 
2826  if (!ext->ignore) {
2827  EXTENT_DATA* ed = &ext->extent_data;
2828  uint64_t len;
2829 
2830  if (ed->type == EXTENT_TYPE_REGULAR || ed->type == EXTENT_TYPE_PREALLOC)
2831  len = ((EXTENT_DATA2*)ed->data)->num_bytes;
2832  else
2833  len = ed->decoded_size;
2834 
2835  if (ext->offset + len <= start) {
2836  last_end = ext->offset + len;
2837  goto nextitem;
2838  }
2839 
2840  if (ext->offset > last_end && ext->offset > start + bytes_read) {
2841  uint32_t read = (uint32_t)min(length, ext->offset - max(start, last_end));
2842 
2843  RtlZeroMemory(data + bytes_read, read);
2844  bytes_read += read;
2845  length -= read;
2846  }
2847 
2848  if (length == 0 || ext->offset > start + bytes_read + length)
2849  break;
2850 
2851  if (ed->encryption != BTRFS_ENCRYPTION_NONE) {
2852  WARN("Encryption not supported\n");
2854  goto exit;
2855  }
2856 
2857  if (ed->encoding != BTRFS_ENCODING_NONE) {
2858  WARN("Other encodings not supported\n");
2860  goto exit;
2861  }
2862 
2863  switch (ed->type) {
2864  case EXTENT_TYPE_INLINE:
2865  {
2866  uint64_t off = start + bytes_read - ext->offset;
2867  uint32_t read;
2868 
2869  if (ed->compression == BTRFS_COMPRESSION_NONE) {
2870  read = (uint32_t)min(min(len, ext->datalen) - off, length);
2871 
2872  RtlCopyMemory(data + bytes_read, &ed->data[off], read);
2874  uint8_t* decomp;
2875  bool decomp_alloc;
2876  uint16_t inlen = ext->datalen - (uint16_t)offsetof(EXTENT_DATA, data[0]);
2877 
2878  if (ed->decoded_size == 0 || ed->decoded_size > 0xffffffff) {
2879  ERR("ed->decoded_size was invalid (%I64x)\n", ed->decoded_size);
2881  goto exit;
2882  }
2883 
2884  read = (uint32_t)min(ed->decoded_size - off, length);
2885 
2886  if (off > 0) {
2888  if (!decomp) {
2889  ERR("out of memory\n");
2891  goto exit;
2892  }
2893 
2894  decomp_alloc = true;
2895  } else {
2896  decomp = data + bytes_read;
2897  decomp_alloc = false;
2898  }
2899 
2900  if (ed->compression == BTRFS_COMPRESSION_ZLIB) {
2901  Status = zlib_decompress(ed->data, inlen, decomp, (uint32_t)(read + off));
2902  if (!NT_SUCCESS(Status)) {
2903  ERR("zlib_decompress returned %08lx\n", Status);
2904  if (decomp_alloc) ExFreePool(decomp);
2905  goto exit;
2906  }
2907  } else if (ed->compression == BTRFS_COMPRESSION_LZO) {
2908  if (inlen < sizeof(uint32_t)) {
2909  ERR("extent data was truncated\n");
2911  if (decomp_alloc) ExFreePool(decomp);
2912  goto exit;
2913  } else
2914  inlen -= sizeof(uint32_t);
2915 
2916  Status = lzo_decompress(ed->data + sizeof(uint32_t), inlen, decomp, (uint32_t)(read + off), sizeof(uint32_t));
2917  if (!NT_SUCCESS(Status)) {
2918  ERR("lzo_decompress returned %08lx\n", Status);
2919  if (decomp_alloc) ExFreePool(decomp);
2920  goto exit;
2921  }
2922  } else if (ed->compression == BTRFS_COMPRESSION_ZSTD) {
2923  Status = zstd_decompress(ed->data, inlen, decomp, (uint32_t)(read + off));
2924  if (!NT_SUCCESS(Status)) {
2925  ERR("zstd_decompress returned %08lx\n", Status);
2926  if (decomp_alloc) ExFreePool(decomp);
2927  goto exit;
2928  }
2929  }
2930 
2931  if (decomp_alloc) {
2932  RtlCopyMemory(data + bytes_read, decomp + off, read);
2933  ExFreePool(decomp);
2934  }
2935  } else {
2936  ERR("unhandled compression type %x\n", ed->compression);
2938  goto exit;
2939  }
2940 
2941  bytes_read += read;
2942  length -= read;
2943 
2944  break;
2945  }
2946 
2947  case EXTENT_TYPE_REGULAR:
2948  {
2949  EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ed->data;
2950  read_part* rp;
2951 
2952  rp = ExAllocatePoolWithTag(pool_type, sizeof(read_part), ALLOC_TAG);
2953  if (!rp) {
2954  ERR("out of memory\n");
2956  goto exit;
2957  }
2958 
2959  rp->mdl = (Irp && Irp->MdlAddress) ? true : false;
2960  rp->extents[0].off = start + bytes_read - ext->offset;
2961  rp->bumpoff = 0;
2962  rp->num_extents = 1;
2963  rp->csum_free = false;
2964 
2965  rp->read = (uint32_t)(len - rp->extents[0].off);
2966  if (rp->read > length) rp->read = (uint32_t)length;
2967 
2968  if (ed->compression == BTRFS_COMPRESSION_NONE) {
2969  rp->addr = ed2->address + ed2->offset + rp->extents[0].off;
2970  rp->to_read = (uint32_t)sector_align(rp->read, fcb->Vcb->superblock.sector_size);
2971 
2972  if (rp->addr & (fcb->Vcb->superblock.sector_size - 1)) {
2973  rp->bumpoff = rp->addr & (fcb->Vcb->superblock.sector_size - 1);
2974  rp->addr -= rp->bumpoff;
2975  rp->to_read = (uint32_t)sector_align(rp->read + rp->bumpoff, fcb->Vcb->superblock.sector_size);
2976  }
2977  } else {
2978  rp->addr = ed2->address;
2979  rp->to_read = (uint32_t)sector_align(ed2->size, fcb->Vcb->superblock.sector_size);
2980  }
2981 
2982  if (ed->compression == BTRFS_COMPRESSION_NONE && (start & (fcb->Vcb->superblock.sector_size - 1)) == 0 &&
2983  (length & (fcb->Vcb->superblock.sector_size - 1)) == 0) {
2984  rp->buf = data + bytes_read;
2985  rp->buf_free = false;
2986  } else {
2987  rp->buf = ExAllocatePoolWithTag(pool_type, rp->to_read, ALLOC_TAG);
2988  rp->buf_free = true;
2989 
2990  if (!rp->buf) {
2991  ERR("out of memory\n");
2993  ExFreePool(rp);
2994  goto exit;
2995  }
2996 
2997  rp->mdl = false;
2998  }
2999 
3000  rp->c = get_chunk_from_address(fcb->Vcb, rp->addr);
3001 
3002  if (!rp->c) {
3003  ERR("get_chunk_from_address(%I64x) failed\n", rp->addr);
3004 
3005  if (rp->buf_free)
3006  ExFreePool(rp->buf);
3007 
3008  ExFreePool(rp);
3009 
3011  goto exit;
3012  }
3013 
3014  if (ext->csum) {
3015  if (ed->compression == BTRFS_COMPRESSION_NONE) {
3016  rp->csum = (uint8_t*)ext->csum + (fcb->Vcb->csum_size * (rp->extents[0].off >> fcb->Vcb->sector_shift));
3017  } else
3018  rp->csum = ext->csum;
3019  } else
3020  rp->csum = NULL;
3021 
3022  rp->data = data + bytes_read;
3023  rp->compression = ed->compression;
3024  rp->extents[0].ed_offset = ed2->offset;
3025  rp->extents[0].ed_size = ed2->size;
3026  rp->extents[0].ed_num_bytes = ed2->num_bytes;
3027 
3028  InsertTailList(&read_parts, &rp->list_entry);
3029 
3030  bytes_read += rp->read;
3031  length -= rp->read;
3032 
3033  break;
3034  }
3035 
3036  case EXTENT_TYPE_PREALLOC:
3037  {
3038  uint64_t off = start + bytes_read - ext->offset;
3039  uint32_t read = (uint32_t)(len - off);
3040 
3041  if (read > length) read = (uint32_t)length;
3042 
3043  RtlZeroMemory(data + bytes_read, read);
3044 
3045  bytes_read += read;
3046  length -= read;
3047 
3048  break;
3049  }
3050 
3051  default:
3052  WARN("Unsupported extent data type %u\n", ed->type);
3054  goto exit;
3055  }
3056 
3057  last_end = ext->offset + len;
3058 
3059  if (length == 0)
3060  break;
3061  }
3062 
3063 nextitem:
3064  le = le->Flink;
3065  }
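/* A minimal sketch, not taken from read.c, of how the loop above handles sparse
 * regions: any part of the requested range not covered by an extent, a hole
 * between extents or the tail past the last one, is satisfied with zeroes
 * rather than a disk read. */
static uint32_t zero_fill_hole(uint8_t* data, uint32_t bytes_read,
                               uint64_t remaining, uint64_t hole_length) {
    uint32_t zeroed = (uint32_t)min(remaining, hole_length);

    RtlZeroMemory(data + bytes_read, zeroed);

    return zeroed;   /* the caller advances bytes_read and shrinks the remaining length */
}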
3066 
3067  if (!IsListEmpty(&read_parts) && read_parts.Flink->Flink != &read_parts) { // at least two entries in list
3068  read_part* last_rp = CONTAINING_RECORD(read_parts.Flink, read_part, list_entry);
3069 
3070  le = read_parts.Flink->Flink;
3071  while (le != &read_parts) {
3072  LIST_ENTRY* le2 = le->Flink;
3073  read_part* rp = CONTAINING_RECORD(le, read_part, list_entry);
3074 
3075  // merge together runs
3076  if (rp->compression != BTRFS_COMPRESSION_NONE && rp->compression == last_rp->compression && rp->addr == last_rp->addr + last_rp->to_read &&
3077  rp->data == (uint8_t*)last_rp->data + last_rp->read && rp->c == last_rp->c && ((rp->csum && last_rp->csum) || (!rp->csum && !last_rp->csum))) {
3078  read_part* rp2;
3079 
3080  rp2 = ExAllocatePoolWithTag(pool_type, offsetof(read_part, extents) + (sizeof(read_part_extent) * (last_rp->num_extents + 1)), ALLOC_TAG);
3081 
3082  rp2->addr = last_rp->addr;
3083  rp2->c = last_rp->c;
3084  rp2->read = last_rp->read + rp->read;
3085  rp2->to_read = last_rp->to_read + rp->to_read;
3086  rp2->csum_free = false;
3087 
3088  if (last_rp->csum) {
3089  uint32_t sectors = (last_rp->to_read + rp->to_read) >> fcb->Vcb->sector_shift;
3090 
3091  rp2->csum = ExAllocatePoolWithTag(pool_type, sectors * fcb->Vcb->csum_size, ALLOC_TAG);
3092  if (!rp2->csum) {
3093  ERR("out of memory\n");
3094  ExFreePool(rp2);
3096  goto exit;
3097  }
3098 
3099  RtlCopyMemory(rp2->csum, last_rp->csum, (last_rp->to_read * fcb->Vcb->csum_size) >> fcb->Vcb->sector_shift);
3100  RtlCopyMemory((uint8_t*)rp2->csum + ((last_rp->to_read * fcb->Vcb->csum_size) >> fcb->Vcb->sector_shift), rp->csum,
3101  (rp->to_read * fcb->Vcb->csum_size) >> fcb->Vcb->sector_shift);
3102 
3103  rp2->csum_free = true;
3104  } else
3105  rp2->csum = NULL;
3106 
3107  rp2->buf = ExAllocatePoolWithTag(pool_type, rp2->to_read, ALLOC_TAG);
3108  if (!rp2->buf) {
3109  ERR("out of memory\n");
3110 
3111  if (rp2->csum)
3112  ExFreePool(rp2->csum);
3113 
3114  ExFreePool(rp2);
3116  goto exit;
3117  }
3118 
3119  rp2->buf_free = true;
3120  rp2->bumpoff = 0;
3121  rp2->mdl = false;
3122  rp2->data = last_rp->data;
3123  rp2->compression = last_rp->compression;
3124  rp2->num_extents = last_rp->num_extents + 1;
3125 
3126  RtlCopyMemory(rp2->extents, last_rp->extents, last_rp->num_extents * sizeof(read_part_extent));
3127  RtlCopyMemory(&rp2->extents[last_rp->num_extents], rp->extents, sizeof(read_part_extent));
3128 
3129  InsertHeadList(le->Blink, &rp2->list_entry);
3130 
3131  if (rp->buf_free)
3132  ExFreePool(rp->buf);
3133 
3134  if (rp->csum_free)
3135  ExFreePool(rp->csum);
3136 
3137  RemoveEntryList(&rp->list_entry);
3138 
3139  ExFreePool(rp);
3140 
3141  if (last_rp->buf_free)
3142  ExFreePool(last_rp->buf);
3143 
3144  if (last_rp->csum_free)
3145  ExFreePool(last_rp->csum);
3146 
3147  RemoveEntryList(&last_rp->list_entry);
3148 
3149  ExFreePool(last_rp);
3150 
3151  last_rp = rp2;
3152  } else
3153  last_rp = rp;
3154 
3155  le = le2;
3156  }
3157  }
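/* A minimal sketch, not taken from read.c, restating the merge test used above:
 * two compressed read_parts can be combined into a single on-disk read when
 * they use the same compression, are back to back on disk, decompress into
 * adjacent positions of the output buffer, belong to the same chunk and agree
 * on whether checksums are present. */
static bool can_merge_read_parts(const read_part* last_rp, const read_part* rp) {
    return rp->compression != BTRFS_COMPRESSION_NONE &&
           rp->compression == last_rp->compression &&
           rp->addr == last_rp->addr + last_rp->to_read &&
           rp->data == (uint8_t*)last_rp->data + last_rp->read &&
           rp->c == last_rp->c &&
           ((rp->csum && last_rp->csum) || (!rp->csum && !last_rp->csum));
}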
3158 
3159  le = read_parts.Flink;
3160  while (le != &read_parts) {
3161  read_part* rp = CONTAINING_RECORD(le, read_part, list_entry);
3162 
3163  Status = read_data(fcb->Vcb, rp->addr, rp->to_read, rp->csum, false, rp->buf, rp->c, NULL, Irp, 0, rp->mdl,
3164  fcb->Header.Flags2 & FSRTL_FLAG2_IS_PAGING_FILE ? HighPagePriority : NormalPagePriority);
3165  if (!NT_SUCCESS(Status)) {
3166  ERR("read_data returned %08lx\n", Status);
3167  goto exit;
3168  }
3169 
3170  if (rp->compression == BTRFS_COMPRESSION_NONE) {
3171  if (rp->buf_free)
3172  RtlCopyMemory(rp->data, rp->buf + rp->bumpoff, rp->read);
3173  } else {
3174  uint8_t* buf = rp->buf;
3175 
3176  for (unsigned int i = 0; i < rp->num_extents; i++) {
3177  uint8_t *decomp = NULL, *buf2;
3178  ULONG outlen, inlen, off2;
3179  uint32_t inpageoff = 0;
3180  comp_calc_job* ccj;
3181 
3182  off2 = (ULONG)(rp->extents[i].ed_offset + rp->extents[i].off);
3183  buf2 = buf;
3184  inlen = (ULONG)rp->extents[i].ed_size;
3185 
3186  if (rp->compression == BTRFS_COMPRESSION_LZO) {
3187  ULONG inoff = sizeof(uint32_t);
3188 
3189  inlen -= sizeof(uint32_t);
3190 
3191  // If reading a few sectors in, skip to the interesting bit
3192  while (off2 > LZO_PAGE_SIZE) {
3193  uint32_t partlen;
3194 
3195  if (inlen < sizeof(uint32_t))
3196  break;
3197 
3198  partlen = *(uint32_t*)(buf2 + inoff);
3199 
3200  if (partlen < inlen) {
3201  off2 -= LZO_PAGE_SIZE;
3202  inoff += partlen + sizeof(uint32_t);
3203  inlen -= partlen + sizeof(uint32_t);
3204 
3205  if (LZO_PAGE_SIZE - (inoff % LZO_PAGE_SIZE) < sizeof(uint32_t))
3206  inoff = ((inoff / LZO_PAGE_SIZE) + 1) * LZO_PAGE_SIZE;
3207  } else
3208  break;
3209  }
3210 
3211  buf2 = &buf2[inoff];
3212  inpageoff = inoff % LZO_PAGE_SIZE;
3213  }
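/* The loop above relies on the btrfs lzo framing: after the leading uint32_t
 * total length (already skipped when inoff starts at sizeof(uint32_t)), the
 * stream is a sequence of segments, each a little-endian uint32_t compressed
 * length followed by that many bytes, every segment decompressing to at most
 * one LZO_PAGE_SIZE (4 KiB) page, and a segment is padded so that it never
 * straddles a 4 KiB boundary of the compressed stream. Skipping the first
 * off2 / LZO_PAGE_SIZE output pages therefore means walking those length
 * headers; off2 alone cannot locate the starting segment. */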
3214 
3215  /* Previous versions of this code decompressed directly into the destination buffer,
3216  * but unfortunately that can't be relied on - Windows likes to use dummy pages sometimes
3217  * when mmap-ing, which breaks the backtracking used by e.g. zstd. */
3218 
3219  if (off2 != 0)
3220  outlen = off2 + min(rp->read, (uint32_t)(rp->extents[i].ed_num_bytes - rp->extents[i].off));
3221  else
3222  outlen = min(rp->read, (uint32_t)(rp->extents[i].ed_num_bytes - rp->extents[i].off));
3223 
3224  decomp = ExAllocatePoolWithTag(pool_type, outlen, ALLOC_TAG);
3225  if (!decomp) {
3226  ERR("out of memory\n");
3228  goto exit;
3229  }
3230 
3231  ccj = (comp_calc_job*)ExAllocatePoolWithTag(pool_type, sizeof(comp_calc_job), ALLOC_TAG);
3232  if (!ccj) {
3233  ERR("out of memory\n");
3234 
3235  ExFreePool(decomp);
3236 
3238  goto exit;
3239  }
3240 
3241  ccj->data = rp->data;
3242  ccj->decomp = decomp;
3243 
3244  ccj->offset = off2;
3245  ccj->length = (size_t)min(rp->read, rp->extents[i].ed_num_bytes - rp->extents[i].off);
3246 
3247  Status = add_calc_job_decomp(fcb->Vcb, rp->compression, buf2, inlen, decomp, outlen,
3248  inpageoff, &ccj->cj);
3249  if (!NT_SUCCESS(Status)) {
3250  ERR("add_calc_job_decomp returned %08lx\n", Status);
3251 
3252  ExFreePool(decomp);
3253  ExFreePool(ccj);
3254 
3255  goto exit;
3256  }
3257 
3258  InsertTailList(&calc_jobs, &ccj->list_entry);
3259 
3260  buf += rp->extents[i].ed_size;
3261  rp->data = (uint8_t*)rp->data + rp->extents[i].ed_num_bytes - rp->extents[i].off;
3262  rp->read -= (uint32_t)(rp->extents[i].ed_num_bytes - rp->extents[i].off);
3263  }
3264  }
3265 
3266  le = le->Flink;
3267  }
3268 
3269  if (length > 0 && start + bytes_read < fcb->inode_item.st_size) {
3270  uint32_t read = (uint32_t)min(fcb->inode_item.st_size - start - bytes_read, length);
3271 
3272  RtlZeroMemory(data + bytes_read, read);
3273 
3274  bytes_read += read;
3275  length -= read;
3276  }
3277 
3278  Status = STATUS_SUCCESS;
3279 
3280  while (!IsListEmpty(&calc_jobs)) {
3281  comp_calc_job* ccj = CONTAINING_RECORD(RemoveHeadList(&calc_jobs), comp_calc_job, list_entry);
3282 
3283  calc_thread_main(fcb->Vcb, ccj->cj);
3284 
3286 
3287  if (!NT_SUCCESS(ccj->cj->Status))
3288  Status = ccj->cj->Status;
3289 
3290  RtlCopyMemory(ccj->data, (uint8_t*)ccj->decomp + ccj->offset, ccj->length);
3291  ExFreePool(ccj->decomp);
3292 
3293  ExFreePool(ccj);
3294  }
3295 
3296  if (pbr)
3297  *pbr = bytes_read;
3298 
3299 exit:
3300  while (!IsListEmpty(&read_parts)) {
3301  read_part* rp = CONTAINING_RECORD(RemoveHeadList(&read_parts), read_part, list_entry);
3302 
3303  if (rp->buf_free)
3304  ExFreePool(rp->buf);
3305 
3306  if (rp->csum_free)
3307  ExFreePool(rp->csum);
3308 
3309  ExFreePool(rp);
3310  }
3311 
3312  while (!IsListEmpty(&calc_jobs)) {
3314 
3316 
3317  if (ccj->decomp)
3318  ExFreePool(ccj->decomp);
3319 
3320  ExFreePool(ccj->cj);
3321 
3322  ExFreePool(ccj);
3323  }
3324 
3325  return Status;
3326 }
3327 
3328 NTSTATUS do_read(PIRP Irp, bool wait, ULONG* bytes_read) {
3329  PIO_STACK_LOCATION IrpSp = IoGetCurrentIrpStackLocation(Irp);
3330  PFILE_OBJECT FileObject = IrpSp->FileObject;
3331  fcb* fcb = FileObject->FsContext;
3332  uint8_t* data = NULL;
3333  ULONG length = IrpSp->Parameters.Read.Length, addon = 0;
3334  uint64_t start = IrpSp->Parameters.Read.ByteOffset.QuadPart;
3335 
3336  *bytes_read = 0;
3337 
3338  if (!fcb || !fcb->Vcb || !fcb->subvol)
3339  return STATUS_INTERNAL_ERROR;
3340 
3341  TRACE("fcb = %p\n", fcb);
3342  TRACE("offset = %I64x, length = %lx\n", start, length);
3343  TRACE("paging_io = %s, no cache = %s\n", Irp->Flags & IRP_PAGING_IO ? "true" : "false", Irp->Flags & IRP_NOCACHE ? "true" : "false");
3344 
3345  if (!fcb->ads && fcb->type == BTRFS_TYPE_DIRECTORY)
3346  return STATUS_INVALID_DEVICE_REQUEST;
3347 
3348  if (!(Irp->Flags & IRP_PAGING_IO) && !FsRtlCheckLockForReadAccess(&fcb->lock, Irp)) {
3349  WARN("tried to read locked region\n");
3350  return STATUS_FILE_LOCK_CONFLICT;
3351  }
3352 
3353  if (length == 0) {
3354  TRACE("tried to read zero bytes\n");
3355  return STATUS_SUCCESS;
3356  }
3357 
3358  if (start >= (uint64_t)fcb->Header.FileSize.QuadPart) {
3359  TRACE("tried to read with offset after file end (%I64x >= %I64x)\n", start, fcb->Header.FileSize.QuadPart);
3360  return STATUS_END_OF_FILE;
3361  }
3362 
3363  TRACE("FileObject %p fcb %p FileSize = %I64x st_size = %I64x (%p)\n", FileObject, fcb, fcb->Header.FileSize.QuadPart, fcb->inode_item.st_size, &fcb->inode_item.st_size);
3364 
3365  if (!(Irp->Flags & IRP_NOCACHE) && IrpSp->MinorFunction & IRP_MN_MDL) {
3366  NTSTATUS Status = STATUS_SUCCESS;
3367 
3368  _SEH2_TRY {
3369  if (!FileObject->PrivateCacheMap) {
3370  CC_FILE_SIZES ccfs;
3371 
3372  ccfs.AllocationSize = fcb->Header.AllocationSize;
3373  ccfs.FileSize = fcb->Header.FileSize;
3374  ccfs.ValidDataLength = fcb->Header.ValidDataLength;
3375 
3376  init_file_cache(FileObject, &ccfs);
3377  }
3378 
3379  CcMdlRead(FileObject, &IrpSp->Parameters.Read.ByteOffset, length, &Irp->MdlAddress, &Irp->IoStatus);
3380  } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) {
3381  Status = _SEH2_GetExceptionCode();
3382  } _SEH2_END;
3383 
3384  if (NT_SUCCESS(Status)) {
3385  Status = Irp->IoStatus.Status;
3386  Irp->IoStatus.Information += addon;
3387  *bytes_read = (ULONG)Irp->IoStatus.Information;
3388  } else
3389  ERR("EXCEPTION - %08lx\n", Status);
3390 
3391  return Status;
3392  }
3393 
3394  data = Irp->MdlAddress ? MmGetSystemAddressForMdlSafe(Irp->MdlAddress, NormalPagePriority) : Irp->UserBuffer;
3395 
3396  if (Irp->MdlAddress && !data) {
3397  ERR("MmGetSystemAddressForMdlSafe returned NULL\n");
3398  return STATUS_INSUFFICIENT_RESOURCES;
3399  }
3400 
3401  if (start >= (uint64_t)fcb->Header.ValidDataLength.QuadPart) {
3402  length = (ULONG)min(length, min(start + length, (uint64_t)fcb->Header.FileSize.QuadPart) - fcb->Header.ValidDataLength.QuadPart);
3403  RtlZeroMemory(data, length);
3404  Irp->IoStatus.Information = *bytes_read = length;
3405  return STATUS_SUCCESS;
3406  }
3407 
3408  if (length + start > (uint64_t)fcb->Header.ValidDataLength.QuadPart) {
3409  addon = (ULONG)(min(start + length, (uint64_t)fcb->Header.FileSize.QuadPart) - fcb->Header.ValidDataLength.QuadPart);
3410  RtlZeroMemory(data + (fcb->Header.ValidDataLength.QuadPart - start), addon);
3411  length = (ULONG)(fcb->Header.ValidDataLength.QuadPart - start);
3412  }
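/* A minimal sketch, not taken from read.c, of the ValidDataLength handling
 * above: bytes between VDL and FileSize have never been written, so that part
 * of a request is satisfied with zeroes and only the range below VDL goes
 * through the cache or the disk. Plain integers stand in for the LARGE_INTEGER
 * fields used by the real code. */
static void split_read_at_vdl(uint64_t start, uint64_t length, uint64_t vdl,
                              uint64_t file_size, uint64_t* to_read, uint64_t* to_zero) {
    uint64_t end = min(start + length, file_size);

    if (start >= vdl) {                        /* entirely beyond valid data */
        *to_read = 0;
        *to_zero = end > start ? end - start : 0;
    } else {
        *to_read = min(end, vdl) - start;      /* real data below VDL */
        *to_zero = end > vdl ? end - vdl : 0;  /* zero-filled tail up to FileSize */
    }
}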
3413 
3414  if (!(Irp->Flags & IRP_NOCACHE)) {
3415  NTSTATUS Status = STATUS_SUCCESS;
3416 
3417  _SEH2_TRY {
3418  if (!FileObject->PrivateCacheMap) {
3419  CC_FILE_SIZES ccfs;
3420 
3421  ccfs.AllocationSize = fcb->Header.AllocationSize;
3422  ccfs.FileSize = fcb->Header.FileSize;
3423  ccfs.ValidDataLength = fcb->Header.ValidDataLength;
3424 
3425  init_file_cache(FileObject, &ccfs);
3426  }
3427 
3428  if (fCcCopyReadEx) {
3429  TRACE("CcCopyReadEx(%p, %I64x, %lx, %u, %p, %p, %p)\n", FileObject, IrpSp->Parameters.Read.ByteOffset.QuadPart,
3430  length, wait, data, &Irp->IoStatus, Irp->Tail.Overlay.Thread);
3431  TRACE("sizes = %I64x, %I64x, %I64x\n", fcb->Header.AllocationSize.QuadPart, fcb->Header.FileSize.QuadPart, fcb->Header.ValidDataLength.QuadPart);
3432  if (!fCcCopyReadEx(FileObject, &IrpSp->Parameters.Read.ByteOffset, length, wait, data, &Irp->IoStatus, Irp->Tail.Overlay.Thread)) {
3433  TRACE("CcCopyReadEx could not wait\n");
3434 
3435  IoMarkIrpPending(Irp);
3436  return STATUS_PENDING;
3437  }
3438  TRACE("CcCopyReadEx finished\n");
3439  } else {
3440  TRACE("CcCopyRead(%p, %I64x, %lx, %u, %p, %p)\n", FileObject, IrpSp->Parameters.Read.ByteOffset.QuadPart, length, wait, data, &Irp->IoStatus);
3441  TRACE("sizes = %I64x, %I64x, %I64x\n", fcb->Header.AllocationSize.QuadPart, fcb->Header.FileSize.QuadPart, fcb->Header.ValidDataLength.QuadPart);
3442  if (!CcCopyRead(FileObject, &IrpSp->Parameters.Read.ByteOffset, length, wait, data, &Irp->IoStatus)) {
3443  TRACE("CcCopyRead could not wait\n");
3444 
3445  IoMarkIrpPending(Irp);
3446  return STATUS_PENDING;
3447  }
3448  TRACE("CcCopyRead finished\n");
3449  }
3450  } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) {
3451  Status = _SEH2_GetExceptionCode();
3452  } _SEH2_END;
3453 
3454  if (NT_SUCCESS(Status)) {
3455  Status = Irp->IoStatus.Status;
3456  Irp->IoStatus.Information += addon;
3457  *bytes_read = (ULONG)Irp->IoStatus.Information;
3458  } else
3459  ERR("EXCEPTION - %08lx\n", Status);
3460 
3461  return Status;
3462  } else {
3463  NTSTATUS Status;
3464 
3465  if (!wait) {
3466  IoMarkIrpPending(Irp);
3467  return STATUS_PENDING;
3468  }
3469 
3470  if (fcb->ads) {
3471  Status = read_stream(fcb, data, start, length, bytes_read);
3472 
3473  if (!NT_SUCCESS(Status))
3474  ERR("read_stream returned %08lx\n", Status);
3475  } else {
3476  Status = read_file(fcb, data, start, length, bytes_read, Irp);
3477 
3478  if (!NT_SUCCESS(Status))
3479  ERR("read_file returned %08lx\n", Status);
3480  }
3481 
3482  *bytes_read += addon;
3483  TRACE("read %lu bytes\n", *bytes_read);
3484 
3485  Irp->IoStatus.Information = *bytes_read;
3486 
3487  if (diskacc && Status != STATUS_PENDING) {
3488  PETHREAD thread = NULL;
3489 
3490  if (Irp->Tail.Overlay.Thread && !IoIsSystemThread(Irp->Tail.Overlay.Thread))
3491  thread = Irp->Tail.Overlay.Thread;
3492  else if (!IoIsSystemThread(PsGetCurrentThread()))
3496 
3497  if (thread)
3498  fPsUpdateDiskCounters(PsGetThreadProcess(thread), *bytes_read, 0, 1, 0, 0);
3499  }
3500 
3501  return Status;
3502  }
3503 }
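/* A minimal sketch, not taken from read.c, of the cached branch above: make
 * sure the file object is connected to the cache manager, then ask it to copy
 * the bytes; if the caller cannot block and the data is not yet resident, the
 * copy routine returns false and the request is completed later as
 * STATUS_PENDING. The callbacks parameter stands in for whatever the file
 * system normally registers with CcInitializeCacheMap (wrapped here by
 * init_file_cache above). */
static NTSTATUS cached_read_sketch(PFILE_OBJECT FileObject, LARGE_INTEGER offset, ULONG length,
                                   bool wait, void* buffer, PIO_STATUS_BLOCK iosb,
                                   CC_FILE_SIZES* sizes, PCACHE_MANAGER_CALLBACKS callbacks,
                                   void* lazy_write_context) {
    if (!FileObject->PrivateCacheMap)
        CcInitializeCacheMap(FileObject, sizes, false, callbacks, lazy_write_context);

    if (!CcCopyRead(FileObject, &offset, length, wait, buffer, iosb))
        return STATUS_PENDING;   /* caller queues the request and retries with wait == true */

    return iosb->Status;
}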
3504 
3505 _Dispatch_type_(IRP_MJ_READ)
3506 _Function_class_(DRIVER_DISPATCH)
3507 NTSTATUS __stdcall drv_read(PDEVICE_OBJECT DeviceObject, PIRP Irp) {
3508  device_extension* Vcb = DeviceObject->DeviceExtension;
3509  PIO_STACK_LOCATION IrpSp = IoGetCurrentIrpStackLocation(Irp);
3510  PFILE_OBJECT FileObject = IrpSp->FileObject;
3511  ULONG bytes_read = 0;
3512  NTSTATUS Status;
3513  bool top_level;
3514  fcb* fcb;
3515  ccb* ccb;
3516  bool acquired_fcb_lock = false, wait;
3517 
3519 
3520  top_level = is_top_level(Irp);
3521 
3522  TRACE("read\n");
3523 
3524  if (Vcb && Vcb->type == VCB_TYPE_VOLUME) {
3525  Status = vol_read(DeviceObject, Irp);
3526  goto exit2;
3527  } else if (!Vcb || Vcb->type != VCB_TYPE_FS) {
3528  Status = STATUS_INVALID_PARAMETER;
3529  goto end;
3530  }
3531 
3532  Irp->IoStatus.Information = 0;
3533 
3534  if (IrpSp->MinorFunction & IRP_MN_COMPLETE) {
3535  CcMdlReadComplete(IrpSp->FileObject, Irp->MdlAddress);
3536 
3537  Irp->MdlAddress = NULL;
3538  Status = STATUS_SUCCESS;
3539 
3540  goto exit;
3541  }
3542 
3543  fcb = FileObject->FsContext;
3544 
3545  if (!fcb) {
3546  ERR("fcb was NULL\n");
3548  goto exit;
3549  }
3550 
3551  ccb = FileObject->FsContext2;
3552 
3553  if (!ccb) {
3554  ERR("ccb was NULL\n");
3556  goto exit;
3557  }
3558 
3559  if (Irp->RequestorMode == UserMode && !(ccb->access & FILE_READ_DATA)) {
3560  WARN("insufficient privileges\n");
3562  goto exit;
3563  }
3564 
3565  if (fcb == Vcb->volume_fcb) {
3566  TRACE("reading volume FCB\n");
3567