ReactOS  0.4.14-dev-606-g14ebc0b
read.c
Go to the documentation of this file.
1 /* Copyright (c) Mark Harmstone 2016-17
2  *
3  * This file is part of WinBtrfs.
4  *
5  * WinBtrfs is free software: you can redistribute it and/or modify
6  * it under the terms of the GNU Lesser General Public Licence as published by
7  * the Free Software Foundation, either version 3 of the Licence, or
8  * (at your option) any later version.
9  *
10  * WinBtrfs is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  * GNU Lesser General Public Licence for more details.
14  *
15  * You should have received a copy of the GNU Lesser General Public Licence
16  * along with WinBtrfs. If not, see <http://www.gnu.org/licenses/>. */
17 
18 #include "btrfs_drv.h"
19 
26 };
27 
28 struct read_data_context;
29 
30 typedef struct {
33  bool rewrite;
41 
42 typedef struct {
45  chunk* c;
53  bool tree;
57 
58 extern bool diskacc;
62 
63 #define LZO_PAGE_SIZE 4096
64 
/* Completion routine for a single stripe read IRP.
 *
 * conptr is the read_data_stripe the IRP belongs to. The IRP's I/O status
 * block is copied into the stripe, the stripe is flagged
 * ReadDataStatus_Error when the IRP did not succeed, and the context's event
 * is set once InterlockedDecrement brings stripes_left down to zero.
 *
 * NOTE(review): the embedded listing numbers skip 68, 70, 75 and 82, so the
 * declaration of `context`, the statement of the success branch and the
 * return statement are absent from this copy - recover them from the
 * original read.c rather than guessing. */
65 _Function_class_(IO_COMPLETION_ROUTINE)
66 static NTSTATUS __stdcall read_data_completion(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID conptr) {
67  read_data_stripe* stripe = conptr;
69 
71 
    // Keep the final status/information of this IRP with its stripe.
72  stripe->iosb = Irp->IoStatus;
73 
74  if (NT_SUCCESS(Irp->IoStatus.Status))
76  else
77  stripe->status = ReadDataStatus_Error;
78 
    // Signal the context event when the outstanding-stripe counter reaches zero.
79  if (InterlockedDecrement(&context->stripes_left) == 0)
80  KeSetEvent(&context->Event, 0, false);
81 
83 }
84 
/* Body of the sector checksum verifier.
 *
 * NOTE(review): the signature (listing lines 85-86) is missing from this copy;
 * callers below log "check_csum returned", so this is presumably check_csum -
 * confirm. Listing lines 107 and 110 are missing too, so the allocation of
 * csum2 and whatever follows the "out of memory" message are not visible.
 *
 * Visible behaviour: compares the CRC32C of each of `sectors` sectors of
 * `data` (sector size taken from Vcb->superblock.sector_size) with the
 * expected values in `csum`. Returns STATUS_CRC_ERROR on the first mismatch,
 * the failing status of add_calc_job if the job cannot be queued, and
 * STATUS_SUCCESS otherwise. */
87  calc_job* cj;
88  uint32_t* csum2;
89 
90  // From experimenting, it seems that 40 sectors is roughly the crossover
91  // point where offloading the crc32 calculation becomes worth it.
92 
    // Small request or fewer than two processors: checksum inline, sector by sector.
93  if (sectors < 40 || get_num_of_processors() < 2) {
94  ULONG j;
95 
96  for (j = 0; j < sectors; j++) {
97  uint32_t crc32 = ~calc_crc32c(0xffffffff, data + (j * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
98 
99  if (crc32 != csum[j]) {
100  return STATUS_CRC_ERROR;
101  }
102  }
103 
104  return STATUS_SUCCESS;
105  }
106 
108  if (!csum2) {
109  ERR("out of memory\n");
111  }
112 
    // Otherwise queue a calc job that fills csum2, and wait for its event.
113  Status = add_calc_job(Vcb, data, sectors, csum2, &cj);
114  if (!NT_SUCCESS(Status)) {
115  ERR("add_calc_job returned %08x\n", Status);
116  ExFreePool(csum2);
117  return Status;
118  }
119 
120  KeWaitForSingleObject(&cj->event, Executive, KernelMode, false, NULL);
121 
    // RtlCompareMemory returns the number of matching bytes; anything short of
    // the full array means at least one sector checksum differs.
122  if (RtlCompareMemory(csum2, csum, sectors * sizeof(uint32_t)) != sectors * sizeof(uint32_t)) {
123  free_calc_job(cj);
124  ExFreePool(csum2);
125  return STATUS_CRC_ERROR;
126  }
127 
128  free_calc_job(cj);
129  ExFreePool(csum2);
130 
131  return STATUS_SUCCESS;
132 }
133 
/* Body of the verify-and-repair routine for chunks in which each stripe is
 * read as a full copy of the same data.
 *
 * NOTE(review): the signature (listing lines 134-135) and a number of interior
 * lines are missing from this copy (the embedded numbering has gaps, e.g. 139,
 * 145, 164, 167, 174, 194, 203, 217, 223, 225, 244, 260, 275, 281, 288, 294).
 * The function name is not visible here; the block is labelled read_data_dup
 * on the assumption that it matches the upstream file - confirm.
 *
 * Visible behaviour:
 *  - returns the I/O status of the first stripe found in ReadDataStatus_Error;
 *  - otherwise uses the first stripe in ReadDataStatus_Success and validates
 *    buf: tree header CRC/address/generation when context->tree is set, or
 *    check_csum when context->csum is set;
 *  - on a checksum error with more than one stripe, re-reads the block (tree)
 *    or each failing sector (data) from the other stripes' devices, copies the
 *    first copy that validates into buf, and writes it back over the bad copy
 *    unless the volume or device is read-only;
 *  - returns STATUS_CRC_ERROR when no copy validates. */
136  ULONG i;
137  bool checksum_error = false;
138  uint16_t j, stripe = 0;
140  CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&ci[1];
141 
    // Find the first stripe that completed; bail out on the first failed one.
142  for (j = 0; j < ci->num_stripes; j++) {
143  if (context->stripes[j].status == ReadDataStatus_Error) {
144  WARN("stripe %u returned error %08x\n", j, context->stripes[j].iosb.Status);
146  return context->stripes[j].iosb.Status;
147  } else if (context->stripes[j].status == ReadDataStatus_Success) {
148  stripe = j;
149  break;
150  }
151  }
152 
153  if (context->stripes[stripe].status != ReadDataStatus_Success)
154  return STATUS_INTERNAL_ERROR;
155 
156  if (context->tree) {
157  tree_header* th = (tree_header*)buf;
158  uint32_t crc32;
159 
    // The checksum covers everything after the csum field, starting at fs_uuid.
160  crc32 = ~calc_crc32c(0xffffffff, (uint8_t*)&th->fs_uuid, context->buflen - sizeof(th->csum));
161 
162  if (th->address != context->address || crc32 != *((uint32_t*)th->csum)) {
163  checksum_error = true;
165  } else if (generation != 0 && th->generation != generation) {
166  checksum_error = true;
168  }
169  } else if (context->csum) {
170  Status = check_csum(Vcb, buf, (ULONG)context->stripes[stripe].Irp->IoStatus.Information / context->sector_size, context->csum);
171 
172  if (Status == STATUS_CRC_ERROR) {
173  checksum_error = true;
175  } else if (!NT_SUCCESS(Status)) {
176  ERR("check_csum returned %08x\n", Status);
177  return Status;
178  }
179  }
180 
181  if (!checksum_error)
182  return STATUS_SUCCESS;
183 
    // With a single stripe there is no other copy to recover from.
184  if (ci->num_stripes == 1)
185  return STATUS_CRC_ERROR;
186 
187  if (context->tree) {
188  tree_header* t2;
189  bool recovered = false;
190 
191  t2 = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.node_size, ALLOC_TAG);
192  if (!t2) {
193  ERR("out of memory\n");
195  }
196 
    // Try every other stripe whose device is present until one copy validates.
197  for (j = 0; j < ci->num_stripes; j++) {
198  if (j != stripe && devices[j] && devices[j]->devobj) {
199  Status = sync_read_phys(devices[j]->devobj, devices[j]->fileobj, cis[j].offset + context->stripes[stripe].stripestart,
200  Vcb->superblock.node_size, (uint8_t*)t2, false);
201  if (!NT_SUCCESS(Status)) {
202  WARN("sync_read_phys returned %08x\n", Status);
204  } else {
205  uint32_t crc32 = ~calc_crc32c(0xffffffff, (uint8_t*)&t2->fs_uuid, Vcb->superblock.node_size - sizeof(t2->csum));
206 
207  if (t2->address == addr && crc32 == *((uint32_t*)t2->csum) && (generation == 0 || t2->generation == generation)) {
208  RtlCopyMemory(buf, t2, Vcb->superblock.node_size);
209  ERR("recovering from checksum error at %I64x, device %I64x\n", addr, devices[stripe]->devitem.dev_id);
210  recovered = true;
211 
212  if (!Vcb->readonly && !devices[stripe]->readonly) { // write good data over bad
213  Status = write_data_phys(devices[stripe]->devobj, devices[stripe]->fileobj, cis[stripe].offset + context->stripes[stripe].stripestart,
214  t2, Vcb->superblock.node_size);
215  if (!NT_SUCCESS(Status)) {
216  WARN("write_data_phys returned %08x\n", Status);
218  }
219  }
220 
221  break;
222  } else if (t2->address != addr || crc32 != *((uint32_t*)t2->csum))
224  else
226  }
227  }
228  }
229 
230  if (!recovered) {
231  ERR("unrecoverable checksum error at %I64x\n", addr);
232  ExFreePool(t2);
233  return STATUS_CRC_ERROR;
234  }
235 
236  ExFreePool(t2);
237  } else {
238  ULONG sectors = (ULONG)context->stripes[stripe].Irp->IoStatus.Information / Vcb->superblock.sector_size;
239  uint8_t* sector;
240 
241  sector = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.sector_size, ALLOC_TAG);
242  if (!sector) {
243  ERR("out of memory\n");
245  }
246 
    // Data path: re-check each sector and repair only the ones that fail.
247  for (i = 0; i < sectors; i++) {
248  uint32_t crc32 = ~calc_crc32c(0xffffffff, buf + (i * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
249 
250  if (context->csum[i] != crc32) {
251  bool recovered = false;
252 
253  for (j = 0; j < ci->num_stripes; j++) {
254  if (j != stripe && devices[j] && devices[j]->devobj) {
255  Status = sync_read_phys(devices[j]->devobj, devices[j]->fileobj,
256  cis[j].offset + context->stripes[stripe].stripestart + UInt32x32To64(i, Vcb->superblock.sector_size),
257  Vcb->superblock.sector_size, sector, false);
258  if (!NT_SUCCESS(Status)) {
259  WARN("sync_read_phys returned %08x\n", Status);
261  } else {
262  uint32_t crc32b = ~calc_crc32c(0xffffffff, sector, Vcb->superblock.sector_size);
263 
264  if (crc32b == context->csum[i]) {
265  RtlCopyMemory(buf + (i * Vcb->superblock.sector_size), sector, Vcb->superblock.sector_size);
266  ERR("recovering from checksum error at %I64x, device %I64x\n", addr + UInt32x32To64(i, Vcb->superblock.sector_size), devices[stripe]->devitem.dev_id);
267  recovered = true;
268 
269  if (!Vcb->readonly && !devices[stripe]->readonly) { // write good data over bad
270  Status = write_data_phys(devices[stripe]->devobj, devices[stripe]->fileobj,
271  cis[stripe].offset + context->stripes[stripe].stripestart + UInt32x32To64(i, Vcb->superblock.sector_size),
272  sector, Vcb->superblock.sector_size);
273  if (!NT_SUCCESS(Status)) {
274  WARN("write_data_phys returned %08x\n", Status);
276  }
277  }
278 
279  break;
280  } else
282  }
283  }
284  }
285 
286  if (!recovered) {
287  ERR("unrecoverable checksum error at %I64x\n", addr + UInt32x32To64(i, Vcb->superblock.sector_size));
289  return STATUS_CRC_ERROR;
290  }
291  }
292  }
293 
295  }
296 
297  return STATUS_SUCCESS;
298 }
299 
/* Body of the verification routine for striped chunks without redundancy.
 *
 * NOTE(review): the signature (listing lines 300-301) and several interior
 * lines (318, 320, 326, 330, 334, 339, 349, 355) are missing from this copy,
 * including the declarations of `stripe` and `Status`. The name is not
 * visible; read_data_raid0 is assumed from the get_raid0_offset call - confirm.
 *
 * Visible behaviour: returns the I/O status of the first failed stripe; for
 * tree blocks validates the header CRC, address and generation; for data with
 * checksums runs check_csum and, on STATUS_CRC_ERROR, rescans the sectors to
 * find the first bad one and report the device it maps to. No repair is
 * attempted in the visible code - every mismatch ends in an error status. */
302  uint64_t i;
303 
304  for (i = 0; i < ci->num_stripes; i++) {
305  if (context->stripes[i].status == ReadDataStatus_Error) {
306  WARN("stripe %I64u returned error %08x\n", i, context->stripes[i].iosb.Status);
308  return context->stripes[i].iosb.Status;
309  }
310  }
311 
312  if (context->tree) { // shouldn't happen, as trees shouldn't cross stripe boundaries
313  tree_header* th = (tree_header*)buf;
314  uint32_t crc32 = ~calc_crc32c(0xffffffff, (uint8_t*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum));
315 
316  if (crc32 != *((uint32_t*)th->csum) || addr != th->address || (generation != 0 && generation != th->generation)) {
317  uint64_t off;
319 
321 
322  ERR("unrecoverable checksum error at %I64x, device %I64x\n", addr, devices[stripe]->devitem.dev_id);
323 
    // Report which of the three header checks failed; all map to STATUS_CRC_ERROR.
324  if (crc32 != *((uint32_t*)th->csum)) {
325  WARN("crc32 was %08x, expected %08x\n", crc32, *((uint32_t*)th->csum));
327  return STATUS_CRC_ERROR;
328  } else if (addr != th->address) {
329  WARN("address of tree was %I64x, not %I64x as expected\n", th->address, addr);
331  return STATUS_CRC_ERROR;
332  } else if (generation != 0 && generation != th->generation) {
333  WARN("generation of tree was %I64x, not %I64x as expected\n", th->generation, generation);
335  return STATUS_CRC_ERROR;
336  }
337  }
338  } else if (context->csum) {
340 
341  Status = check_csum(Vcb, buf, length / Vcb->superblock.sector_size, context->csum);
342 
343  if (Status == STATUS_CRC_ERROR) {
    // check_csum only says "something is wrong"; walk the sectors to find where.
344  for (i = 0; i < length / Vcb->superblock.sector_size; i++) {
345  uint32_t crc32 = ~calc_crc32c(0xffffffff, buf + (i * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
346 
347  if (context->csum[i] != crc32) {
348  uint64_t off;
350 
351  get_raid0_offset(addr - offset + UInt32x32To64(i, Vcb->superblock.sector_size), ci->stripe_length, ci->num_stripes, &off, &stripe);
352 
353  ERR("unrecoverable checksum error at %I64x, device %I64x\n", addr, devices[stripe]->devitem.dev_id);
354 
356 
357  return Status;
358  }
359  }
360 
361  return Status;
362  } else if (!NT_SUCCESS(Status)) {
363  ERR("check_csum returned %08x\n", Status);
364  return Status;
365  }
366  }
367 
368  return STATUS_SUCCESS;
369 }
370 
/* Body of the verify-and-repair routine for chunks made of mirrored
 * sub-stripe groups (ci->sub_stripes devices per group).
 *
 * NOTE(review): the signature (listing lines 371-372) and several interior
 * lines (375, 382, 395, 399, 403, 428, 431, 448, 462, 468, 470, 489, 512, 520,
 * 540, 547, 553) are missing from this copy, including the declaration of
 * `Status` and the call that fills `off`/`stripe` in the tree branch. The name
 * is not visible; read_data_raid10 is assumed - confirm.
 *
 * Visible behaviour:
 *  - returns the I/O status of the first stripe in ReadDataStatus_Error;
 *  - validates buf (tree header CRC/address/generation, or check_csum);
 *  - on a checksum error, locates the sub-stripe group, takes the member that
 *    was actually read (status Success) as the bad copy, re-reads from the
 *    members that were not read, copies the first validating copy into buf and
 *    writes it over the bad copy unless the volume or device is read-only;
 *  - returns STATUS_CRC_ERROR when nothing validates. */
373  uint64_t i;
374  uint16_t j, stripe;
376  bool checksum_error = false;
377  CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&ci[1];
378 
379  for (j = 0; j < ci->num_stripes; j++) {
380  if (context->stripes[j].status == ReadDataStatus_Error) {
    // j is a uint16_t, so it is printed with %u; a 64-bit specifier would
    // also misread the status argument that follows it.
381  WARN("stripe %u returned error %08x\n", j, context->stripes[j].iosb.Status);
383  return context->stripes[j].iosb.Status;
384  } else if (context->stripes[j].status == ReadDataStatus_Success)
385  stripe = j;
386  }
387 
388  if (context->tree) {
389  tree_header* th = (tree_header*)buf;
390  uint32_t crc32 = ~calc_crc32c(0xffffffff, (uint8_t*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum));
391 
392  if (crc32 != *((uint32_t*)th->csum)) {
393  WARN("crc32 was %08x, expected %08x\n", crc32, *((uint32_t*)th->csum));
394  checksum_error = true;
396  } else if (addr != th->address) {
397  WARN("address of tree was %I64x, not %I64x as expected\n", th->address, addr);
398  checksum_error = true;
400  } else if (generation != 0 && generation != th->generation) {
401  WARN("generation of tree was %I64x, not %I64x as expected\n", th->generation, generation);
402  checksum_error = true;
404  }
405  } else if (context->csum) {
406  Status = check_csum(Vcb, buf, length / Vcb->superblock.sector_size, context->csum);
407 
408  if (Status == STATUS_CRC_ERROR)
409  checksum_error = true;
410  else if (!NT_SUCCESS(Status)) {
411  ERR("check_csum returned %08x\n", Status);
412  return Status;
413  }
414  }
415 
416  if (!checksum_error)
417  return STATUS_SUCCESS;
418 
419  if (context->tree) {
420  tree_header* t2;
421  uint64_t off;
422  uint16_t badsubstripe = 0;
423  bool recovered = false;
424 
425  t2 = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.node_size, ALLOC_TAG);
426  if (!t2) {
427  ERR("out of memory\n");
429  }
430 
432 
    // Convert the logical stripe number into the index of its first sub-stripe.
433  stripe *= ci->sub_stripes;
434 
    // The member that was read successfully is the one holding the bad copy.
435  for (j = 0; j < ci->sub_stripes; j++) {
436  if (context->stripes[stripe + j].status == ReadDataStatus_Success) {
437  badsubstripe = j;
438  break;
439  }
440  }
441 
    // Try the members of the group that were not part of the original read.
442  for (j = 0; j < ci->sub_stripes; j++) {
443  if (context->stripes[stripe + j].status != ReadDataStatus_Success && devices[stripe + j] && devices[stripe + j]->devobj) {
444  Status = sync_read_phys(devices[stripe + j]->devobj, devices[stripe + j]->fileobj, cis[stripe + j].offset + off,
445  Vcb->superblock.node_size, (uint8_t*)t2, false);
446  if (!NT_SUCCESS(Status)) {
447  WARN("sync_read_phys returned %08x\n", Status);
449  } else {
450  uint32_t crc32 = ~calc_crc32c(0xffffffff, (uint8_t*)&t2->fs_uuid, Vcb->superblock.node_size - sizeof(t2->csum));
451 
452  if (t2->address == addr && crc32 == *((uint32_t*)t2->csum) && (generation == 0 || t2->generation == generation)) {
453  RtlCopyMemory(buf, t2, Vcb->superblock.node_size);
454  ERR("recovering from checksum error at %I64x, device %I64x\n", addr, devices[stripe + j]->devitem.dev_id);
455  recovered = true;
456 
457  if (!Vcb->readonly && !devices[stripe + badsubstripe]->readonly && devices[stripe + badsubstripe]->devobj) { // write good data over bad
458  Status = write_data_phys(devices[stripe + badsubstripe]->devobj, devices[stripe + badsubstripe]->fileobj,
459  cis[stripe + badsubstripe].offset + off, t2, Vcb->superblock.node_size);
460  if (!NT_SUCCESS(Status)) {
461  WARN("write_data_phys returned %08x\n", Status);
463  }
464  }
465 
466  break;
467  } else if (t2->address != addr || crc32 != *((uint32_t*)t2->csum))
469  else
471  }
472  }
473  }
474 
475  if (!recovered) {
476  ERR("unrecoverable checksum error at %I64x\n", addr);
477  ExFreePool(t2);
478  return STATUS_CRC_ERROR;
479  }
480 
481  ExFreePool(t2);
482  } else {
483  ULONG sectors = length / Vcb->superblock.sector_size;
484  uint8_t* sector;
485 
486  sector = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.sector_size, ALLOC_TAG);
487  if (!sector) {
488  ERR("out of memory\n");
490  }
491 
    // Data path: re-check each sector and repair only the ones that fail.
492  for (i = 0; i < sectors; i++) {
493  uint32_t crc32 = ~calc_crc32c(0xffffffff, buf + (i * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
494 
495  if (context->csum[i] != crc32) {
496  uint64_t off;
497  uint16_t stripe2, badsubstripe = 0;
498  bool recovered = false;
499 
    // Map the sector to its sub-stripe group: striping is across
    // num_stripes / sub_stripes groups.
500  get_raid0_offset(addr - offset + UInt32x32To64(i, Vcb->superblock.sector_size), ci->stripe_length,
501  ci->num_stripes / ci->sub_stripes, &off, &stripe2);
502 
503  stripe2 *= ci->sub_stripes;
504 
505  for (j = 0; j < ci->sub_stripes; j++) {
506  if (context->stripes[stripe2 + j].status == ReadDataStatus_Success) {
507  badsubstripe = j;
508  break;
509  }
510  }
511 
513 
514  for (j = 0; j < ci->sub_stripes; j++) {
515  if (context->stripes[stripe2 + j].status != ReadDataStatus_Success && devices[stripe2 + j] && devices[stripe2 + j]->devobj) {
516  Status = sync_read_phys(devices[stripe2 + j]->devobj, devices[stripe2 + j]->fileobj, cis[stripe2 + j].offset + off,
517  Vcb->superblock.sector_size, sector, false);
518  if (!NT_SUCCESS(Status)) {
519  WARN("sync_read_phys returned %08x\n", Status);
521  } else {
522  uint32_t crc32b = ~calc_crc32c(0xffffffff, sector, Vcb->superblock.sector_size);
523 
524  if (crc32b == context->csum[i]) {
525  RtlCopyMemory(buf + (i * Vcb->superblock.sector_size), sector, Vcb->superblock.sector_size);
526  ERR("recovering from checksum error at %I64x, device %I64x\n", addr + UInt32x32To64(i, Vcb->superblock.sector_size), devices[stripe2 + j]->devitem.dev_id);
527  recovered = true;
528 
529  if (!Vcb->readonly && !devices[stripe2 + badsubstripe]->readonly && devices[stripe2 + badsubstripe]->devobj) { // write good data over bad
530  Status = write_data_phys(devices[stripe2 + badsubstripe]->devobj, devices[stripe2 + badsubstripe]->fileobj,
531  cis[stripe2 + badsubstripe].offset + off, sector, Vcb->superblock.sector_size);
532  if (!NT_SUCCESS(Status)) {
533  WARN("write_data_phys returned %08x\n", Status);
    // The operation that failed here is the rewrite, so it is
    // accounted as a write error on the target device.
534  log_device_error(Vcb, devices[stripe2 + badsubstripe], BTRFS_DEV_STAT_WRITE_ERRORS);
535  }
536  }
537 
538  break;
539  } else
541  }
542  }
543  }
544 
545  if (!recovered) {
546  ERR("unrecoverable checksum error at %I64x\n", addr + UInt32x32To64(i, Vcb->superblock.sector_size));
548  return STATUS_CRC_ERROR;
549  }
550  }
551  }
552 
554  }
555 
556  return STATUS_SUCCESS;
557 }
558 
/* Tail of the signature and body of the verify-and-repair routine for
 * single-parity striped chunks.
 *
 * NOTE(review): the first signature line (listing line 559) and several
 * interior lines are missing from this copy (e.g. 562, 571, 587, 641, 645,
 * 673, 676, 689, 699, 729, 749, 771, 781, 792, 822, 830, 836), including the
 * declarations of `Status` and `ps` and the call that fills `off`/`stripe` in
 * the tree branch. The name is not visible; read_data_raid5 is assumed -
 * confirm.
 *
 * Visible behaviour:
 *  - returns the I/O status of the first stripe in ReadDataStatus_Error;
 *  - when a chunk `c` is supplied, overlays buf with data from the chunk's
 *    partial_stripes list for the clear runs of each overlapping entry's
 *    bitmap, under the shared partial_stripes_lock;
 *  - validates buf (tree header CRC/address/generation, or check_csum); a
 *    degraded read without checksums is always treated as needing rebuild;
 *  - rebuilds the block (tree) or each affected sector (data) by XORing the
 *    same offset read from every other stripe, accepts it when it validates
 *    (or when there are no checksums), and writes it back when possible;
 *  - returns STATUS_CRC_ERROR when reconstruction fails. */
560  device** devices, uint64_t offset, uint64_t generation, chunk* c, bool degraded) {
561  ULONG i;
563  bool checksum_error = false;
564  CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&ci[1];
565  uint16_t j, stripe;
566  bool no_success = true;
567 
568  for (j = 0; j < ci->num_stripes; j++) {
569  if (context->stripes[j].status == ReadDataStatus_Error) {
570  WARN("stripe %u returned error %08x\n", j, context->stripes[j].iosb.Status);
572  return context->stripes[j].iosb.Status;
573  } else if (context->stripes[j].status == ReadDataStatus_Success) {
574  stripe = j;
575  no_success = false;
576  }
577  }
578 
579  if (c) { // check partial stripes
580  LIST_ENTRY* le;
    // One full stripe holds num_stripes - 1 data stripes' worth of bytes.
581  uint64_t ps_length = (ci->num_stripes - 1) * ci->stripe_length;
582 
583  ExAcquireResourceSharedLite(&c->partial_stripes_lock, true);
584 
585  le = c->partial_stripes.Flink;
586  while (le != &c->partial_stripes) {
588 
589  if (ps->address + ps_length > addr && ps->address < addr + length) {
590  ULONG runlength, index;
591 
592  runlength = RtlFindFirstRunClear(&ps->bmp, &index);
593 
594  while (runlength != 0) {
595 #ifdef __REACTOS__
596  uint64_t runstart, runend, start, end;
597 #endif
598  if (index >= ps->bmplen)
599  break;
600 
    // Clamp a run that extends past the end of the bitmap.
601  if (index + runlength >= ps->bmplen) {
602  runlength = ps->bmplen - index;
603 
604  if (runlength == 0)
605  break;
606  }
607 
608 #ifndef __REACTOS__
609  uint64_t runstart = ps->address + (index * Vcb->superblock.sector_size);
610  uint64_t runend = runstart + (runlength * Vcb->superblock.sector_size);
611  uint64_t start = max(runstart, addr);
612  uint64_t end = min(runend, addr + length);
613 #else
614  runstart = ps->address + (index * Vcb->superblock.sector_size);
615  runend = runstart + (runlength * Vcb->superblock.sector_size);
616  start = max(runstart, addr);
617  end = min(runend, addr + length);
618 #endif
619 
    // Copy only the part of the run that intersects the requested range.
620  if (end > start)
621  RtlCopyMemory(buf + start - addr, &ps->data[start - ps->address], (ULONG)(end - start));
622 
623  runlength = RtlFindNextForwardRunClear(&ps->bmp, index + runlength, &index);
624  }
625  } else if (ps->address >= addr + length)
626  break;
627 
628  le = le->Flink;
629  }
630 
631  ExReleaseResourceLite(&c->partial_stripes_lock);
632  }
633 
634  if (context->tree) {
635  tree_header* th = (tree_header*)buf;
636  uint32_t crc32 = ~calc_crc32c(0xffffffff, (uint8_t*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum));
637 
638  if (addr != th->address || crc32 != *((uint32_t*)th->csum)) {
639  checksum_error = true;
640  if (!no_success && !degraded)
642  } else if (generation != 0 && generation != th->generation) {
643  checksum_error = true;
644  if (!no_success && !degraded)
646  }
647  } else if (context->csum) {
648  Status = check_csum(Vcb, buf, length / Vcb->superblock.sector_size, context->csum);
649 
650  if (Status == STATUS_CRC_ERROR) {
651  if (!degraded)
652  WARN("checksum error\n");
653  checksum_error = true;
654  } else if (!NT_SUCCESS(Status)) {
655  ERR("check_csum returned %08x\n", Status);
656  return Status;
657  }
658  } else if (degraded)
659  checksum_error = true;
660 
661  if (!checksum_error)
662  return STATUS_SUCCESS;
663 
664  if (context->tree) {
665  uint16_t parity;
666  uint64_t off;
667  bool recovered = false, first = true, failed = false;
668  uint8_t* t2;
669 
    // Two node-sized halves: the running XOR and a scratch area for each read.
670  t2 = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.node_size * 2, ALLOC_TAG);
671  if (!t2) {
672  ERR("out of memory\n");
674  }
675 
677 
678  parity = (((addr - offset) / ((ci->num_stripes - 1) * ci->stripe_length)) + ci->num_stripes - 1) % ci->num_stripes;
679 
    // Translate the logical stripe into a physical one, relative to the parity position.
680  stripe = (parity + stripe + 1) % ci->num_stripes;
681 
    // XOR together the same offset from every stripe except the suspect one;
    // any missing device or failed read makes reconstruction impossible.
682  for (j = 0; j < ci->num_stripes; j++) {
683  if (j != stripe) {
684  if (devices[j] && devices[j]->devobj) {
685  if (first) {
686  Status = sync_read_phys(devices[j]->devobj, devices[j]->fileobj, cis[j].offset + off, Vcb->superblock.node_size, t2, false);
687  if (!NT_SUCCESS(Status)) {
688  ERR("sync_read_phys returned %08x\n", Status);
690  failed = true;
691  break;
692  }
693 
694  first = false;
695  } else {
696  Status = sync_read_phys(devices[j]->devobj, devices[j]->fileobj, cis[j].offset + off, Vcb->superblock.node_size, t2 + Vcb->superblock.node_size, false);
697  if (!NT_SUCCESS(Status)) {
698  ERR("sync_read_phys returned %08x\n", Status);
700  failed = true;
701  break;
702  }
703 
704  do_xor(t2, t2 + Vcb->superblock.node_size, Vcb->superblock.node_size);
705  }
706  } else {
707  failed = true;
708  break;
709  }
710  }
711  }
712 
713  if (!failed) {
714  tree_header* t3 = (tree_header*)t2;
715  uint32_t crc32 = ~calc_crc32c(0xffffffff, (uint8_t*)&t3->fs_uuid, Vcb->superblock.node_size - sizeof(t3->csum));
716 
717  if (t3->address == addr && crc32 == *((uint32_t*)t3->csum) && (generation == 0 || t3->generation == generation)) {
718  RtlCopyMemory(buf, t2, Vcb->superblock.node_size);
719 
720  if (!degraded)
721  ERR("recovering from checksum error at %I64x, device %I64x\n", addr, devices[stripe]->devitem.dev_id);
722 
723  recovered = true;
724 
725  if (!Vcb->readonly && devices[stripe] && !devices[stripe]->readonly && devices[stripe]->devobj) { // write good data over bad
726  Status = write_data_phys(devices[stripe]->devobj, devices[stripe]->fileobj, cis[stripe].offset + off, t2, Vcb->superblock.node_size);
727  if (!NT_SUCCESS(Status)) {
728  WARN("write_data_phys returned %08x\n", Status);
730  }
731  }
732  }
733  }
734 
735  if (!recovered) {
736  ERR("unrecoverable checksum error at %I64x\n", addr);
737  ExFreePool(t2);
738  return STATUS_CRC_ERROR;
739  }
740 
741  ExFreePool(t2);
742  } else {
743  ULONG sectors = length / Vcb->superblock.sector_size;
744  uint8_t* sector;
745 
746  sector = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.sector_size * 2, ALLOC_TAG);
747  if (!sector) {
748  ERR("out of memory\n");
750  }
751 
752  for (i = 0; i < sectors; i++) {
753  uint16_t parity;
754  uint64_t off;
755  uint32_t crc32;
756 
    // crc32 is only computed (and only consulted below) when checksums exist.
757  if (context->csum)
758  crc32 = ~calc_crc32c(0xffffffff, buf + (i * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
759 
760  get_raid0_offset(addr - offset + UInt32x32To64(i, Vcb->superblock.sector_size), ci->stripe_length,
761  ci->num_stripes - 1, &off, &stripe);
762 
763  parity = (((addr - offset + UInt32x32To64(i, Vcb->superblock.sector_size)) / ((ci->num_stripes - 1) * ci->stripe_length)) + ci->num_stripes - 1) % ci->num_stripes;
764 
765  stripe = (parity + stripe + 1) % ci->num_stripes;
766 
    // Rebuild this sector when its device is absent or its checksum does not match.
767  if (!devices[stripe] || !devices[stripe]->devobj || (context->csum && context->csum[i] != crc32)) {
768  bool recovered = false, first = true, failed = false;
769 
770  if (devices[stripe] && devices[stripe]->devobj)
772 
773  for (j = 0; j < ci->num_stripes; j++) {
774  if (j != stripe) {
775  if (devices[j] && devices[j]->devobj) {
776  if (first) {
777  Status = sync_read_phys(devices[j]->devobj, devices[j]->fileobj, cis[j].offset + off, Vcb->superblock.sector_size, sector, false);
778  if (!NT_SUCCESS(Status)) {
779  ERR("sync_read_phys returned %08x\n", Status);
780  failed = true;
782  break;
783  }
784 
785  first = false;
786  } else {
787  Status = sync_read_phys(devices[j]->devobj, devices[j]->fileobj, cis[j].offset + off, Vcb->superblock.sector_size,
788  sector + Vcb->superblock.sector_size, false);
789  if (!NT_SUCCESS(Status)) {
790  ERR("sync_read_phys returned %08x\n", Status);
791  failed = true;
793  break;
794  }
795 
796  do_xor(sector, sector + Vcb->superblock.sector_size, Vcb->superblock.sector_size);
797  }
798  } else {
799  failed = true;
800  break;
801  }
802  }
803  }
804 
805  if (!failed) {
806  if (context->csum)
807  crc32 = ~calc_crc32c(0xffffffff, sector, Vcb->superblock.sector_size);
808 
    // Without checksums the rebuilt sector is accepted as-is.
809  if (!context->csum || crc32 == context->csum[i]) {
810  RtlCopyMemory(buf + (i * Vcb->superblock.sector_size), sector, Vcb->superblock.sector_size);
811 
812  if (!degraded)
813  ERR("recovering from checksum error at %I64x, device %I64x\n", addr + UInt32x32To64(i, Vcb->superblock.sector_size), devices[stripe]->devitem.dev_id);
814 
815  recovered = true;
816 
817  if (!Vcb->readonly && devices[stripe] && !devices[stripe]->readonly && devices[stripe]->devobj) { // write good data over bad
818  Status = write_data_phys(devices[stripe]->devobj, devices[stripe]->fileobj, cis[stripe].offset + off,
819  sector, Vcb->superblock.sector_size);
820  if (!NT_SUCCESS(Status)) {
821  WARN("write_data_phys returned %08x\n", Status);
823  }
824  }
825  }
826  }
827 
828  if (!recovered) {
829  ERR("unrecoverable checksum error at %I64x\n", addr + UInt32x32To64(i, Vcb->superblock.sector_size));
831  return STATUS_CRC_ERROR;
832  }
833  }
834  }
835 
837  }
838 
839  return STATUS_SUCCESS;
840 }
841 
/* Reconstructs missing stripe data for a dual-parity layout into `out`.
 *
 * `sectors` holds num_stripes consecutive buffers of sector_size bytes; the
 * code addresses index num_stripes - 2 as `p` and index num_stripes - 1 as
 * `q`. missing1 and missing2 are the indices of the two unavailable buffers.
 *
 * Two paths, as labelled by the existing comments:
 *  - one of the missing indices is num_stripes - 2: rebuild the other missing
 *    stripe "from q and data";
 *  - otherwise: rebuild "from p and q", combining both parities with the
 *    coefficients a and b derived through gpow2/gdiv/gmul, then XORing with p
 *    into the second half of `out` (out + sector_size).
 *
 * NOTE(review): many statements are missing from this copy (listing lines
 * 845, 850, 852, 857, 860, 866, 878-879, 886-887, 893, 896-897, 923),
 * including the declaration of `stripe` in the first branch and the bodies of
 * several if/else arms and loops. Do not build from this text; recover the
 * missing lines from the original read.c. */
842 void raid6_recover2(uint8_t* sectors, uint16_t num_stripes, ULONG sector_size, uint16_t missing1, uint16_t missing2, uint8_t* out) {
843  if (missing1 == num_stripes - 2 || missing2 == num_stripes - 2) { // reconstruct from q and data
    // Whichever index is not num_stripes - 2 is the stripe to rebuild.
844  uint16_t missing = missing1 == (num_stripes - 2) ? missing2 : missing1;
846 
    // Walk the data stripes from the highest index (num_stripes - 3) down to 0.
847  stripe = num_stripes - 3;
848 
849  if (stripe == missing)
851  else
853 
854  do {
855  stripe--;
856 
858 
859  if (stripe != missing)
861  } while (stripe > 0);
862 
863  do_xor(out, sectors + ((num_stripes - 1) * sector_size), sector_size);
864 
865  if (missing != 0)
867  } else { // reconstruct from p and q
868  uint16_t x, y, stripe;
869  uint8_t gyx, gx, denom, a, b, *p, *q, *pxy, *qxy;
870  uint32_t j;
871 
872  stripe = num_stripes - 3;
873 
    // The two halves of `out` serve as the pxy and qxy working buffers.
874  pxy = out + sector_size;
875  qxy = out;
876 
877  if (stripe == missing1 || stripe == missing2) {
880 
881  if (stripe == missing1)
882  x = stripe;
883  else
884  y = stripe;
885  } else {
888  }
889 
890  do {
891  stripe--;
892 
894 
895  if (stripe != missing1 && stripe != missing2) {
898  } else if (stripe == missing1)
899  x = stripe;
900  else if (stripe == missing2)
901  y = stripe;
902  } while (stripe > 0);
903 
    // Coefficients derived from the positions x and y of the two missing stripes.
904  gyx = gpow2(y > x ? (y-x) : (255-x+y));
905  gx = gpow2(255-x);
906 
907  denom = gdiv(1, gyx ^ 1);
908  a = gmul(gyx, denom);
909  b = gmul(gx, denom);
910 
911  p = sectors + ((num_stripes - 2) * sector_size);
912  q = sectors + ((num_stripes - 1) * sector_size);
913 
    // Byte-wise combination of both parities; the result overwrites qxy (first half of out).
914  for (j = 0; j < sector_size; j++) {
915  *qxy = gmul(a, *p ^ *pxy) ^ gmul(b, *q ^ *qxy);
916 
917  p++;
918  q++;
919  pxy++;
920  qxy++;
921  }
922 
924  do_xor(out + sector_size, sectors + ((num_stripes - 2) * sector_size), sector_size);
925  }
926 }
927 
929  device** devices, uint64_t offset, uint64_t generation, chunk* c, bool degraded) {
931  ULONG i;
932  bool checksum_error = false;
933  CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&ci[1];
934  uint16_t stripe, j;
935  bool no_success = true;
936 
937  for (j = 0; j < ci->num_stripes; j++) {
938  if (context->stripes[j].status == ReadDataStatus_Error) {
939  WARN("stripe %u returned error %08x\n", j, context->stripes[j].iosb.Status);
940 
941  if (devices[j])
943  return context->stripes[j].iosb.Status;
944  } else if (context->stripes[j].status == ReadDataStatus_Success) {
945  stripe = j;
946  no_success = false;
947  }
948  }
949 
950  if (c) { // check partial stripes
951  LIST_ENTRY* le;
952  uint64_t ps_length = (ci->num_stripes - 2) * ci->stripe_length;
953 
954  ExAcquireResourceSharedLite(&c->partial_stripes_lock, true);
955 
956  le = c->partial_stripes.Flink;
957  while (le != &c->partial_stripes) {
959 
960  if (ps->address + ps_length > addr && ps->address < addr + length) {
961  ULONG runlength, index;
962 
963  runlength = RtlFindFirstRunClear(&ps->bmp, &index);
964 
965  while (runlength != 0) {
966 #ifdef __REACTOS__
967  uint64_t runstart, runend, start, end;
968 #endif
969  if (index >= ps->bmplen)
970  break;
971 
972  if (index + runlength >= ps->bmplen) {
973  runlength = ps->bmplen - index;
974 
975  if (runlength == 0)
976  break;
977  }
978 
979 #ifndef __REACTOS__
980  uint64_t runstart = ps->address + (index * Vcb->superblock.sector_size);
981  uint64_t runend = runstart + (runlength * Vcb->superblock.sector_size);
982  uint64_t start = max(runstart, addr);
983  uint64_t end = min(runend, addr + length);
984 #else
985  runstart = ps->address + (index * Vcb->superblock.sector_size);
986  runend = runstart + (runlength * Vcb->superblock.sector_size);
987  start = max(runstart, addr);
988  end = min(runend, addr + length);
989 #endif
990 
991  if (end > start)
992  RtlCopyMemory(buf + start - addr, &ps->data[start - ps->address], (ULONG)(end - start));
993 
994  runlength = RtlFindNextForwardRunClear(&ps->bmp, index + runlength, &index);
995  }
996  } else if (ps->address >= addr + length)
997  break;
998 
999  le = le->Flink;
1000  }
1001 
1002  ExReleaseResourceLite(&c->partial_stripes_lock);
1003  }
1004 
1005  if (context->tree) {
1006  tree_header* th = (tree_header*)buf;
1007  uint32_t crc32 = ~calc_crc32c(0xffffffff, (uint8_t*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum));
1008 
1009  if (addr != th->address || crc32 != *((uint32_t*)th->csum)) {
1010  checksum_error = true;
1011  if (!no_success && !degraded && devices[stripe])
1013  } else if (generation != 0 && generation != th->generation) {
1014  checksum_error = true;
1015  if (!no_success && !degraded && devices[stripe])
1017  }
1018  } else if (context->csum) {
1019  Status = check_csum(Vcb, buf, length / Vcb->superblock.sector_size, context->csum);
1020 
1021  if (Status == STATUS_CRC_ERROR) {
1022  if (!degraded)
1023  WARN("checksum error\n");
1024  checksum_error = true;
1025  } else if (!NT_SUCCESS(Status)) {
1026  ERR("check_csum returned %08x\n", Status);
1027  return Status;
1028  }
1029  } else if (degraded)
1030  checksum_error = true;
1031 
1032  if (!checksum_error)
1033  return STATUS_SUCCESS;
1034 
1035  if (context->tree) {
1036  uint8_t* sector;
1037  uint16_t k, physstripe, parity1, parity2, error_stripe;
1038  uint64_t off;
1039  bool recovered = false, failed = false;
1040  ULONG num_errors = 0;
1041 
1042  sector = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.node_size * (ci->num_stripes + 2), ALLOC_TAG);
1043  if (!sector) {
1044  ERR("out of memory\n");
1046  }
1047 
1049 
1050  parity1 = (((addr - offset) / ((ci->num_stripes - 2) * ci->stripe_length)) + ci->num_stripes - 2) % ci->num_stripes;
1051  parity2 = (parity1 + 1) % ci->num_stripes;
1052 
1053  physstripe = (parity2 + stripe + 1) % ci->num_stripes;
1054 
1055  j = (parity2 + 1) % ci->num_stripes;
1056 
1057  for (k = 0; k < ci->num_stripes - 1; k++) {
1058  if (j != physstripe) {
1059  if (devices[j] && devices[j]->devobj) {
1060  Status = sync_read_phys(devices[j]->devobj, devices[j]->fileobj, cis[j].offset + off, Vcb->superblock.node_size,
1061  sector + (k * Vcb->superblock.node_size), false);
1062  if (!NT_SUCCESS(Status)) {
1063  ERR("sync_read_phys returned %08x\n", Status);
1065  num_errors++;
1066  error_stripe = k;
1067 
1068  if (num_errors > 1) {
1069  failed = true;
1070  break;
1071  }
1072  }
1073  } else {
1074  num_errors++;
1075  error_stripe = k;
1076 
1077  if (num_errors > 1) {
1078  failed = true;
1079  break;
1080  }
1081  }
1082  }
1083 
1084  j = (j + 1) % ci->num_stripes;
1085  }
1086 
1087  if (!failed) {
1088  if (num_errors == 0) {
1089  tree_header* th = (tree_header*)(sector + (stripe * Vcb->superblock.node_size));
1090  uint32_t crc32;
1091 
1092  RtlCopyMemory(sector + (stripe * Vcb->superblock.node_size), sector + ((ci->num_stripes - 2) * Vcb->superblock.node_size),
1093  Vcb->superblock.node_size);
1094 
1095  for (j = 0; j < ci->num_stripes - 2; j++) {
1096  if (j != stripe)
1097  do_xor(sector + (stripe * Vcb->superblock.node_size), sector + (j * Vcb->superblock.node_size), Vcb->superblock.node_size);
1098  }
1099 
1100  crc32 = ~calc_crc32c(0xffffffff, (uint8_t*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum));
1101 
1102  if (th->address == addr && crc32 == *((uint32_t*)th->csum) && (generation == 0 || th->generation == generation)) {
1103  RtlCopyMemory(buf, sector + (stripe * Vcb->superblock.node_size), Vcb->superblock.node_size);
1104 
1105  if (devices[physstripe] && devices[physstripe]->devobj)
1106  ERR("recovering from checksum error at %I64x, device %I64x\n", addr, devices[physstripe]->devitem.dev_id);
1107 
1108  recovered = true;
1109 
1110  if (!Vcb->readonly && devices[physstripe] && devices[physstripe]->devobj && !devices[physstripe]->readonly) { // write good data over bad
1111  Status = write_data_phys(devices[physstripe]->devobj, devices[physstripe]->fileobj, cis[physstripe].offset + off,
1112  sector + (stripe * Vcb->superblock.node_size), Vcb->superblock.node_size);
1113  if (!NT_SUCCESS(Status)) {
1114  WARN("write_data_phys returned %08x\n", Status);
1116  }
1117  }
1118  }
1119  }
1120 
1121  if (!recovered) {
1122  uint32_t crc32;
1123  tree_header* th = (tree_header*)(sector + (ci->num_stripes * Vcb->superblock.node_size));
1124  bool read_q = false;
1125 
1126  if (devices[parity2] && devices[parity2]->devobj) {
1127  Status = sync_read_phys(devices[parity2]->devobj, devices[parity2]->fileobj, cis[parity2].offset + off,
1128  Vcb->superblock.node_size, sector + ((ci->num_stripes - 1) * Vcb->superblock.node_size), false);
1129  if (!NT_SUCCESS(Status)) {
1130  ERR("sync_read_phys returned %08x\n", Status);
1132  } else
1133  read_q = true;
1134  }
1135 
1136  if (read_q) {
1137  if (num_errors == 1) {
1138  raid6_recover2(sector, ci->num_stripes, Vcb->superblock.node_size, stripe, error_stripe, sector + (ci->num_stripes * Vcb->superblock.node_size));
1139 
1140  crc32 = ~calc_crc32c(0xffffffff, (uint8_t*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum));
1141 
1142  if (th->address == addr && crc32 == *((uint32_t*)th->csum) && (generation == 0 || th->generation == generation))
1143  recovered = true;
1144  } else {
1145  for (j = 0; j < ci->num_stripes - 1; j++) {
1146  if (j != stripe) {
1147  raid6_recover2(sector, ci->num_stripes, Vcb->superblock.node_size, stripe, j, sector + (ci->num_stripes * Vcb->superblock.node_size));
1148 
1149  crc32 = ~calc_crc32c(0xffffffff, (uint8_t*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum));
1150 
1151  if (th->address == addr && crc32 == *((uint32_t*)th->csum) && (generation == 0 || th->generation == generation)) {
1152  recovered = true;
1153  error_stripe = j;
1154  break;
1155  }
1156  }
1157  }
1158  }
1159  }
1160 
1161  if (recovered) {
1162  uint16_t error_stripe_phys = (parity2 + error_stripe + 1) % ci->num_stripes;
1163 
1164  if (devices[physstripe] && devices[physstripe]->devobj)
1165  ERR("recovering from checksum error at %I64x, device %I64x\n", addr, devices[physstripe]->devitem.dev_id);
1166 
1167  RtlCopyMemory(buf, sector + (ci->num_stripes * Vcb->superblock.node_size), Vcb->superblock.node_size);
1168 
1169  if (!Vcb->readonly && devices[physstripe] && devices[physstripe]->devobj && !devices[physstripe]->readonly) { // write good data over bad
1170  Status = write_data_phys(devices[physstripe]->devobj, devices[physstripe]->fileobj, cis[physstripe].offset + off,
1171  sector + (ci->num_stripes * Vcb->superblock.node_size), Vcb->superblock.node_size);
1172  if (!NT_SUCCESS(Status)) {
1173  WARN("write_data_phys returned %08x\n", Status);
1175  }
1176  }
1177 
1178  if (devices[error_stripe_phys] && devices[error_stripe_phys]->devobj) {
1179  if (error_stripe == ci->num_stripes - 2) {
1180  ERR("recovering from parity error at %I64x, device %I64x\n", addr, devices[error_stripe_phys]->devitem.dev_id);
1181 
1183 
1184  RtlZeroMemory(sector + ((ci->num_stripes - 2) * Vcb->superblock.node_size), Vcb->superblock.node_size);
1185 
1186  for (j = 0; j < ci->num_stripes - 2; j++) {
1187  if (j == stripe) {
1188  do_xor(sector + ((ci->num_stripes - 2) * Vcb->superblock.node_size), sector + (ci->num_stripes * Vcb->superblock.node_size),
1189  Vcb->superblock.node_size);
1190  } else {
1191  do_xor(sector + ((ci->num_stripes - 2) * Vcb->superblock.node_size), sector + (j * Vcb->superblock.node_size),
1192  Vcb->superblock.node_size);
1193  }
1194  }
1195  } else {
1196  ERR("recovering from checksum error at %I64x, device %I64x\n", addr + ((error_stripe - stripe) * ci->stripe_length),
1197  devices[error_stripe_phys]->devitem.dev_id);
1198 
1200 
1201  RtlCopyMemory(sector + (error_stripe * Vcb->superblock.node_size),
1202  sector + ((ci->num_stripes + 1) * Vcb->superblock.node_size), Vcb->superblock.node_size);
1203  }
1204  }
1205 
1206  if (!Vcb->readonly && devices[error_stripe_phys] && devices[error_stripe_phys]->devobj && !devices[error_stripe_phys]->readonly) { // write good data over bad
1207  Status = write_data_phys(devices[error_stripe_phys]->devobj, devices[error_stripe_phys]->fileobj, cis[error_stripe_phys].offset + off,
1208  sector + (error_stripe * Vcb->superblock.node_size), Vcb->superblock.node_size);
1209  if (!NT_SUCCESS(Status)) {
1210  WARN("write_data_phys returned %08x\n", Status);
1212  }
1213  }
1214  }
1215  }
1216  }
1217 
1218  if (!recovered) {
1219  ERR("unrecoverable checksum error at %I64x\n", addr);
1220  ExFreePool(sector);
1221  return STATUS_CRC_ERROR;
1222  }
1223 
1224  ExFreePool(sector);
1225  } else {
1226  ULONG sectors = length / Vcb->superblock.sector_size;
1227  uint8_t* sector;
1228 
1229  sector = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.sector_size * (ci->num_stripes + 2), ALLOC_TAG);
1230  if (!sector) {
1231  ERR("out of memory\n");
1233  }
1234 
1235  for (i = 0; i < sectors; i++) {
1236  uint64_t off;
1237  uint16_t physstripe, parity1, parity2;
1238  uint32_t crc32;
1239 
1240  if (context->csum)
1241  crc32 = ~calc_crc32c(0xffffffff, buf + (i * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
1242 
1243  get_raid0_offset(addr - offset + UInt32x32To64(i, Vcb->superblock.sector_size), ci->stripe_length,
1244  ci->num_stripes - 2, &off, &stripe);
1245 
1246  parity1 = (((addr - offset + UInt32x32To64(i, Vcb->superblock.sector_size)) / ((ci->num_stripes - 2) * ci->stripe_length)) + ci->num_stripes - 2) % ci->num_stripes;
1247  parity2 = (parity1 + 1) % ci->num_stripes;
1248 
1249  physstripe = (parity2 + stripe + 1) % ci->num_stripes;
1250 
1251  if (!devices[physstripe] || !devices[physstripe]->devobj || (context->csum && context->csum[i] != crc32)) {
1252  uint16_t k, error_stripe;
1253  bool recovered = false, failed = false;
1254  ULONG num_errors = 0;
1255 
1256  if (devices[physstripe] && devices[physstripe]->devobj)
1258 
1259  j = (parity2 + 1) % ci->num_stripes;
1260 
1261  for (k = 0; k < ci->num_stripes - 1; k++) {
1262  if (j != physstripe) {
1263  if (devices[j] && devices[j]->devobj) {
1264  Status = sync_read_phys(devices[j]->devobj, devices[j]->fileobj, cis[j].offset + off, Vcb->superblock.sector_size,
1265  sector + (k * Vcb->superblock.sector_size), false);
1266  if (!NT_SUCCESS(Status)) {
1267  ERR("sync_read_phys returned %08x\n", Status);
1269  num_errors++;
1270  error_stripe = k;
1271 
1272  if (num_errors > 1) {
1273  failed = true;
1274  break;
1275  }
1276  }
1277  } else {
1278  num_errors++;
1279  error_stripe = k;
1280 
1281  if (num_errors > 1) {
1282  failed = true;
1283  break;
1284  }
1285  }
1286  }
1287 
1288  j = (j + 1) % ci->num_stripes;
1289  }
1290 
1291  if (!failed) {
1292  if (num_errors == 0) {
1293  RtlCopyMemory(sector + (stripe * Vcb->superblock.sector_size), sector + ((ci->num_stripes - 2) * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
1294 
1295  for (j = 0; j < ci->num_stripes - 2; j++) {
1296  if (j != stripe)
1297  do_xor(sector + (stripe * Vcb->superblock.sector_size), sector + (j * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
1298  }
1299 
1300  if (context->csum)
1301  crc32 = ~calc_crc32c(0xffffffff, sector + (stripe * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
1302 
1303  if (!context->csum || crc32 == context->csum[i]) {
1304  RtlCopyMemory(buf + (i * Vcb->superblock.sector_size), sector + (stripe * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
1305 
1306  if (devices[physstripe] && devices[physstripe]->devobj)
1307  ERR("recovering from checksum error at %I64x, device %I64x\n", addr + UInt32x32To64(i, Vcb->superblock.sector_size),
1308  devices[physstripe]->devitem.dev_id);
1309 
1310  recovered = true;
1311 
1312  if (!Vcb->readonly && devices[physstripe] && devices[physstripe]->devobj && !devices[physstripe]->readonly) { // write good data over bad
1313  Status = write_data_phys(devices[physstripe]->devobj, devices[physstripe]->fileobj, cis[physstripe].offset + off,
1314  sector + (stripe * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
1315  if (!NT_SUCCESS(Status)) {
1316  WARN("write_data_phys returned %08x\n", Status);
1318  }
1319  }
1320  }
1321  }
1322 
1323  if (!recovered) {
1324  bool read_q = false;
1325 
1326  if (devices[parity2] && devices[parity2]->devobj) {
1327  Status = sync_read_phys(devices[parity2]->devobj, devices[parity2]->fileobj, cis[parity2].offset + off,
1328  Vcb->superblock.sector_size, sector + ((ci->num_stripes - 1) * Vcb->superblock.sector_size), false);
1329  if (!NT_SUCCESS(Status)) {
1330  ERR("sync_read_phys returned %08x\n", Status);
1332  } else
1333  read_q = true;
1334  }
1335 
1336  if (read_q) {
1337  if (num_errors == 1) {
1338  raid6_recover2(sector, ci->num_stripes, Vcb->superblock.sector_size, stripe, error_stripe, sector + (ci->num_stripes * Vcb->superblock.sector_size));
1339 
1340  if (!devices[physstripe] || !devices[physstripe]->devobj)
1341  recovered = true;
1342  else {
1343  crc32 = ~calc_crc32c(0xffffffff, sector + (ci->num_stripes * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
1344 
1345  if (crc32 == context->csum[i])
1346  recovered = true;
1347  }
1348  } else {
1349  for (j = 0; j < ci->num_stripes - 1; j++) {
1350  if (j != stripe) {
1351  raid6_recover2(sector, ci->num_stripes, Vcb->superblock.sector_size, stripe, j, sector + (ci->num_stripes * Vcb->superblock.sector_size));
1352 
1353  crc32 = ~calc_crc32c(0xffffffff, sector + (ci->num_stripes * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
1354 
1355  if (crc32 == context->csum[i]) {
1356  recovered = true;
1357  error_stripe = j;
1358  break;
1359  }
1360  }
1361  }
1362  }
1363  }
1364 
1365  if (recovered) {
1366  uint16_t error_stripe_phys = (parity2 + error_stripe + 1) % ci->num_stripes;
1367 
1368  if (devices[physstripe] && devices[physstripe]->devobj)
1369  ERR("recovering from checksum error at %I64x, device %I64x\n",
1370  addr + UInt32x32To64(i, Vcb->superblock.sector_size), devices[physstripe]->devitem.dev_id);
1371 
1372  RtlCopyMemory(buf + (i * Vcb->superblock.sector_size), sector + (ci->num_stripes * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
1373 
1374  if (!Vcb->readonly && devices[physstripe] && devices[physstripe]->devobj && !devices[physstripe]->readonly) { // write good data over bad
1375  Status = write_data_phys(devices[physstripe]->devobj, devices[physstripe]->fileobj, cis[physstripe].offset + off,
1376  sector + (ci->num_stripes * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
1377  if (!NT_SUCCESS(Status)) {
1378  WARN("write_data_phys returned %08x\n", Status);
1380  }
1381  }
1382 
1383  if (devices[error_stripe_phys] && devices[error_stripe_phys]->devobj) {
1384  if (error_stripe == ci->num_stripes - 2) {
1385  ERR("recovering from parity error at %I64x, device %I64x\n", addr + UInt32x32To64(i, Vcb->superblock.sector_size),
1386  devices[error_stripe_phys]->devitem.dev_id);
1387 
1389 
1390  RtlZeroMemory(sector + ((ci->num_stripes - 2) * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
1391 
1392  for (j = 0; j < ci->num_stripes - 2; j++) {
1393  if (j == stripe) {
1394  do_xor(sector + ((ci->num_stripes - 2) * Vcb->superblock.sector_size), sector + (ci->num_stripes * Vcb->superblock.sector_size),
1395  Vcb->superblock.sector_size);
1396  } else {
1397  do_xor(sector + ((ci->num_stripes - 2) * Vcb->superblock.sector_size), sector + (j * Vcb->superblock.sector_size),
1398  Vcb->superblock.sector_size);
1399  }
1400  }
1401  } else {
1402  ERR("recovering from checksum error at %I64x, device %I64x\n",
1403  addr + UInt32x32To64(i, Vcb->superblock.sector_size) + ((error_stripe - stripe) * ci->stripe_length),
1404  devices[error_stripe_phys]->devitem.dev_id);
1405 
1407 
1408  RtlCopyMemory(sector + (error_stripe * Vcb->superblock.sector_size),
1409  sector + ((ci->num_stripes + 1) * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
1410  }
1411  }
1412 
1413  if (!Vcb->readonly && devices[error_stripe_phys] && devices[error_stripe_phys]->devobj && !devices[error_stripe_phys]->readonly) { // write good data over bad
1414  Status = write_data_phys(devices[error_stripe_phys]->devobj, devices[error_stripe_phys]->fileobj, cis[error_stripe_phys].offset + off,
1415  sector + (error_stripe * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
1416  if (!NT_SUCCESS(Status)) {
1417  WARN("write_data_phys returned %08x\n", Status);
1419  }
1420  }
1421  }
1422  }
1423  }
1424 
1425  if (!recovered) {
1426  ERR("unrecoverable checksum error at %I64x\n", addr + UInt32x32To64(i, Vcb->superblock.sector_size));
1427  ExFreePool(sector);
1428  return STATUS_CRC_ERROR;
1429  }
1430  }
1431  }
1432 
1433  ExFreePool(sector);
1434  }
1435 
1436  return STATUS_SUCCESS;
1437 }
1438 
1441  _In_ ULONG priority) {
1442  CHUNK_ITEM* ci;
1443  CHUNK_ITEM_STRIPE* cis;
1445  uint64_t type, offset, total_reading = 0;
1446  NTSTATUS Status;
1447  device** devices = NULL;
1448  uint16_t i, startoffstripe, allowed_missing, missing_devices = 0;
1449  uint8_t* dummypage = NULL;
1450  PMDL dummy_mdl = NULL;
1451  bool need_to_wait;
1452  uint64_t lockaddr, locklen;
1453 
1454  if (Vcb->log_to_phys_loaded) {
1455  if (!c) {
1457 
1458  if (!c) {
1459  ERR("get_chunk_from_address failed\n");
1460  return STATUS_INTERNAL_ERROR;
1461  }
1462  }
1463 
1464  ci = c->chunk_item;
1465  offset = c->offset;
1466  devices = c->devices;
1467 
1468  if (pc)
1469  *pc = c;
1470  } else {
1471  LIST_ENTRY* le = Vcb->sys_chunks.Flink;
1472 
1473  ci = NULL;
1474 
1475  c = NULL;
1476  while (le != &Vcb->sys_chunks) {
1478 
1479  if (sc->key.obj_id == 0x100 && sc->key.obj_type == TYPE_CHUNK_ITEM && sc->key.offset <= addr) {
1480  CHUNK_ITEM* chunk_item = sc->data;
1481 
1482  if ((addr - sc->key.offset) < chunk_item->size && chunk_item->num_stripes > 0) {
1483  ci = chunk_item;
1484  offset = sc->key.offset;
1485  cis = (CHUNK_ITEM_STRIPE*)&chunk_item[1];
1486 
1488  if (!devices) {
1489  ERR("out of memory\n");
1491  }
1492 
1493  for (i = 0; i < ci->num_stripes; i++) {
1494  devices[i] = find_device_from_uuid(Vcb, &cis[i].dev_uuid);
1495  }
1496 
1497  break;
1498  }
1499  }
1500 
1501  le = le->Flink;
1502  }
1503 
1504  if (!ci) {
1505  ERR("could not find chunk for %I64x in bootstrap\n", addr);
1506  return STATUS_INTERNAL_ERROR;
1507  }
1508 
1509  if (pc)
1510  *pc = NULL;
1511  }
1512 
1513  if (ci->type & BLOCK_FLAG_DUPLICATE) {
1515  allowed_missing = ci->num_stripes - 1;
1516  } else if (ci->type & BLOCK_FLAG_RAID0) {
1518  allowed_missing = 0;
1519  } else if (ci->type & BLOCK_FLAG_RAID1) {
1521  allowed_missing = 1;
1522  } else if (ci->type & BLOCK_FLAG_RAID10) {
1524  allowed_missing = 1;
1525  } else if (ci->type & BLOCK_FLAG_RAID5) {
1527  allowed_missing = 1;
1528  } else if (ci->type & BLOCK_FLAG_RAID6) {
1530  allowed_missing = 2;
1531  } else { // SINGLE
1533  allowed_missing = 0;
1534  }
1535 
1536  cis = (CHUNK_ITEM_STRIPE*)&ci[1];
1537 
1540 
1542  if (!context.stripes) {
1543  ERR("out of memory\n");
1545  }
1546 
1547  if (c && (type == BLOCK_FLAG_RAID5 || type == BLOCK_FLAG_RAID6)) {
1548  get_raid56_lock_range(c, addr, length, &lockaddr, &locklen);
1549  chunk_lock_range(Vcb, c, lockaddr, locklen);
1550  }
1551 
1552  RtlZeroMemory(context.stripes, sizeof(read_data_stripe) * ci->num_stripes);
1553 
1554  context.buflen = length;
1555  context.num_stripes = ci->num_stripes;
1556  context.stripes_left = context.num_stripes;
1557  context.sector_size = Vcb->superblock.sector_size;
1558  context.csum = csum;
1559  context.tree = is_tree;
1560  context.type = type;
1561 
1562  if (type == BLOCK_FLAG_RAID0) {
1563  uint64_t startoff, endoff;
1564  uint16_t endoffstripe, stripe;
1565  uint32_t *stripeoff, pos;
1566  PMDL master_mdl;
1567  PFN_NUMBER* pfns;
1568 
1569  // FIXME - test this still works if page size isn't the same as sector size
1570 
1571  // This relies on the fact that MDLs are followed in memory by the page frame numbers (PFNs),
1572  // so with a bit of jiggery-pokery you can trick your disks into deinterlacing your RAID0
1573  // data for you without doing a memcpy yourself.
1574  // MDLs are officially opaque, so this might very well break in future versions of Windows.
1575 
1576  get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes, &startoff, &startoffstripe);
1577  get_raid0_offset(addr + length - offset - 1, ci->stripe_length, ci->num_stripes, &endoff, &endoffstripe);
1578 
1579  if (file_read) {
1580  // Unfortunately we can't avoid doing at least one memcpy, as Windows can give us an MDL
1581  // with duplicated dummy PFNs, which confuse check_csum. Ah well.
1582  // See https://msdn.microsoft.com/en-us/library/windows/hardware/Dn614012.aspx if you're interested.
1583 
1585 
1586  if (!context.va) {
1587  ERR("out of memory\n");
1589  goto exit;
1590  }
1591  } else
1592  context.va = buf;
1593 
1594  master_mdl = IoAllocateMdl(context.va, length, false, false, NULL);
1595  if (!master_mdl) {
1596  ERR("out of memory\n");
1598  goto exit;
1599  }
1600 
1602 
1603  _SEH2_TRY {
1607  } _SEH2_END;
1608 
1609  if (!NT_SUCCESS(Status)) {
1610  ERR("MmProbeAndLockPages threw exception %08x\n", Status);
1611  IoFreeMdl(master_mdl);
1612  goto exit;
1613  }
1614 
1615  pfns = (PFN_NUMBER*)(master_mdl + 1);
1616 
1617  for (i = 0; i < ci->num_stripes; i++) {
1618  if (startoffstripe > i)
1619  context.stripes[i].stripestart = startoff - (startoff % ci->stripe_length) + ci->stripe_length;
1620  else if (startoffstripe == i)
1621  context.stripes[i].stripestart = startoff;
1622  else
1623  context.stripes[i].stripestart = startoff - (startoff % ci->stripe_length);
1624 
1625  if (endoffstripe > i)
1626  context.stripes[i].stripeend = endoff - (endoff % ci->stripe_length) + ci->stripe_length;
1627  else if (endoffstripe == i)
1628  context.stripes[i].stripeend = endoff + 1;
1629  else
1630  context.stripes[i].stripeend = endoff - (endoff % ci->stripe_length);
1631 
1632  if (context.stripes[i].stripestart != context.stripes[i].stripeend) {
1633  context.stripes[i].mdl = IoAllocateMdl(context.va, (ULONG)(context.stripes[i].stripeend - context.stripes[i].stripestart), false, false, NULL);
1634 
1635  if (!context.stripes[i].mdl) {
1636  ERR("IoAllocateMdl failed\n");
1637  MmUnlockPages(master_mdl);
1638  IoFreeMdl(master_mdl);
1640  goto exit;
1641  }
1642  }
1643  }
1644 
1645  stripeoff = ExAllocatePoolWithTag(NonPagedPool, sizeof(uint32_t) * ci->num_stripes, ALLOC_TAG);
1646  if (!stripeoff) {
1647  ERR("out of memory\n");
1648  MmUnlockPages(master_mdl);
1649  IoFreeMdl(master_mdl);
1651  goto exit;
1652  }
1653 
1654  RtlZeroMemory(stripeoff, sizeof(uint32_t) * ci->num_stripes);
1655 
1656  pos = 0;
1657  stripe = startoffstripe;
1658  while (pos < length) {
1659  PFN_NUMBER* stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1);
1660 
1661  if (pos == 0) {
1662  uint32_t readlen = (uint32_t)min(context.stripes[stripe].stripeend - context.stripes[stripe].stripestart, ci->stripe_length - (context.stripes[stripe].stripestart % ci->stripe_length));
1663 
1664  RtlCopyMemory(stripe_pfns, pfns, readlen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
1665 
1666  stripeoff[stripe] += readlen;
1667  pos += readlen;
1668  } else if (length - pos < ci->stripe_length) {
1669  RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], (length - pos) * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
1670 
1671  pos = length;
1672  } else {
1673  RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], (ULONG)(ci->stripe_length * sizeof(PFN_NUMBER) >> PAGE_SHIFT));
1674 
1675  stripeoff[stripe] += (uint32_t)ci->stripe_length;
1676  pos += (uint32_t)ci->stripe_length;
1677  }
1678 
1679  stripe = (stripe + 1) % ci->num_stripes;
1680  }
1681 
1682  MmUnlockPages(master_mdl);
1683  IoFreeMdl(master_mdl);
1684 
1685  ExFreePool(stripeoff);
1686  } else if (type == BLOCK_FLAG_RAID10) {
1687  uint64_t startoff, endoff;
1688  uint16_t endoffstripe, j, stripe;
1689  ULONG orig_ls;
1690  PMDL master_mdl;
1691  PFN_NUMBER* pfns;
1692  uint32_t* stripeoff, pos;
1693  read_data_stripe** stripes;
1694 
1695  if (c)
1696  orig_ls = c->last_stripe;
1697  else
1698  orig_ls = 0;
1699 
1700  get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes / ci->sub_stripes, &startoff, &startoffstripe);
1701  get_raid0_offset(addr + length - offset - 1, ci->stripe_length, ci->num_stripes / ci->sub_stripes, &endoff, &endoffstripe);
1702 
1703  if ((ci->num_stripes % ci->sub_stripes) != 0) {
1704  ERR("chunk %I64x: num_stripes %x was not a multiple of sub_stripes %x!\n", offset, ci->num_stripes, ci->sub_stripes);
1706  goto exit;
1707  }
1708 
1709  if (file_read) {
1711 
1712  if (!context.va) {
1713  ERR("out of memory\n");
1715  goto exit;
1716  }
1717  } else
1718  context.va = buf;
1719 
1720  context.firstoff = (uint16_t)((startoff % ci->stripe_length) / Vcb->superblock.sector_size);
1721  context.startoffstripe = startoffstripe;
1722  context.sectors_per_stripe = (uint16_t)(ci->stripe_length / Vcb->superblock.sector_size);
1723 
1724  startoffstripe *= ci->sub_stripes;
1725  endoffstripe *= ci->sub_stripes;
1726 
1727  if (c)
1728  c->last_stripe = (orig_ls + 1) % ci->sub_stripes;
1729 
1730  master_mdl = IoAllocateMdl(context.va, length, false, false, NULL);
1731  if (!master_mdl) {
1732  ERR("out of memory\n");
1734  goto exit;
1735  }
1736 
1738 
1739  _SEH2_TRY {
1743  } _SEH2_END;
1744 
1745  if (!NT_SUCCESS(Status)) {
1746  ERR("MmProbeAndLockPages threw exception %08x\n", Status);
1747  IoFreeMdl(master_mdl);
1748  goto exit;
1749  }
1750 
1751  pfns = (PFN_NUMBER*)(master_mdl + 1);
1752 
1754  if (!stripes) {
1755  ERR("out of memory\n");
1756  MmUnlockPages(master_mdl);
1757  IoFreeMdl(master_mdl);
1759  goto exit;
1760  }
1761 
1762  RtlZeroMemory(stripes, sizeof(read_data_stripe*) * ci->num_stripes / ci->sub_stripes);
1763 
1764  for (i = 0; i < ci->num_stripes; i += ci->sub_stripes) {
1765  uint64_t sstart, send;
1766  bool stripeset = false;
1767 
1768  if (startoffstripe > i)
1769  sstart = startoff - (startoff % ci->stripe_length) + ci->stripe_length;
1770  else if (startoffstripe == i)
1771  sstart = startoff;
1772  else
1773  sstart = startoff - (startoff % ci->stripe_length);
1774 
1775  if (endoffstripe > i)
1776  send = endoff - (endoff % ci->stripe_length) + ci->stripe_length;
1777  else if (endoffstripe == i)
1778  send = endoff + 1;
1779  else
1780  send = endoff - (endoff % ci->stripe_length);
1781 
1782  for (j = 0; j < ci->sub_stripes; j++) {
1783  if (j == orig_ls && devices[i+j] && devices[i+j]->devobj) {
1784  context.stripes[i+j].stripestart = sstart;
1785  context.stripes[i+j].stripeend = send;
1786  stripes[i / ci->sub_stripes] = &context.stripes[i+j];
1787 
1788  if (sstart != send) {
1789  context.stripes[i+j].mdl = IoAllocateMdl(context.va, (ULONG)(send - sstart), false, false, NULL);
1790 
1791  if (!context.stripes[i+j].mdl) {
1792  ERR("IoAllocateMdl failed\n");
1793  MmUnlockPages(master_mdl);
1794  IoFreeMdl(master_mdl);
1796  goto exit;
1797  }
1798  }
1799 
1800  stripeset = true;
1801  } else
1802  context.stripes[i+j].status = ReadDataStatus_Skip;
1803  }
1804 
1805  if (!stripeset) {
1806  for (j = 0; j < ci->sub_stripes; j++) {
1807  if (devices[i+j] && devices[i+j]->devobj) {
1808  context.stripes[i+j].stripestart = sstart;
1809  context.stripes[i+j].stripeend = send;
1810  context.stripes[i+j].status = ReadDataStatus_Pending;
1811  stripes[i / ci->sub_stripes] = &context.stripes[i+j];
1812 
1813  if (sstart != send) {
1814  context.stripes[i+j].mdl = IoAllocateMdl(context.va, (ULONG)(send - sstart), false, false, NULL);
1815 
1816  if (!context.stripes[i+j].mdl) {
1817  ERR("IoAllocateMdl failed\n");
1818  MmUnlockPages(master_mdl);
1819  IoFreeMdl(master_mdl);
1821  goto exit;
1822  }
1823  }
1824 
1825  stripeset = true;
1826  break;
1827  }
1828  }
1829 
1830  if (!stripeset) {
1831  ERR("could not find stripe to read\n");
1833  goto exit;
1834  }
1835  }
1836  }
1837 
1838  stripeoff = ExAllocatePoolWithTag(NonPagedPool, sizeof(uint32_t) * ci->num_stripes / ci->sub_stripes, ALLOC_TAG);
1839  if (!stripeoff) {
1840  ERR("out of memory\n");
1841  MmUnlockPages(master_mdl);
1842  IoFreeMdl(master_mdl);
1844  goto exit;
1845  }
1846 
1847  RtlZeroMemory(stripeoff, sizeof(uint32_t) * ci->num_stripes / ci->sub_stripes);
1848 
1849  pos = 0;
1850  stripe = startoffstripe / ci->sub_stripes;
1851  while (pos < length) {
1852  PFN_NUMBER* stripe_pfns = (PFN_NUMBER*)(stripes[stripe]->mdl + 1);
1853 
1854  if (pos == 0) {
1855  uint32_t readlen = (uint32_t)min(stripes[stripe]->stripeend - stripes[stripe]->stripestart,
1856  ci->stripe_length - (stripes[stripe]->stripestart % ci->stripe_length));
1857 
1858  RtlCopyMemory(stripe_pfns, pfns, readlen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
1859 
1860  stripeoff[stripe] += readlen;
1861  pos += readlen;
1862  } else if (length - pos < ci->stripe_length) {
1863  RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], (length - pos) * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
1864 
1865  pos = length;
1866  } else {
1867  RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], (ULONG)(ci->stripe_length * sizeof(PFN_NUMBER) >> PAGE_SHIFT));
1868 
1869  stripeoff[stripe] += (ULONG)ci->stripe_length;
1870  pos += (ULONG)ci->stripe_length;
1871  }
1872 
1873  stripe = (stripe + 1) % (ci->num_stripes / ci->sub_stripes);
1874  }
1875 
1876  MmUnlockPages(master_mdl);
1877  IoFreeMdl(master_mdl);
1878 
1879  ExFreePool(stripeoff);
1880  ExFreePool(stripes);
1881  } else if (type == BLOCK_FLAG_DUPLICATE) {
1882  uint64_t orig_ls;
1883 
1884  if (c)
1885  orig_ls = i = c->last_stripe;
1886  else
1887  orig_ls = i = 0;
1888 
1889  while (!devices[i] || !devices[i]->devobj) {
1890  i = (i + 1) % ci->num_stripes;
1891 
1892  if (i == orig_ls) {
1893  ERR("no devices available to service request\n");
1895  goto exit;
1896  }
1897  }
1898 
1899  if (c)
1900  c->last_stripe = (i + 1) % ci->num_stripes;
1901 
1902  context.stripes[i].stripestart = addr - offset;
1903  context.stripes[i].stripeend = context.stripes[i].stripestart + length;
1904 
1905  if (file_read) {
1907 
1908  if (!context.va) {
1909  ERR("out of memory\n");
1911  goto exit;
1912  }
1913 
1914  context.stripes[i].mdl = IoAllocateMdl(context.va, length, false, false, NULL);
1915  if (!context.stripes[i].mdl) {
1916  ERR("IoAllocateMdl failed\n");
1918  goto exit;
1919  }
1920 
1921  MmBuildMdlForNonPagedPool(context.stripes[i].mdl);
1922  } else {
1923  context.stripes[i].mdl = IoAllocateMdl(buf, length, false, false, NULL);
1924 
1925  if (!context.stripes[i].mdl) {
1926  ERR("IoAllocateMdl failed\n");
1928  goto exit;
1929  }
1930 
1932 
1933  _SEH2_TRY {
1937  } _SEH2_END;
1938 
1939  if (!NT_SUCCESS(Status)) {
1940  ERR("MmProbeAndLockPages threw exception %08x\n", Status);
1941  goto exit;
1942  }
1943  }
1944  } else if (type == BLOCK_FLAG_RAID5) {
1945  uint64_t startoff, endoff;
1946  uint16_t endoffstripe, parity;
1947  uint32_t *stripeoff, pos;
1948  PMDL master_mdl;
1949  PFN_NUMBER *pfns, dummy;
1950  bool need_dummy = false;
1951 
1952  get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes - 1, &startoff, &startoffstripe);
1953  get_raid0_offset(addr + length - offset - 1, ci->stripe_length, ci->num_stripes - 1, &endoff, &endoffstripe);
1954 
1955  if (file_read) {
1957 
1958  if (!context.va) {
1959  ERR("out of memory\n");
1961  goto exit;
1962  }
1963  } else
1964  context.va = buf;
1965 
1966  master_mdl = IoAllocateMdl(context.va, length, false, false, NULL);
1967  if (!master_mdl) {
1968  ERR("out of memory\n");
1970  goto exit;
1971  }
1972 
1974 
1975  _SEH2_TRY {
1979  } _SEH2_END;
1980 
1981  if (!NT_SUCCESS(Status)) {
1982  ERR("MmProbeAndLockPages threw exception %08x\n", Status);
1983  IoFreeMdl(master_mdl);
1984  goto exit;
1985  }
1986 
1987  pfns = (PFN_NUMBER*)(master_mdl + 1);
1988 
1989  pos = 0;
1990  while (pos < length) {
1991  parity = (((addr - offset + pos) / ((ci->num_stripes - 1) * ci->stripe_length)) + ci->num_stripes - 1) % ci->num_stripes;
1992 
1993  if (pos == 0) {
1994  uint16_t stripe = (parity + startoffstripe + 1) % ci->num_stripes;
1995  ULONG skip, readlen;
1996 
1997  i = startoffstripe;
1998  while (stripe != parity) {
1999  if (i == startoffstripe) {
2000  readlen = min(length, (ULONG)(ci->stripe_length - (startoff % ci->stripe_length)));
2001 
2002  context.stripes[stripe].stripestart = startoff;
2003  context.stripes[stripe].stripeend = startoff + readlen;
2004 
2005  pos += readlen;
2006 
2007  if (pos == length)
2008  break;
2009  } else {
2010  readlen = min(length - pos, (ULONG)ci->stripe_length);
2011 
2012  context.stripes[stripe].stripestart = startoff - (startoff % ci->stripe_length);
2013  context.stripes[stripe].stripeend = context.stripes[stripe].stripestart + readlen;
2014 
2015  pos += readlen;
2016 
2017  if (pos == length)
2018  break;
2019  }
2020 
2021  i++;
2022  stripe = (stripe + 1) % ci->num_stripes;
2023  }
2024 
2025  if (pos == length)
2026  break;
2027 
2028  for (i = 0; i < startoffstripe; i++) {
2029  uint16_t stripe2 = (parity + i + 1) % ci->num_stripes;
2030 
2031  context.stripes[stripe2].stripestart = context.stripes[stripe2].stripeend = startoff - (startoff % ci->stripe_length) + ci->stripe_length;
2032  }
2033 
2034  context.stripes[parity].stripestart = context.stripes[parity].stripeend = startoff - (startoff % ci->stripe_length) + ci->stripe_length;
2035 
2036  if (length - pos > ci->num_stripes * (ci->num_stripes - 1) * ci->stripe_length) {
2037  skip = (ULONG)(((length - pos) / (ci->num_stripes * (ci->num_stripes - 1) * ci->stripe_length)) - 1);
2038 
2039  for (i = 0; i < ci->num_stripes; i++) {
2040  context.stripes[i].stripeend += skip * ci->num_stripes * ci->stripe_length;
2041  }
2042 
2043  pos += (uint32_t)(skip * (ci->num_stripes - 1) * ci->num_stripes * ci->stripe_length);
2044  need_dummy = true;
2045  }
2046  } else if (length - pos >= ci->stripe_length * (ci->num_stripes - 1)) {
2047  for (i = 0; i < ci->num_stripes; i++) {
2048  context.stripes[i].stripeend += ci->stripe_length;
2049  }
2050 
2051  pos += (uint32_t)(ci->stripe_length * (ci->num_stripes - 1));
2052  need_dummy = true;
2053  } else {
2054  uint16_t stripe = (parity + 1) % ci->num_stripes;
2055 
2056  i = 0;
2057  while (stripe != parity) {
2058  if (endoffstripe == i) {
2059  context.stripes[stripe].stripeend = endoff + 1;
2060  break;
2061  } else if (endoffstripe > i)
2062  context.stripes[stripe].stripeend = endoff - (endoff % ci->stripe_length) + ci->stripe_length;
2063 
2064  i++;
2065  stripe = (stripe + 1) % ci->num_stripes;
2066  }
2067 
2068  break;
2069  }
2070  }
2071 
2072  for (i = 0; i < ci->num_stripes; i++) {
2073  if (context.stripes[i].stripestart != context.stripes[i].stripeend) {
2074  context.stripes[i].mdl = IoAllocateMdl(context.va, (ULONG)(context.stripes[i].stripeend - context.stripes[i].stripestart),
2075  false, false, NULL);
2076 
2077  if (!context.stripes[i].mdl) {
2078  ERR("IoAllocateMdl failed\n");
2079  MmUnlockPages(master_mdl);
2080  IoFreeMdl(master_mdl);
2082  goto exit;
2083  }
2084  }
2085  }
2086 
2087  if (need_dummy) {
2089  if (!dummypage) {
2090  ERR("out of memory\n");
2091  MmUnlockPages(master_mdl);
2092  IoFreeMdl(master_mdl);
2094  goto exit;
2095  }
2096 
2097  dummy_mdl = IoAllocateMdl(dummypage, PAGE_SIZE, false, false, NULL);
2098  if (!dummy_mdl) {
2099  ERR("IoAllocateMdl failed\n");
2100  MmUnlockPages(master_mdl);
2101  IoFreeMdl(master_mdl);
2103  goto exit;
2104  }
2105 
2106  MmBuildMdlForNonPagedPool(dummy_mdl);
2107 
2108  dummy = *(PFN_NUMBER*)(dummy_mdl + 1);
2109  }
2110 
2111  stripeoff = ExAllocatePoolWithTag(NonPagedPool, sizeof(uint32_t) * ci->num_stripes, ALLOC_TAG);
2112  if (!stripeoff) {
2113  ERR("out of memory\n");
2114  MmUnlockPages(master_mdl);
2115  IoFreeMdl(master_mdl);
2117  goto exit;
2118  }
2119 
2120  RtlZeroMemory(stripeoff, sizeof(uint32_t) * ci->num_stripes);
2121 
2122  pos = 0;
2123 
2124  while (pos < length) {
2125  PFN_NUMBER* stripe_pfns;
2126 
2127  parity = (((addr - offset + pos) / ((ci->num_stripes - 1) * ci->stripe_length)) + ci->num_stripes - 1) % ci->num_stripes;
2128 
2129  if (pos == 0) {
2130  uint16_t stripe = (parity + startoffstripe + 1) % ci->num_stripes;
2131  uint32_t readlen = min(length - pos, (uint32_t)min(context.stripes[stripe].stripeend - context.stripes[stripe].stripestart,
2132  ci->stripe_length - (context.stripes[stripe].stripestart % ci->stripe_length)));
2133 
2134  stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1);
2135 
2136  RtlCopyMemory(stripe_pfns, pfns, readlen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
2137 
2138  stripeoff[stripe] = readlen;
2139  pos += readlen;
2140 
2141  stripe = (stripe + 1) % ci->num_stripes;
2142 
2143  while (stripe != parity) {
2144  stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1);
2145  readlen = min(length - pos, (uint32_t)min(context.stripes[stripe].stripeend - context.stripes[stripe].stripestart, ci->stripe_length));
2146 
2147  if (readlen == 0)
2148  break;
2149 
2150  RtlCopyMemory(stripe_pfns, &pfns[pos >> PAGE_SHIFT], readlen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
2151 
2152  stripeoff[stripe] = readlen;
2153  pos += readlen;
2154 
2155  stripe = (stripe + 1) % ci->num_stripes;
2156  }
2157  } else if (length - pos >= ci->stripe_length * (ci->num_stripes - 1)) {
2158  uint16_t stripe = (parity + 1) % ci->num_stripes;
2159  ULONG k;
2160 
2161  while (stripe != parity) {
2162  stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1);
2163 
2164  RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], (ULONG)(ci->stripe_length * sizeof(PFN_NUMBER) >> PAGE_SHIFT));
2165 
2166  stripeoff[stripe] += (uint32_t)ci->stripe_length;
2167  pos += (uint32_t)ci->stripe_length;
2168 
2169  stripe = (stripe + 1) % ci->num_stripes;
2170  }
2171 
2172  stripe_pfns = (PFN_NUMBER*)(context.stripes[parity].mdl + 1);
2173 
2174  for (k = 0; k < ci->stripe_length >> PAGE_SHIFT; k++) {
2175  stripe_pfns[stripeoff[parity] >> PAGE_SHIFT] = dummy;
2176  stripeoff[parity] += PAGE_SIZE;
2177  }
2178  } else {
2179  uint16_t stripe = (parity + 1) % ci->num_stripes;
2180  uint32_t readlen;
2181 
2182  while (pos < length) {
2183  stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1);
2184  readlen = min(length - pos, (ULONG)min(context.stripes[stripe].stripeend - context.stripes[stripe].stripestart, ci->stripe_length));
2185 
2186  if (readlen == 0)
2187  break;
2188 
2189  RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], readlen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
2190 
2191  stripeoff[stripe] += readlen;
2192  pos += readlen;
2193 
2194  stripe = (stripe + 1) % ci->num_stripes;
2195  }
2196  }
2197  }
2198 
2199  MmUnlockPages(master_mdl);
2200  IoFreeMdl(master_mdl);
2201 
2202  ExFreePool(stripeoff);
2203  } else if (type == BLOCK_FLAG_RAID6) {
2204  uint64_t startoff, endoff;
2205  uint16_t endoffstripe, parity1;
2206  uint32_t *stripeoff, pos;
2207  PMDL master_mdl;
2208  PFN_NUMBER *pfns, dummy;
2209  bool need_dummy = false;
2210 
2211  get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes - 2, &startoff, &startoffstripe);
2212  get_raid0_offset(addr + length - offset - 1, ci->stripe_length, ci->num_stripes - 2, &endoff, &endoffstripe);
2213 
2214  if (file_read) {
2216 
2217  if (!context.va) {
2218  ERR("out of memory\n");
2220  goto exit;
2221  }
2222  } else
2223  context.va = buf;
2224 
2225  master_mdl = IoAllocateMdl(context.va, length, false, false, NULL);
2226  if (!master_mdl) {
2227  ERR("out of memory\n");
2229  goto exit;
2230  }
2231 
2233 
2234  _SEH2_TRY {
2238  } _SEH2_END;
2239 
2240  if (!NT_SUCCESS(Status)) {
2241  ERR("MmProbeAndLockPages threw exception %08x\n", Status);
2242  IoFreeMdl(master_mdl);
2243  goto exit;
2244  }
2245 
2246  pfns = (PFN_NUMBER*)(master_mdl + 1);
2247 
2248  pos = 0;
2249  while (pos < length) {
2250  parity1 = (((addr - offset + pos) / ((ci->num_stripes - 2) * ci->stripe_length)) + ci->num_stripes - 2) % ci->num_stripes;
2251 
2252  if (pos == 0) {
2253  uint16_t stripe = (parity1 + startoffstripe + 2) % ci->num_stripes, parity2;
2254  ULONG skip, readlen;
2255 
2256  i = startoffstripe;
2257  while (stripe != parity1) {
2258  if (i == startoffstripe) {
2259  readlen = (ULONG)min(length, ci->stripe_length - (startoff % ci->stripe_length));
2260 
2261  context.stripes[stripe].stripestart = startoff;
2262  context.stripes[stripe].stripeend = startoff + readlen;
2263 
2264  pos += readlen;
2265 
2266  if (pos == length)
2267  break;
2268  } else {
2269  readlen = min(length - pos, (ULONG)ci->stripe_length);
2270 
2271  context.stripes[stripe].stripestart = startoff - (startoff % ci->stripe_length);
2272  context.stripes[stripe].stripeend = context.stripes[stripe].stripestart + readlen;
2273 
2274  pos += readlen;
2275 
2276  if (pos == length)
2277  break;
2278  }
2279 
2280  i++;
2281  stripe = (stripe + 1) % ci->num_stripes;
2282  }
2283 
2284  if (pos == length)
2285  break;
2286 
2287  for (i = 0; i < startoffstripe; i++) {
2288  uint16_t stripe2 = (parity1 + i + 2) % ci->num_stripes;
2289 
2290  context.stripes[stripe2].stripestart = context.stripes[stripe2].stripeend = startoff - (startoff % ci->stripe_length) + ci->stripe_length;
2291  }
2292 
2293  context.stripes[parity1].stripestart = context.stripes[parity1].stripeend = startoff - (startoff % ci->stripe_length) + ci->stripe_length;
2294 
2295  parity2 = (parity1 + 1) % ci->num_stripes;
2296  context.stripes[parity2].stripestart = context.stripes[parity2].stripeend = startoff - (startoff % ci->stripe_length) + ci->stripe_length;
2297 
2298  if (length - pos > ci->num_stripes * (ci->num_stripes - 2) * ci->stripe_length) {
2299  skip = (ULONG)(((length - pos) / (ci->num_stripes * (ci->num_stripes - 2) * ci->stripe_length)) - 1);
2300 
2301  for (i = 0; i < ci->num_stripes; i++) {
2302  context.stripes[i].stripeend += skip * ci->num_stripes * ci->stripe_length;
2303  }
2304 
2305  pos += (uint32_t)(skip * (ci->num_stripes - 2) * ci->num_stripes * ci->stripe_length);
2306  need_dummy = true;
2307  }
2308  } else if (length - pos >= ci->stripe_length * (ci->num_stripes - 2)) {
2309  for (i = 0; i < ci->num_stripes; i++) {
2310  context.stripes[i].stripeend += ci->stripe_length;
2311  }
2312 
2313  pos += (uint32_t)(ci->stripe_length * (ci->num_stripes - 2));
2314  need_dummy = true;
2315  } else {
2316  uint16_t stripe = (parity1 + 2) % ci->num_stripes;
2317 
2318  i = 0;
2319  while (stripe != parity1) {
2320  if (endoffstripe == i) {
2321  context.stripes[stripe].stripeend = endoff + 1;
2322  break;
2323  } else if (endoffstripe > i)
2324  context.stripes[stripe].stripeend = endoff - (endoff % ci->stripe_length) + ci->stripe_length;
2325 
2326  i++;
2327  stripe = (stripe + 1) % ci->num_stripes;
2328  }
2329 
2330  break;
2331  }
2332  }
2333 
2334  for (i = 0; i < ci->num_stripes; i++) {
2335  if (context.stripes[i].stripestart != context.stripes[i].stripeend) {
2336  context.stripes[i].mdl = IoAllocateMdl(context.va, (ULONG)(context.stripes[i].stripeend - context.stripes[i].stripestart), false, false, NULL);
2337 
2338  if (!context.stripes[i].mdl) {
2339  ERR("IoAllocateMdl failed\n");
2340  MmUnlockPages(master_mdl);
2341  IoFreeMdl(master_mdl);
2343  goto exit;
2344  }
2345  }
2346  }
2347 
2348  if (need_dummy) {
2350  if (!dummypage) {
2351  ERR("out of memory\n");
2352  MmUnlockPages(master_mdl);
2353  IoFreeMdl(master_mdl);
2355  goto exit;
2356  }
2357 
2358  dummy_mdl = IoAllocateMdl(dummypage, PAGE_SIZE, false, false, NULL);
2359  if (!dummy_mdl) {
2360  ERR("IoAllocateMdl failed\n");
2361  MmUnlockPages(master_mdl);
2362  IoFreeMdl(master_mdl);
2364  goto exit;
2365  }
2366 
2367  MmBuildMdlForNonPagedPool(dummy_mdl);
2368 
2369  dummy = *(PFN_NUMBER*)(dummy_mdl + 1);
2370  }
2371 
2372  stripeoff = ExAllocatePoolWithTag(NonPagedPool, sizeof(uint32_t) * ci->num_stripes, ALLOC_TAG);
2373  if (!stripeoff) {
2374  ERR("out of memory\n");
2375  MmUnlockPages(master_mdl);
2376  IoFreeMdl(master_mdl);
2378  goto exit;
2379  }
2380 
2381  RtlZeroMemory(stripeoff, sizeof(uint32_t) * ci->num_stripes);
2382 
2383  pos = 0;
2384 
2385  while (pos < length) {
2386  PFN_NUMBER* stripe_pfns;
2387 
2388  parity1 = (((addr - offset + pos) / ((ci->num_stripes - 2) * ci->stripe_length)) + ci->num_stripes - 2) % ci->num_stripes;
2389 
2390  if (pos == 0) {
2391  uint16_t stripe = (parity1 + startoffstripe + 2) % ci->num_stripes;
2392  uint32_t readlen = min(length - pos, (uint32_t)min(context.stripes[stripe].stripeend - context.stripes[stripe].stripestart,
2393  ci->stripe_length - (context.stripes[stripe].stripestart % ci->stripe_length)));
2394 
2395  stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1);
2396 
2397  RtlCopyMemory(stripe_pfns, pfns, readlen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
2398 
2399  stripeoff[stripe] = readlen;
2400  pos += readlen;
2401 
2402  stripe = (stripe + 1) % ci->num_stripes;
2403 
2404  while (stripe != parity1) {
2405  stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1);
2406  readlen = (uint32_t)min(length - pos, min(context.stripes[stripe].stripeend - context.stripes[stripe].stripestart, ci->stripe_length));
2407 
2408  if (readlen == 0)
2409  break;
2410 
2411  RtlCopyMemory(stripe_pfns, &pfns[pos >> PAGE_SHIFT], readlen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
2412 
2413  stripeoff[stripe] = readlen;
2414  pos += readlen;
2415 
2416  stripe = (stripe + 1) % ci->num_stripes;
2417  }
2418  } else if (length - pos >= ci->stripe_length * (ci->num_stripes - 2)) {
2419  uint16_t stripe = (parity1 + 2) % ci->num_stripes;
2420  uint16_t parity2 = (parity1 + 1) % ci->num_stripes;
2421  ULONG k;
2422 
2423  while (stripe != parity1) {
2424  stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1);
2425 
2426  RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], (ULONG)(ci->stripe_length * sizeof(PFN_NUMBER) >> PAGE_SHIFT));
2427 
2428  stripeoff[stripe] += (uint32_t)ci->stripe_length;
2429  pos += (uint32_t)ci->stripe_length;
2430 
2431  stripe = (stripe + 1) % ci->num_stripes;
2432  }
2433 
2434  stripe_pfns = (PFN_NUMBER*)(context.stripes[parity1].mdl + 1);
2435 
2436  for (k = 0; k < ci->stripe_length >> PAGE_SHIFT; k++) {
2437  stripe_pfns[stripeoff[parity1] >> PAGE_SHIFT] = dummy;
2438  stripeoff[parity1] += PAGE_SIZE;
2439  }
2440 
2441  stripe_pfns = (PFN_NUMBER*)(context.stripes[parity2].mdl + 1);
2442 
2443  for (k = 0; k < ci->stripe_length >> PAGE_SHIFT; k++) {
2444  stripe_pfns[stripeoff[parity2] >> PAGE_SHIFT] = dummy;
2445  stripeoff[parity2] += PAGE_SIZE;
2446  }
2447  } else {
2448  uint16_t stripe = (parity1 + 2) % ci->num_stripes;
2449  uint32_t readlen;
2450 
2451  while (pos < length) {
2452  stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1);
2453  readlen = (uint32_t)min(length - pos, min(context.stripes[stripe].stripeend - context.stripes[stripe].stripestart, ci->stripe_length));
2454 
2455  if (readlen == 0)
2456  break;
2457 
2458  RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], readlen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
2459 
2460  stripeoff[stripe] += readlen;
2461  pos += readlen;
2462 
2463  stripe = (stripe + 1) % ci->num_stripes;
2464  }
2465  }
2466  }
2467 
2468  MmUnlockPages(master_mdl);
2469  IoFreeMdl(master_mdl);
2470 
2471  ExFreePool(stripeoff);
2472  }
2473 
2474  context.address = addr;
2475 
2476  for (i = 0; i < ci->num_stripes; i++) {
2477  if (!devices[i] || !devices[i]->devobj || context.stripes[i].stripestart == context.stripes[i].stripeend) {
2478  context.stripes[i].status = ReadDataStatus_MissingDevice;
2479  context.stripes_left--;
2480 
2481  if (!devices[i] || !devices[i]->devobj)
2482  missing_devices++;
2483  }
2484  }
2485 
2486  if (missing_devices > allowed_missing) {
2487  ERR("not enough devices to service request (%u missing)\n", missing_devices);
2489  goto exit;
2490  }
2491 
2492  for (i = 0; i < ci->num_stripes; i++) {
2494 
2495  if (devices[i] && devices[i]->devobj && context.stripes[i].stripestart != context.stripes[i].stripeend && context.stripes[i].status != ReadDataStatus_Skip) {
2496  context.stripes[i].context = (struct read_data_context*)&context;
2497 
2498  if (type == BLOCK_FLAG_RAID10) {
2499  context.stripes[i].stripenum = i / ci->sub_stripes;
2500  }
2501 
2502  if (!Irp) {
2503  context.stripes[i].Irp = IoAllocateIrp(devices[i]->devobj->StackSize, false);
2504 
2505  if (!context.stripes[i].Irp) {
2506  ERR("IoAllocateIrp failed\n");
2508  goto exit;
2509  }
2510  } else {
2511  context.stripes[i].Irp = IoMakeAssociatedIrp(Irp, devices[i]->devobj->StackSize);
2512 
2513  if (!context.stripes[i].Irp) {
2514  ERR("IoMakeAssociatedIrp failed\n");
2516  goto exit;
2517  }
2518  }
2519 
2520  IrpSp = IoGetNextIrpStackLocation(context.stripes[i].Irp);
2523  IrpSp->FileObject = devices[i]->fileobj;
2524 
2525  if (devices[i]->devobj->Flags & DO_BUFFERED_IO) {
2526  context.stripes[i].Irp->AssociatedIrp.SystemBuffer = ExAllocatePoolWithTag(NonPagedPool, (ULONG)(context.stripes[i].stripeend - context.stripes[i].stripestart), ALLOC_TAG);
2527  if (!context.stripes[i].Irp->AssociatedIrp.SystemBuffer) {
2528  ERR("out of memory\n");
2530  goto exit;
2531  }
2532 
2534 
2535  context.stripes[i].Irp->UserBuffer = MmGetSystemAddressForMdlSafe(context.stripes[i].mdl, priority);
2536  } else if (devices[i]->devobj->Flags & DO_DIRECT_IO)
2537  context.stripes[i].Irp->MdlAddress = context.stripes[i].mdl;
2538  else
2539  context.stripes[i].Irp->UserBuffer = MmGetSystemAddressForMdlSafe(context.stripes[i].mdl, priority);
2540 
2541  IrpSp->Parameters.Read.Length = (ULONG)(context.stripes[i].stripeend - context.stripes[i].stripestart);
2542  IrpSp->Parameters.Read.ByteOffset.QuadPart = context.stripes[i].stripestart + cis[i].offset;
2543 
2544  total_reading += IrpSp->Parameters.Read.Length;
2545 
2546  context.stripes[i].Irp->UserIosb = &context.stripes[i].iosb;
2547 
2548  IoSetCompletionRoutine(context.stripes[i].Irp, read_data_completion, &context.stripes[i], true, true, true);
2549 
2550  context.stripes[i].status = ReadDataStatus_Pending;
2551  }
2552  }
2553 
2554  need_to_wait = false;
2555  for (i = 0; i < ci->num_stripes; i++) {
2556  if (context.stripes[i].status != ReadDataStatus_MissingDevice && context.stripes[i].status != ReadDataStatus_Skip) {
2557  IoCallDriver(devices[i]->devobj, context.stripes[i].Irp);
2558  need_to_wait = true;
2559  }
2560  }
2561 
2562  if (need_to_wait)
2564 
2565  if (diskacc)
2566  fFsRtlUpdateDiskCounters(total_reading, 0);
2567 
2568  // check if any of the devices return a "user-induced" error
2569 
2570  for (i = 0; i < ci->num_stripes; i++) {
2571  if (context.stripes[i].status == ReadDataStatus_Error && IoIsErrorUserInduced(context.stripes[i].iosb.Status)) {
2572  Status = context.stripes[i].iosb.Status;
2573  goto exit;
2574  }
2575  }
2576 
2577  if (type == BLOCK_FLAG_RAID0) {
2578  Status = read_data_raid0(Vcb, file_read ? context.va : buf, addr, length, &context, ci, devices, generation, offset);
2579  if (!NT_SUCCESS(Status)) {
2580  ERR("read_data_raid0 returned %08x\n", Status);
2581 
2582  if (file_read)
2583  ExFreePool(context.va);
2584 
2585  goto exit;
2586  }
2587 
2588  if (file_read) {
2590  ExFreePool(context.va);
2591  }
2592  } else if (type == BLOCK_FLAG_RAID10) {
2593  Status = read_data_raid10(Vcb, file_read ? context.va : buf, addr, length, &context, ci, devices, generation, offset);
2594 
2595  if (!NT_SUCCESS(Status)) {
2596  ERR("read_data_raid10 returned %08x\n", Status);
2597 
2598  if (file_read)
2599  ExFreePool(context.va);
2600 
2601  goto exit;
2602  }
2603 
2604  if (file_read) {
2606  ExFreePool(context.va);
2607  }
2608  } else if (type == BLOCK_FLAG_DUPLICATE) {
2609  Status = read_data_dup(Vcb, file_read ? context.va : buf, addr, &context, ci, devices, generation);
2610  if (!NT_SUCCESS(Status)) {
2611  ERR("read_data_dup returned %08x\n", Status);
2612 
2613  if (file_read)
2614  ExFreePool(context.va);
2615 
2616  goto exit;
2617  }
2618 
2619  if (file_read) {
2621  ExFreePool(context.va);
2622  }
2623  } else if (type == BLOCK_FLAG_RAID5) {
2624  Status = read_data_raid5(Vcb, file_read ? context.va : buf, addr, length, &context, ci, devices, offset, generation, c, missing_devices > 0 ? true : false);
2625  if (!NT_SUCCESS(Status)) {
2626  ERR("read_data_raid5 returned %08x\n", Status);
2627 
2628  if (file_read)
2629  ExFreePool(context.va);
2630 
2631  goto exit;
2632  }
2633 
2634  if (file_read) {
2636  ExFreePool(context.va);
2637  }
2638  } else if (type == BLOCK_FLAG_RAID6) {
2639  Status = read_data_raid6(Vcb, file_read ? context.va : buf, addr, length, &context, ci, devices, offset, generation, c, missing_devices > 0 ? true : false);
2640  if (!NT_SUCCESS(Status)) {
2641  ERR("read_data_raid6 returned %08x\n", Status);
2642 
2643  if (file_read)
2644  ExFreePool(context.va);
2645 
2646  goto exit;
2647  }
2648 
2649  if (file_read) {
2651  ExFreePool(context.va);
2652  }
2653  }
2654 
2655 exit:
2656  if (c && (type == BLOCK_FLAG_RAID5 || type == BLOCK_FLAG_RAID6))
2657  chunk_unlock_range(Vcb, c, lockaddr, locklen);
2658 
2659  if (dummy_mdl)
2660  IoFreeMdl(dummy_mdl);
2661 
2662  if (dummypage)
2663  ExFreePool(dummypage);
2664 
2665  for (i = 0; i < ci->num_stripes; i++) {
2666  if (context.stripes[i].mdl) {
2667  if (context.stripes[i].mdl->MdlFlags & MDL_PAGES_LOCKED)
2668  MmUnlockPages(context.stripes[i].mdl);
2669 
2670  IoFreeMdl(context.stripes[i].mdl);
2671  }
2672 
2673  if (context.stripes[i].Irp)
2674  IoFreeIrp(context.stripes[i].Irp);
2675  }
2676 
2677  ExFreePool(context.stripes);
2678 
2679  if (!Vcb->log_to_phys_loaded)
2681 
2682  return Status;
2683 }
2684 
// read_stream body: copies bytes of the alternate data stream held in
// fcb->adsdata into `data`.
//   fcb    - file control block whose adsdata buffer is read
//   data   - destination; byte 0 of `data` receives the stream byte at `start`
//   start  - offset within the stream at which to begin reading
//   length - maximum number of bytes to copy
//   pbr    - optional; receives the number of bytes copied (0 on the
//            early-return paths)
// Returns STATUS_END_OF_FILE when `start` is at or past the end of the stream,
// STATUS_SUCCESS otherwise.
// NOTE(review): the signature line is absent from this listing; the parameter
// names come from the TRACE call below and the caller in do_read.
2686  ULONG readlen;
2687 
2688  TRACE("(%p, %p, %I64x, %I64x, %p)\n", fcb, data, start, length, pbr);
2689 
2690  if (pbr) *pbr = 0;
2691 
2692  if (start >= fcb->adsdata.Length) {
2693  TRACE("tried to read beyond end of stream\n");
2694  return STATUS_END_OF_FILE;
2695  }
2696 
2697  if (length == 0) {
2698  WARN("tried to read zero bytes\n");
2699  return STATUS_SUCCESS;
2700  }
2701 
// Clamp the copy so that it never runs past the end of the stream.
2702  if (start + length < fcb->adsdata.Length)
2703  readlen = length;
2704  else
2705  readlen = fcb->adsdata.Length - (ULONG)start;
2706 
// The offset applies to the source, not the destination: the caller's buffer
// begins at the requested offset, so it is filled from its first byte.
// (Previously this copied from the start of the stream to data + start, which
// returned the wrong bytes and wrote beyond the buffer whenever start > 0.)
2707  if (readlen > 0)
2708  RtlCopyMemory(data, fcb->adsdata.Buffer + start, readlen);
2709 
2710  if (pbr) *pbr = readlen;
2711 
2712  return STATUS_SUCCESS;
2713 }
2714 
// read_file body (invoked from do_read as
// read_file(fcb, data, start, length, bytes_read, Irp)).
// Walks fcb->extents and fills `data` with up to `length` bytes of file
// content beginning at file offset `start`. Holes between extents,
// preallocated extents and the tail up to st_size are returned as zeroes.
// If pbr is non-NULL it receives the number of bytes placed in `data`.
// NOTE(review): the signature line and a number of interior lines (mostly
// condition lines and Status assignments) are absent from this listing - the
// embedded line numbers skip - so the comments below describe only what the
// visible lines show.
2716  NTSTATUS Status;
2717  EXTENT_DATA* ed;
2718  uint32_t bytes_read = 0;
2719  uint64_t last_end;
2720  LIST_ENTRY* le;
2721  POOL_TYPE pool_type;
2722 
2723  TRACE("(%p, %p, %I64x, %I64x, %p)\n", fcb, data, start, length, pbr);
2724 
2725  if (pbr)
2726  *pbr = 0;
2727 
// Offset at or beyond the inode's recorded size: bail out.
// NOTE(review): the line that sets Status here is missing from the listing.
2728  if (start >= fcb->inode_item.st_size) {
2729  WARN("Tried to read beyond end of file\n");
2731  goto exit;
2732  }
2733 
// Scratch buffers come from non-paged pool when the file is flagged as a
// paging file, and from paged pool otherwise.
2734  pool_type = fcb->Header.Flags2 & FSRTL_FLAG2_IS_PAGING_FILE ? NonPagedPool : PagedPool;
2735 
2736  le = fcb->extents.Flink;
2737 
// last_end tracks where the previously visited extent finished, so that a gap
// before the next extent can be detected.
2738  last_end = start;
2739 
2740  while (le != &fcb->extents) {
2741  uint64_t len;
2743  EXTENT_DATA2* ed2;
2744 
2745  if (!ext->ignore) {
2746  ed = &ext->extent_data;
2747 
2749 
// Logical length of this extent: num_bytes when an EXTENT_DATA2 is present,
// otherwise the decoded size.
2750  len = ed2 ? ed2->num_bytes : ed->decoded_size;
2751 
// Extent ends at or before the requested offset: note its end and move on.
2752  if (ext->offset + len <= start) {
2753  last_end = ext->offset + len;
2754  goto nextitem;
2755  }
2756 
// Gap between the previous extent and this one: hand back zeroes for it.
2757  if (ext->offset > last_end && ext->offset > start + bytes_read) {
2758  uint32_t read = (uint32_t)min(length, ext->offset - max(start, last_end));
2759 
2760  RtlZeroMemory(data + bytes_read, read);
2761  bytes_read += read;
2762  length -= read;
2763  }
2764 
// Stop once the request is satisfied or this extent begins past its end.
2765  if (length == 0 || ext->offset > start + bytes_read + length)
2766  break;
2767 
2769  WARN("Encryption not supported\n");
2771  goto exit;
2772  }
2773 
2774  if (ed->encoding != BTRFS_ENCODING_NONE) {
2775  WARN("Other encodings not supported\n");
2777  goto exit;
2778  }
2779 
2780  switch (ed->type) {
// Inline extent: the payload lives in ed->data itself.
2781  case EXTENT_TYPE_INLINE:
2782  {
// off is the position within this extent at which the read resumes.
2783  uint64_t off = start + bytes_read - ext->offset;
2784  uint32_t read;
2785 
// NOTE(review): the condition guarding this copy is missing from the listing;
// presumably it is the uncompressed case - confirm against the original.
2787  read = (uint32_t)min(min(len, ext->datalen) - off, length);
2788 
2789  RtlCopyMemory(data + bytes_read, &ed->data[off], read);
2791  uint8_t* decomp;
2792  bool decomp_alloc;
// Size of the stored payload: the item length minus the EXTENT_DATA header.
2793  uint16_t inlen = ext->datalen - (uint16_t)offsetof(EXTENT_DATA, data[0]);
2794 
2795  if (ed->decoded_size == 0 || ed->decoded_size > 0xffffffff) {
2796  ERR("ed->decoded_size was invalid (%I64x)\n", ed->decoded_size);
2798  goto exit;
2799  }
2800 
2802 
// With a non-zero offset the first `off` decompressed bytes are not wanted,
// so decompress into a scratch buffer and copy the tail out afterwards.
// With off == 0 the output goes straight into the caller's buffer.
2803  if (off > 0) {
2805  if (!decomp) {
2806  ERR("out of memory\n");
2808  goto exit;
2809  }
2810 
2811  decomp_alloc = true;
2812  } else {
2813  decomp = data + bytes_read;
2814  decomp_alloc = false;
2815  }
2816 
2818  Status = zlib_decompress(ed->data, inlen, decomp, (uint32_t)(read + off));
2819  if (!NT_SUCCESS(Status)) {
2820  ERR("zlib_decompress returned %08x\n", Status);
2821  if (decomp_alloc) ExFreePool(decomp);
2822  goto exit;
2823  }
2824  } else if (ed->compression == BTRFS_COMPRESSION_LZO) {
// The LZO payload is preceded by one uint32_t, which is skipped before
// decompression.
2825  if (inlen < sizeof(uint32_t)) {
2826  ERR("extent data was truncated\n");
2828  if (decomp_alloc) ExFreePool(decomp);
2829  goto exit;
2830  } else
2831  inlen -= sizeof(uint32_t);
2832 
2833  Status = lzo_decompress(ed->data + sizeof(uint32_t), inlen, decomp, (uint32_t)(read + off), sizeof(uint32_t));
2834  if (!NT_SUCCESS(Status)) {
2835  ERR("lzo_decompress returned %08x\n", Status);
2836  if (decomp_alloc) ExFreePool(decomp);
2837  goto exit;
2838  }
2839  } else if (ed->compression == BTRFS_COMPRESSION_ZSTD) {
2840  Status = zstd_decompress(ed->data, inlen, decomp, (uint32_t)(read + off));
2841  if (!NT_SUCCESS(Status)) {
2842  ERR("zstd_decompress returned %08x\n", Status);
2843  if (decomp_alloc) ExFreePool(decomp);
2844  goto exit;
2845  }
2846  }
2847 
// Scratch buffer in use: keep only the bytes from `off` onwards.
2848  if (decomp_alloc) {
2849  RtlCopyMemory(data + bytes_read, decomp + off, read);
2850  ExFreePool(decomp);
2851  }
2852  } else {
2853  ERR("unhandled compression type %x\n", ed->compression);
2855  goto exit;
2856  }
2857 
2858  bytes_read += read;
2859  length -= read;
2860 
2861  break;
2862  }
2863 
// Regular extent: the payload is read from disk through read_data.
2864  case EXTENT_TYPE_REGULAR:
2865  {
2866  uint64_t off = start + bytes_read - ext->offset;
2868  uint8_t* buf;
// mdl is true only when an IRP with an MDL was supplied.
2869  bool mdl = (Irp && Irp->MdlAddress) ? true : false;
2870  bool buf_free;
2871  uint32_t bumpoff = 0, *csum;
2872  uint64_t addr;
2873  chunk* c;
2874 
// Bytes wanted from this extent, capped by what the caller still needs.
2875  read = (uint32_t)(len - off);
2876  if (read > length) read = (uint32_t)length;
2877 
// NOTE(review): the condition selecting between the two branches below is
// missing from the listing. The first branch reads just the wanted range,
// widened to sector boundaries; the second reads the whole on-disk extent.
2879  addr = ed2->address + ed2->offset + off;
2880  to_read = (uint32_t)sector_align(read, fcb->Vcb->superblock.sector_size);
2881 
// Address not on a sector boundary: back it up to the start of the sector and
// remember in bumpoff how many leading bytes to discard afterwards.
2882  if (addr % fcb->Vcb->superblock.sector_size > 0) {
2883  bumpoff = addr % fcb->Vcb->superblock.sector_size;
2884  addr -= bumpoff;
2885  to_read = (uint32_t)sector_align(read + bumpoff, fcb->Vcb->superblock.sector_size);
2886  }
2887  } else {
2888  addr = ed2->address;
2889  to_read = (uint32_t)sector_align(ed2->size, fcb->Vcb->superblock.sector_size);
2890  }
2891 
// Uncompressed data with sector-aligned start and length is read directly
// into the caller's buffer; anything else goes through a temporary buffer.
2892  if (ed->compression == BTRFS_COMPRESSION_NONE && start % fcb->Vcb->superblock.sector_size == 0 &&
2893  length % fcb->Vcb->superblock.sector_size == 0) {
2894  buf = data + bytes_read;
2895  buf_free = false;
2896  } else {
2897  buf = ExAllocatePoolWithTag(pool_type, to_read, ALLOC_TAG);
2898  buf_free = true;
2899 
2900  if (!buf) {
2901  ERR("out of memory\n");
2903  goto exit;
2904  }
2905 
// Presumably cleared because the temporary buffer is not the IRP's buffer -
// confirm against read_data's handling of this flag.
2906  mdl = false;
2907  }
2908 
2910 
2911  if (!c) {
2912  ERR("get_chunk_from_address(%I64x) failed\n", addr);
2913 
2914  if (buf_free)
2915  ExFreePool(buf);
2916 
2917  goto exit;
2918  }
2919 
// Choose the checksum array to verify against: either the entry for the
// sector containing `off`, or the start of the extent's array.
// NOTE(review): the inner condition line is missing from the listing.
2920  if (ext->csum) {
2922  csum = &ext->csum[off / fcb->Vcb->superblock.sector_size];
2923  else
2924  csum = ext->csum;
2925  } else
2926  csum = NULL;
2927 
2928  Status = read_data(fcb->Vcb, addr, to_read, csum, false, buf, c, NULL, Irp, 0, mdl,
2930  if (!NT_SUCCESS(Status)) {
2931  ERR("read_data returned %08x\n", Status);
2932 
2933  if (buf_free)
2934  ExFreePool(buf);
2935 
2936  goto exit;
2937  }
2938 
// NOTE(review): the opening condition of this if/else is missing from the
// listing. First branch: copy the wanted bytes out of the temporary buffer,
// skipping the bumpoff lead-in. Second branch: decompress.
2940  if (buf_free)
2941  RtlCopyMemory(data + bytes_read, buf + bumpoff, read);
2942  } else {
2943  uint8_t *decomp = NULL, *buf2;
2944  ULONG outlen, inlen, off2;
2945  uint32_t inpageoff = 0;
2946 
// off2 is the position within the decompressed extent at which the wanted
// data begins.
2947  off2 = (ULONG)(ed2->offset + off);
2948  buf2 = buf;
2949  inlen = (ULONG)ed2->size;
2950 
2952  ULONG inoff = sizeof(uint32_t);
2953 
2954  inlen -= sizeof(uint32_t);
2955 
2956  // If reading a few sectors in, skip to the interesting bit
// Each pass reads a uint32_t segment length at inoff and, if the segment fits
// in the remaining input, steps over it, reducing off2 by LZO_PAGE_SIZE.
2957  while (off2 > LZO_PAGE_SIZE) {
2958  uint32_t partlen;
2959 
2960  if (inlen < sizeof(uint32_t))
2961  break;
2962 
2963  partlen = *(uint32_t*)(buf2 + inoff);
2964 
2965  if (partlen < inlen) {
2966  off2 -= LZO_PAGE_SIZE;
2967  inoff += partlen + sizeof(uint32_t);
2968  inlen -= partlen + sizeof(uint32_t);
2969 
// Fewer than sizeof(uint32_t) bytes left in the current LZO_PAGE_SIZE page:
// advance inoff to the start of the next page.
2970  if (LZO_PAGE_SIZE - (inoff % LZO_PAGE_SIZE) < sizeof(uint32_t))
2971  inoff = ((inoff / LZO_PAGE_SIZE) + 1) * LZO_PAGE_SIZE;
2972  } else
2973  break;
2974  }
2975 
2976  buf2 = &buf2[inoff];
2977  inpageoff = inoff % LZO_PAGE_SIZE;
2978  }
2979 
// With a non-zero off2 the leading off2 bytes of output are not wanted, so
// decompress into a scratch buffer; otherwise write straight into `data`.
2980  if (off2 != 0) {
2981  outlen = off2 + min(read, (uint32_t)(ed2->num_bytes - off));
2982 
2983  decomp = ExAllocatePoolWithTag(pool_type, outlen, ALLOC_TAG);
2984  if (!decomp) {
2985  ERR("out of memory\n");
2986  ExFreePool(buf);
2988  goto exit;
2989  }
2990  } else
2991  outlen = min(read, (uint32_t)(ed2->num_bytes - off));
2992 
2994  Status = zlib_decompress(buf2, inlen, decomp ? decomp : (data + bytes_read), outlen);
2995 
2996  if (!NT_SUCCESS(Status)) {
2997  ERR("zlib_decompress returned %08x\n", Status);
2998  ExFreePool(buf);
2999 
3000  if (decomp)
3001  ExFreePool(decomp);
3002 
3003  goto exit;
3004  }
3005  } else if (ed->compression == BTRFS_COMPRESSION_LZO) {
3006  Status = lzo_decompress(buf2, inlen, decomp ? decomp : (data + bytes_read), outlen, inpageoff);
3007 
3008  if (!NT_SUCCESS(Status)) {
3009  ERR("lzo_decompress returned %08x\n", Status);
3010  ExFreePool(buf);
3011 
3012  if (decomp)
3013  ExFreePool(decomp);
3014 
3015  goto exit;
3016  }
3017  } else if (ed->compression == BTRFS_COMPRESSION_ZSTD) {
3018  Status = zstd_decompress(buf2, inlen, decomp ? decomp : (data + bytes_read), outlen);
3019 
3020  if (!NT_SUCCESS(Status)) {
3021  ERR("zstd_decompress returned %08x\n", Status);
3022  ExFreePool(buf);
3023 
3024  if (decomp)
3025  ExFreePool(decomp);
3026 
3027  goto exit;
3028  }
3029  } else {
3030  ERR("unsupported compression type %x\n", ed->compression);
3032 
3033  ExFreePool(buf);
3034 
3035  if (decomp)
3036  ExFreePool(decomp);
3037 
3038  goto exit;
3039  }
3040 
// Scratch buffer in use: keep only the bytes from off2 onwards.
3041  if (decomp) {
3042  RtlCopyMemory(data + bytes_read, decomp + off2, (size_t)min(read, ed2->num_bytes - off));
3043  ExFreePool(decomp);
3044  }
3045  }
3046 
3047  if (buf_free)
3048  ExFreePool(buf);
3049 
3050  bytes_read += read;
3051  length -= read;
3052 
3053  break;
3054  }
3055 
// Preallocated extent: nothing is read from disk; the range is returned as
// zeroes.
3056  case EXTENT_TYPE_PREALLOC:
3057  {
3058  uint64_t off = start + bytes_read - ext->offset;
3059  uint32_t read = (uint32_t)(len - off);
3060 
3061  if (read > length) read = (uint32_t)length;
3062 
3063  RtlZeroMemory(data + bytes_read, read);
3064 
3065  bytes_read += read;
3066  length -= read;
3067 
3068  break;
3069  }
3070 
3071  default:
3072  WARN("Unsupported extent data type %u\n", ed->type);
3074  goto exit;
3075  }
3076 
3077  last_end = ext->offset + len;
3078 
3079  if (length == 0)
3080  break;
3081  }
3082 
3083 nextitem:
3084  le = le->Flink;
3085  }
3086 
// Request not yet satisfied and still short of st_size: zero-fill the rest,
// up to st_size.
3087  if (length > 0 && start + bytes_read < fcb->inode_item.st_size) {
3088  uint32_t read = (uint32_t)min(fcb->inode_item.st_size - start - bytes_read, length);
3089 
3090  RtlZeroMemory(data + bytes_read, read);
3091 
3092  bytes_read += read;
3093  length -= read;
3094  }
3095 
3097  if (pbr)
3098  *pbr = bytes_read;
3099 
3100 exit:
3101  return Status;
3102 }
3103 
// Services a read request described by Irp.
//   Irp        - the read IRP; offset and length come from its stack location
//   wait       - whether the caller may block; when false, the non-cached
//                path and a cache read that cannot complete immediately
//                return STATUS_PENDING
//   bytes_read - receives the number of bytes reported back to the caller
// When IRP_NOCACHE is clear the read goes through the cache manager;
// otherwise the data is fetched with read_stream (for an ADS) or read_file.
// NOTE(review): several lines of this function are absent from this listing
// (the embedded line numbers skip), including the declarations of IrpSp and
// FileObject, some return statements and the SEH except clause.
3104 NTSTATUS do_read(PIRP Irp, bool wait, ULONG* bytes_read) {
3107  fcb* fcb = FileObject->FsContext;
3108  uint8_t* data = NULL;
// addon counts bytes beyond ValidDataLength that are zero-filled here rather
// than read.
3109  ULONG length = IrpSp->Parameters.Read.Length, addon = 0;
3110  uint64_t start = IrpSp->Parameters.Read.ByteOffset.QuadPart;
3111 
3112  *bytes_read = 0;
3113 
3114  if (!fcb || !fcb->Vcb || !fcb->subvol)
3115  return STATUS_INTERNAL_ERROR;
3116 
3117  TRACE("fcb = %p\n", fcb);
3118  TRACE("offset = %I64x, length = %x\n", start, length);
3119  TRACE("paging_io = %s, no cache = %s\n", Irp->Flags & IRP_PAGING_IO ? "true" : "false", Irp->Flags & IRP_NOCACHE ? "true" : "false");
3120 
// Directories that are not alternate data streams are singled out here.
// NOTE(review): the statement executed for them is missing from the listing.
3121  if (!fcb->ads && fcb->type == BTRFS_TYPE_DIRECTORY)
3123 
// Byte-range locks are checked for everything except paging I/O.
3124  if (!(Irp->Flags & IRP_PAGING_IO) && !FsRtlCheckLockForReadAccess(&fcb->lock, Irp)) {
3125  WARN("tried to read locked region\n");
3127  }
3128 
3129  if (length == 0) {
3130  TRACE("tried to read zero bytes\n");
3131  return STATUS_SUCCESS;
3132  }
3133 
3134  if (start >= (uint64_t)fcb->Header.FileSize.QuadPart) {
3135  TRACE("tried to read with offset after file end (%I64x >= %I64x)\n", start, fcb->Header.FileSize.QuadPart);
3136  return STATUS_END_OF_FILE;
3137  }
3138 
3139  TRACE("FileObject %p fcb %p FileSize = %I64x st_size = %I64x (%p)\n", FileObject, fcb, fcb->Header.FileSize.QuadPart, fcb->inode_item.st_size, &fcb->inode_item.st_size);
3140 
// Non-cached reads, and cached reads that are not MDL reads, need a pointer
// to the destination buffer.
// NOTE(review): the line that assigns `data` is missing from the listing.
3141  if (Irp->Flags & IRP_NOCACHE || !(IrpSp->MinorFunction & IRP_MN_MDL)) {
3143 
3144  if (Irp->MdlAddress && !data) {
3145  ERR("MmGetSystemAddressForMdlSafe returned NULL\n");
3147  }
3148 
// The request starts at or beyond ValidDataLength: no data is read, and the
// computed length is reported as the byte count.
// NOTE(review): one line of this branch is missing from the listing.
3149  if (start >= (uint64_t)fcb->Header.ValidDataLength.QuadPart) {
3150  length = (ULONG)min(length, min(start + length, (uint64_t)fcb->Header.FileSize.QuadPart) - fcb->Header.ValidDataLength.QuadPart);
3152  Irp->IoStatus.Information = *bytes_read = length;
3153  return STATUS_SUCCESS;
3154  }
3155 
// The request straddles ValidDataLength: zero the part of the buffer beyond
// it now (addon bytes) and shorten the real read to end at ValidDataLength.
3156  if (length + start > (uint64_t)fcb->Header.ValidDataLength.QuadPart) {
3157  addon = (ULONG)(min(start + length, (uint64_t)fcb->Header.FileSize.QuadPart) - fcb->Header.ValidDataLength.QuadPart);
3158  RtlZeroMemory(data + (fcb->Header.ValidDataLength.QuadPart - start), addon);
3159  length = (ULONG)(fcb->Header.ValidDataLength.QuadPart - start);
3160  }
3161  }
3162 
// Cached path: the read is satisfied through the cache manager.
3163  if (!(Irp->Flags & IRP_NOCACHE)) {
3165 
3166  _SEH2_TRY {
// No private cache map on this file object yet: initialise caching with the
// file's current sizes.
3167  if (!FileObject->PrivateCacheMap) {
3168  CC_FILE_SIZES ccfs;
3169 
3170  ccfs.AllocationSize = fcb->Header.AllocationSize;
3171  ccfs.FileSize = fcb->Header.FileSize;
3172  ccfs.ValidDataLength = fcb->Header.ValidDataLength;
3173 
3174  init_file_cache(FileObject, &ccfs);
3175  }
3176 
3177  if (IrpSp->MinorFunction & IRP_MN_MDL) {
3178  CcMdlRead(FileObject,&IrpSp->Parameters.Read.ByteOffset, length, &Irp->MdlAddress, &Irp->IoStatus);
3179  } else {
// Use CcCopyReadEx when that function pointer is set; otherwise fall back to
// CcCopyRead. A false return from either results in STATUS_PENDING.
3180  if (fCcCopyReadEx) {
3181  TRACE("CcCopyReadEx(%p, %I64x, %x, %u, %p, %p, %p, %p)\n", FileObject, IrpSp->Parameters.Read.ByteOffset.QuadPart,
3182  length, wait, data, &Irp->IoStatus, Irp->Tail.Overlay.Thread);
3183  TRACE("sizes = %I64x, %I64x, %I64x\n", fcb->Header.AllocationSize, fcb->Header.FileSize, fcb->Header.ValidDataLength);
3184  if (!fCcCopyReadEx(FileObject, &IrpSp->Parameters.Read.ByteOffset, length, wait, data, &Irp->IoStatus, Irp->Tail.Overlay.Thread)) {
3185  TRACE("CcCopyReadEx could not wait\n");
3186 
3188  return STATUS_PENDING;
3189  }
3190  TRACE("CcCopyReadEx finished\n");
3191  } else {
3192  TRACE("CcCopyRead(%p, %I64x, %x, %u, %p, %p)\n", FileObject, IrpSp->Parameters.Read.ByteOffset.QuadPart, length, wait, data, &Irp->IoStatus);
3193  TRACE("sizes = %I64x, %I64x, %I64x\n", fcb->Header.AllocationSize, fcb->Header.FileSize, fcb->Header.ValidDataLength);
3194  if (!CcCopyRead(FileObject, &IrpSp->Parameters.Read.ByteOffset, length, wait, data, &Irp->IoStatus)) {
3195  TRACE("CcCopyRead could not wait\n");
3196 
3198  return STATUS_PENDING;
3199  }
3200  TRACE("CcCopyRead finished\n");
3201  }
3202  }
3205  } _SEH2_END;
3206 
// On success, take the status from the IRP and add the zero-filled addon
// bytes to the reported count.
3207  if (NT_SUCCESS(Status)) {
3208  Status = Irp->IoStatus.Status;
3209  Irp->IoStatus.Information += addon;
3210  *bytes_read = (ULONG)Irp->IoStatus.Information;
3211  } else
3212  ERR("EXCEPTION - %08x\n", Status);
3213 
3214  return Status;
3215  } else {
// Non-cached path: read directly. This requires that the caller may block.
3216  NTSTATUS Status;
3217 
3218  if (!wait) {
3220  return STATUS_PENDING;
3221  }
3222 
3223  if (fcb->ads)
3224  Status = read_stream(fcb, data, start, length, bytes_read);
3225  else
3226  Status = read_file(fcb, data, start, length, bytes_read, Irp);
3227 
// Include the bytes that were zero-filled beyond ValidDataLength.
3228  *bytes_read += addon;
3229  TRACE("read %u bytes\n", *bytes_read);
3230 
3231  Irp->IoStatus.Information = *bytes_read;
3232 
// Disk accounting: pick a non-system thread (the IRP's thread if it has one
// and it is not a system thread) and charge the bytes read to its process.
// NOTE(review): the body of the else-if below is missing from the listing.
3233  if (diskacc && Status != STATUS_PENDING) {
3234  PETHREAD thread = NULL;
3235 
3236  if (Irp->Tail.Overlay.Thread && !IoIsSystemThread(Irp->Tail.Overlay.Thread))
3237  thread = Irp->Tail.Overlay.Thread;
3238  else if (!IoIsSystemThread(PsGetCurrentThread()))
3242 
3243  if (thread)
3244  fPsUpdateDiskCounters(PsGetThreadProcess(thread), *bytes_read, 0, 1, 0, 0);
3245  }
3246 
3247  return Status;
3248  }
3249 }
3250 
/* Body of the read dispatch routine.
 *
 * NOTE(review): this listing is missing the function signature and several
 * statements (the listing line numbers skip, e.g. 3264, 3271, 3274, 3280,
 * 3368, 3376, 3378). Comments below describe only what is visible; anything
 * about the missing lines is marked as an assumption to confirm against the
 * full source.
 *
 * Visible flow:
 *   1. Reject or redirect requests depending on Vcb->type.
 *   2. Validate fcb / ccb and the caller's FILE_READ_DATA access.
 *   3. Forward reads of the volume FCB to the underlying device.
 *   4. Flush cached data for the range, take the fcb resource shared,
 *      and call do_read().
 *   5. Common exit labels: `exit` updates the file position, `end` stores
 *      Status in the IRP and handles STATUS_PENDING, `exit2` is the final
 *      cleanup shared by every path that jumps there.
 */
3257  ULONG bytes_read = 0;
3258  NTSTATUS Status;
3259  bool top_level;
3260  fcb* fcb;
3261  ccb* ccb;
3262  bool acquired_fcb_lock = false, wait;
3263 
3265 
3266  top_level = is_top_level(Irp);
3267 
3268  TRACE("read\n");
3269 
// Dispatch on the device type. The statements that set Status in each branch
// are not visible in this listing.
3270  if (Vcb && Vcb->type == VCB_TYPE_VOLUME) {
// Jumps straight to exit2, bypassing the `exit` and `end` labels.
3272  goto exit2;
3273  } else if (!Vcb || Vcb->type != VCB_TYPE_FS) {
3275  goto end;
3276  }
3277 
3278  Irp->IoStatus.Information = 0;
3279 
// NOTE(review): the condition opening this block is missing from the listing;
// presumably it tests for an MDL-complete minor function - confirm. The block
// hands the MDL back to the cache manager and clears Irp->MdlAddress.
3281  CcMdlReadComplete(IrpSp->FileObject, Irp->MdlAddress);
3282 
3283  Irp->MdlAddress = NULL;
3285 
3286  goto exit;
3287  }
3288 
3289  fcb = FileObject->FsContext;
3290 
3291  if (!fcb) {
3292  ERR("fcb was NULL\n");
3294  goto exit;
3295  }
3296 
3297  ccb = FileObject->FsContext2;
3298 
3299  if (!ccb) {
3300  ERR("ccb was NULL\n");
3302  goto exit;
3303  }
3304 
// Only user-mode requestors are checked against the handle's granted access.
3305  if (Irp->RequestorMode == UserMode && !(ccb->access & FILE_READ_DATA)) {
3306  WARN("insufficient privileges\n");
3308  goto exit;
3309  }
3310 
// Reads on the volume FCB are passed down to Vcb->Vpb->RealDevice; the IRP is
// then owned by the lower driver, so the `exit`/`end` handling is skipped.
3311  if (fcb == Vcb->volume_fcb) {
3312  TRACE("reading volume FCB\n");
3313 
3315 
3316  Status = IoCallDriver(Vcb->Vpb->RealDevice, Irp);
3317 
3318  goto exit2;
3319  }
3320 
// NOTE(review): the statement guarded by this `if` is missing from the listing.
3321  if (!(Irp->Flags & IRP_PAGING_IO))
3323 
3324  wait = IoIsOperationSynchronous(Irp);
3325 
3326  // Don't offload jobs when doing paging IO - otherwise this can lead to
3327  // deadlocks in CcCopyRead.
3328  if (Irp->Flags & IRP_PAGING_IO)
3329  wait = true;
3330 
// For non-paging reads of a file that has a data section, flush the cache for
// the requested range before reading. (The declaration of `iosb` is on a line
// missing from this listing.)
3331  if (!(Irp->Flags & IRP_PAGING_IO) && FileObject->SectionObjectPointer && FileObject->SectionObjectPointer->DataSectionObject) {
3333 
3334  CcFlushCache(FileObject->SectionObjectPointer, &IrpSp->Parameters.Read.ByteOffset, IrpSp->Parameters.Read.Length, &iosb);
3335  if (!NT_SUCCESS(iosb.Status)) {
3336  ERR("CcFlushCache returned %08x\n", iosb.Status);
// NOTE(review): this is the only path that returns directly instead of going
// through exit/end/exit2, so Irp->IoStatus.Status is not set here and whatever
// cleanup those labels perform is skipped - looks unintended; confirm whether
// this should be `Status = iosb.Status; goto exit;`.
3337  return iosb.Status;
3338  }
3339  }
3340 
// Take the fcb resource shared unless it is already held shared, and remember
// whether this call acquired it so that only this call releases it.
3341  if (!ExIsResourceAcquiredSharedLite(fcb->Header.Resource)) {
3342  if (!ExAcquireResourceSharedLite(fcb->Header.Resource, wait)) {
// NOTE(review): the statements run on acquisition failure are missing from
// the listing.
3345  goto exit;
3346  }
3347 
3348  acquired_fcb_lock = true;
3349  }
3350 
3351  Status = do_read(Irp, wait, &bytes_read);
3352 
3353  if (acquired_fcb_lock)
3354  ExReleaseResourceLite(fcb->Header.Resource);
3355 
3356 exit:
// For synchronous, non-paging file objects, move the current file position to
// the request offset plus the bytes read (plus 0 if the read failed).
3357  if (FileObject->Flags & FO_SYNCHRONOUS_IO && !(Irp->Flags & IRP_PAGING_IO))
3358  FileObject->CurrentByteOffset.QuadPart = IrpSp->Parameters.Read.ByteOffset.QuadPart + (NT_SUCCESS(Status) ? bytes_read : 0);
3359 
3360 end:
3361  Irp->IoStatus.Status = Status;
3362 
3363  TRACE("Irp->IoStatus.Status = %08x\n", Irp->IoStatus.Status);
3364  TRACE("Irp->IoStatus.Information = %lu\n", Irp->IoStatus.Information);
3365  TRACE("returning %08x\n", Status);
3366 
// NOTE(review): the statement for the non-pending case is missing from the
// listing (presumably it completes the IRP - confirm). For STATUS_PENDING the
// IRP is handed to add_thread_job(); if that returns false, do_read_job() is
// run directly on this thread.
3367  if (Status != STATUS_PENDING)
3369  else {
3370  if (!add_thread_job(Vcb, Irp))
3371  Status = do_read_job(Irp);
3372  }
3373 
3374 exit2:
// NOTE(review): the statement guarded by `top_level` and the one following it
// are missing from the listing.
3375  if (top_level)
3377 
3379 
3380  return Status;
3381 }
VOID(__stdcall * tFsRtlUpdateDiskCounters)(ULONG64 BytesRead, ULONG64 BytesWritten)
Definition: btrfs_drv.h:1832
BOOLEAN NTAPI IoIsSystemThread(IN PETHREAD Thread)
Definition: util.c:115
LOCAL void nextitem(arg_t *ap)
Definition: match.c:428
uint64_t obj_id
Definition: btrfs.h:128
void galois_double(uint8_t *data, uint32_t len)
Definition: galois.c:109
static PIO_STATUS_BLOCK iosb
Definition: file.c:98
uint64_t address
Definition: read.c:46
void init_file_cache(_In_ PFILE_OBJECT FileObject, _In_ CC_FILE_SIZES *ccfs)
Definition: btrfs.c:3946
VOID NTAPI CcMdlRead(IN PFILE_OBJECT FileObject, IN PLARGE_INTEGER FileOffset, IN ULONG Length, OUT PMDL *MdlChain, OUT PIO_STATUS_BLOCK IoStatus)
Definition: mdlsup.c:64
NTSTATUS Status
Definition: read.c:44
#define PAGE_SHIFT
Definition: env_spec_w32.h:45
uint32_t sector
Definition: isohybrid.c:61
NTSTATUS zstd_decompress(uint8_t *inbuf, uint32_t inlen, uint8_t *outbuf, uint32_t outlen)
Definition: compress.c:1133
uint16_t stripenum
Definition: read.c:32
#define max(a, b)
Definition: svc.c:63
uint8_t type
Definition: btrfs.h:341
#define LZO_PAGE_SIZE
Definition: read.c:63
NTSYSAPI VOID NTAPI RtlCopyMemory(VOID UNALIGNED *Destination, CONST VOID UNALIGNED *Source, ULONG Length)
_In_ ULONG cj
Definition: winddi.h:3540
uint8_t obj_type
Definition: btrfs.h:129
#define STATUS_INSUFFICIENT_RESOURCES
Definition: udferr_usr.h:158
NTSTATUS do_read_job(PIRP Irp)
Definition: worker-thread.c:26
#define FsRtlEnterFileSystem
uint16_t sub_stripes
Definition: btrfs.h:326
#define PsGetCurrentThread()
Definition: env_spec_w32.h:81
VOID NTAPI CcFlushCache(IN PSECTION_OBJECT_POINTERS SectionObjectPointer, IN OPTIONAL PLARGE_INTEGER FileOffset, IN ULONG Length, OUT OPTIONAL PIO_STATUS_BLOCK IoStatus)
Definition: cachesub.c:222
#define FsRtlExitFileSystem
#define STATUS_NOT_IMPLEMENTED
Definition: ntstatus.h:225
#define BTRFS_COMPRESSION_NONE
Definition: btrfs.h:61
static __inline void do_xor(uint8_t *buf1, uint8_t *buf2, uint32_t len)
Definition: btrfs_drv.h:1675
#define STATUS_MORE_PROCESSING_REQUIRED
Definition: shellext.h:68
Definition: http.c:7098
EXTENT_DATA2 * ed2
Definition: write.c:2819
struct read_data_context * context
Definition: read.c:31
GLenum GLuint GLenum GLsizei const GLchar * buf
Definition: glext.h:7751
_In_ PIRP Irp
Definition: csq.h:116
#define STATUS_INVALID_PARAMETER
Definition: udferr_usr.h:135
uint16_t startoffstripe
Definition: read.c:51
LONG stripes_left
Definition: read.c:48
uint64_t decoded_size
Definition: btrfs.h:337
#define _In_reads_bytes_opt_(size)
Definition: no_sal2.h:230
const char * devices
Definition: diskspace.c:793
VOID NTAPI MmBuildMdlForNonPagedPool(IN PMDL Mdl)
Definition: mdlsup.c:428
const GLint * first
Definition: glext.h:5794
#define FSRTL_FLAG2_IS_PAGING_FILE
Definition: fsrtltypes.h:57
#define WARN(fmt,...)
Definition: debug.h:111
#define IoIsErrorUserInduced(Status)
Definition: iofuncs.h:2769
#define TYPE_CHUNK_ITEM
Definition: btrfs.h:44
LONG NTSTATUS
Definition: precomp.h:26
GLintptr offset
Definition: glext.h:5920
#define IRP_NOCACHE
#define STATUS_INVALID_DEVICE_REQUEST
Definition: udferr_usr.h:138
Definition: write.c:111
GLuint GLuint GLsizei GLenum type
Definition: gl.h:1545
void get_raid56_lock_range(chunk *c, uint64_t address, uint64_t length, uint64_t *lockaddr, uint64_t *locklen)
Definition: write.c:2150
uint64_t address
Definition: btrfs_drv.h:546
GLint GLint GLint GLint GLint x
Definition: gl.h:1548
uint64_t stripe_length
Definition: btrfs.h:320
VOID NTAPI MmUnlockPages(IN PMDL Mdl)
Definition: mdlsup.c:1439
#define VCB_TYPE_FS
Definition: btrfs_drv.h:664
uint32_t crc32
Definition: btrfs.c:4138
uint8_t csum[32]
Definition: btrfs.h:138
GLuint GLuint end
Definition: gl.h:1545
#define BTRFS_COMPRESSION_LZO
Definition: btrfs.h:63
#define BTRFS_COMPRESSION_ZSTD
Definition: btrfs.h:64
void free_calc_job(calc_job *cj)
Definition: calcthread.c:53
RTL_BITMAP bmp
Definition: btrfs_drv.h:549
ACCESS_MASK access
Definition: btrfs_drv.h:383
unsigned short int uint16_t
Definition: acefiex.h:54
#define uint16_t
Definition: nsiface.idl:60
BOOLEAN NTAPI IoIsOperationSynchronous(IN PIRP Irp)
Definition: irp.c:1882
#define MmGetSystemAddressForMdlSafe(_Mdl, _Priority)
LONG NTAPI KeSetEvent(IN PKEVENT Event, IN KPRIORITY Increment, IN BOOLEAN Wait)
Definition: eventobj.c:159
bool is_top_level(_In_ PIRP Irp)
Definition: btrfs.c:276
NTSTATUS NTAPI KeWaitForSingleObject(IN PVOID Object, IN KWAIT_REASON WaitReason, IN KPROCESSOR_MODE WaitMode, IN BOOLEAN Alertable, IN PLARGE_INTEGER Timeout OPTIONAL)
Definition: wait.c:416
KEVENT Event
Definition: read.c:43
uint8_t encryption
Definition: btrfs.h:339
uint64_t offset
Definition: btrfs.h:130
_Post_satisfies_ static stripe __inline void get_raid0_o