ReactOS  0.4.13-dev-52-g0efcfec
read.c
1 /* Copyright (c) Mark Harmstone 2016-17
2  *
3  * This file is part of WinBtrfs.
4  *
5  * WinBtrfs is free software: you can redistribute it and/or modify
6  * it under the terms of the GNU Lesser General Public Licence as published by
7  * the Free Software Foundation, either version 3 of the Licence, or
8  * (at your option) any later version.
9  *
10  * WinBtrfs is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  * GNU Lesser General Public Licence for more details.
14  *
15  * You should have received a copy of the GNU Lesser General Public Licence
16  * along with WinBtrfs. If not, see <http://www.gnu.org/licenses/>. */
17 
18 #include "btrfs_drv.h"
19 
26 };
27 
28 struct read_data_context;
29 
30 typedef struct {
41 
42 typedef struct {
45  chunk* c;
57 
58 extern BOOL diskacc;
62 
63 #define LINUX_PAGE_SIZE 4096
64 
65 _Function_class_(IO_COMPLETION_ROUTINE)
66 #ifdef __REACTOS__
67 static NTSTATUS NTAPI read_data_completion(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID conptr) {
68 #else
69 static NTSTATUS read_data_completion(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID conptr) {
70 #endif
71  read_data_stripe* stripe = conptr;
72  read_data_context* context = (read_data_context*)stripe->context;
73 
74  UNUSED(DeviceObject);
75 
76  stripe->iosb = Irp->IoStatus;
77 
78  if (NT_SUCCESS(Irp->IoStatus.Status))
79  stripe->status = ReadDataStatus_Success;
80  else
81  stripe->status = ReadDataStatus_Error;
82 
83  if (InterlockedDecrement(&context->stripes_left) == 0)
84  KeSetEvent(&context->Event, 0, FALSE);
85 
86  return STATUS_MORE_PROCESSING_REQUIRED;
87 }
88 
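// check_csum verifies each sector of data against the expected crc32c values in csum. Small
// requests are hashed inline; larger ones are handed off to the driver's calc-job worker
// threads, which is why the function may block waiting for the job to complete.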
89 NTSTATUS check_csum(device_extension* Vcb, UINT8* data, UINT32 sectors, UINT32* csum) {
90  NTSTATUS Status;
91  calc_job* cj;
92  UINT32* csum2;
93 
94  // From experimenting, it seems that 40 sectors is roughly the crossover
95  // point where offloading the crc32 calculation becomes worth it.
96 
97  if (sectors < 40 || KeQueryActiveProcessorCount(NULL) < 2) {
98  ULONG j;
99 
100  for (j = 0; j < sectors; j++) {
101  UINT32 crc32 = ~calc_crc32c(0xffffffff, data + (j * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
102 
103  if (crc32 != csum[j]) {
104  return STATUS_CRC_ERROR;
105  }
106  }
107 
108  return STATUS_SUCCESS;
109  }
110 
111  csum2 = ExAllocatePoolWithTag(PagedPool, sizeof(UINT32) * sectors, ALLOC_TAG);
112  if (!csum2) {
113  ERR("out of memory\n");
114  return STATUS_INSUFFICIENT_RESOURCES;
115  }
116 
117  Status = add_calc_job(Vcb, data, sectors, csum2, &cj);
118  if (!NT_SUCCESS(Status)) {
119  ERR("add_calc_job returned %08x\n", Status);
120  ExFreePool(csum2);
121  return Status;
122  }
123 
124  KeWaitForSingleObject(&cj->event, Executive, KernelMode, FALSE, NULL);
125 
126  if (RtlCompareMemory(csum2, csum, sectors * sizeof(UINT32)) != sectors * sizeof(UINT32)) {
127  free_calc_job(cj);
128  ExFreePool(csum2);
129  return STATUS_CRC_ERROR;
130  }
131 
132  free_calc_job(cj);
133  ExFreePool(csum2);
134 
135  return STATUS_SUCCESS;
136 }
137 
138 static NTSTATUS read_data_dup(device_extension* Vcb, UINT8* buf, UINT64 addr, read_data_context* context, CHUNK_ITEM* ci,
139  device** devices, UINT64 generation) {
140  ULONG i;
141  BOOL checksum_error = FALSE;
142  UINT16 j, stripe = 0;
143  NTSTATUS Status;
144  CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&ci[1];
145 
146  for (j = 0; j < ci->num_stripes; j++) {
147  if (context->stripes[j].status == ReadDataStatus_Error) {
148  WARN("stripe %u returned error %08x\n", j, context->stripes[j].iosb.Status);
150  return context->stripes[j].iosb.Status;
151  } else if (context->stripes[j].status == ReadDataStatus_Success) {
152  stripe = j;
153  break;
154  }
155  }
156 
157  if (context->stripes[stripe].status != ReadDataStatus_Success)
158  return STATUS_INTERNAL_ERROR;
159 
160  if (context->tree) {
161  tree_header* th = (tree_header*)buf;
162  UINT32 crc32;
163 
164  crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, context->buflen - sizeof(th->csum));
165 
166  if (th->address != context->address || crc32 != *((UINT32*)th->csum)) {
167  checksum_error = TRUE;
169  } else if (generation != 0 && th->generation != generation) {
170  checksum_error = TRUE;
172  }
173  } else if (context->csum) {
174 #ifdef DEBUG_STATS
175  LARGE_INTEGER time1, time2;
176 
178 #endif
179  Status = check_csum(Vcb, buf, (ULONG)context->stripes[stripe].Irp->IoStatus.Information / context->sector_size, context->csum);
180 
181  if (Status == STATUS_CRC_ERROR) {
182  checksum_error = TRUE;
184  } else if (!NT_SUCCESS(Status)) {
185  ERR("check_csum returned %08x\n", Status);
186  return Status;
187  }
188 #ifdef DEBUG_STATS
190 
191  Vcb->stats.read_csum_time += time2.QuadPart - time1.QuadPart;
192 #endif
193  }
194 
195  if (!checksum_error)
196  return STATUS_SUCCESS;
197 
198  if (ci->num_stripes == 1)
199  return STATUS_CRC_ERROR;
200 
201  if (context->tree) {
202  tree_header* t2;
203  BOOL recovered = FALSE;
204 
205  t2 = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.node_size, ALLOC_TAG);
206  if (!t2) {
207  ERR("out of memory\n");
208  return STATUS_INSUFFICIENT_RESOURCES;
209  }
210 
211  for (j = 0; j < ci->num_stripes; j++) {
212  if (j != stripe && devices[j] && devices[j]->devobj) {
213  Status = sync_read_phys(devices[j]->devobj, cis[j].offset + context->stripes[stripe].stripestart, Vcb->superblock.node_size, (UINT8*)t2, FALSE);
214  if (!NT_SUCCESS(Status)) {
215  WARN("sync_read_phys returned %08x\n", Status);
217  } else {
218  UINT32 crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&t2->fs_uuid, Vcb->superblock.node_size - sizeof(t2->csum));
219 
220  if (t2->address == addr && crc32 == *((UINT32*)t2->csum) && (generation == 0 || t2->generation == generation)) {
221  RtlCopyMemory(buf, t2, Vcb->superblock.node_size);
222  ERR("recovering from checksum error at %llx, device %llx\n", addr, devices[stripe]->devitem.dev_id);
223  recovered = TRUE;
224 
225  if (!Vcb->readonly && !devices[stripe]->readonly) { // write good data over bad
226  Status = write_data_phys(devices[stripe]->devobj, cis[stripe].offset + context->stripes[stripe].stripestart,
227  t2, Vcb->superblock.node_size);
228  if (!NT_SUCCESS(Status)) {
229  WARN("write_data_phys returned %08x\n", Status);
231  }
232  }
233 
234  break;
235  } else if (t2->address != addr || crc32 != *((UINT32*)t2->csum))
236  log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
237  else
238  log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_GENERATION_ERRORS);
239  }
240  }
241  }
242 
243  if (!recovered) {
244  ERR("unrecoverable checksum error at %llx\n", addr);
245  ExFreePool(t2);
246  return STATUS_CRC_ERROR;
247  }
248 
249  ExFreePool(t2);
250  } else {
251  ULONG sectors = (ULONG)context->stripes[stripe].Irp->IoStatus.Information / Vcb->superblock.sector_size;
252  UINT8* sector;
253 
254  sector = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.sector_size, ALLOC_TAG);
255  if (!sector) {
256  ERR("out of memory\n");
257  return STATUS_INSUFFICIENT_RESOURCES;
258  }
259 
260  for (i = 0; i < sectors; i++) {
261  UINT32 crc32 = ~calc_crc32c(0xffffffff, buf + (i * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
262 
263  if (context->csum[i] != crc32) {
264  BOOL recovered = FALSE;
265 
266  for (j = 0; j < ci->num_stripes; j++) {
267  if (j != stripe && devices[j] && devices[j]->devobj) {
268  Status = sync_read_phys(devices[j]->devobj, cis[j].offset + context->stripes[stripe].stripestart + UInt32x32To64(i, Vcb->superblock.sector_size),
269  Vcb->superblock.sector_size, sector, FALSE);
270  if (!NT_SUCCESS(Status)) {
271  WARN("sync_read_phys returned %08x\n", Status);
273  } else {
274  UINT32 crc32b = ~calc_crc32c(0xffffffff, sector, Vcb->superblock.sector_size);
275 
276  if (crc32b == context->csum[i]) {
277  RtlCopyMemory(buf + (i * Vcb->superblock.sector_size), sector, Vcb->superblock.sector_size);
278  ERR("recovering from checksum error at %llx, device %llx\n", addr + UInt32x32To64(i, Vcb->superblock.sector_size), devices[stripe]->devitem.dev_id);
279  recovered = TRUE;
280 
281  if (!Vcb->readonly && !devices[stripe]->readonly) { // write good data over bad
282  Status = write_data_phys(devices[stripe]->devobj, cis[stripe].offset + context->stripes[stripe].stripestart + UInt32x32To64(i, Vcb->superblock.sector_size),
283  sector, Vcb->superblock.sector_size);
284  if (!NT_SUCCESS(Status)) {
285  WARN("write_data_phys returned %08x\n", Status);
287  }
288  }
289 
290  break;
291  } else
292  log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
293  }
294  }
295  }
296 
297  if (!recovered) {
298  ERR("unrecoverable checksum error at %llx\n", addr + UInt32x32To64(i, Vcb->superblock.sector_size));
299  ExFreePool(sector);
300  return STATUS_CRC_ERROR;
301  }
302  }
303  }
304 
305  ExFreePool(sector);
306  }
307 
308  return STATUS_SUCCESS;
309 }
310 
311 static NTSTATUS read_data_raid0(device_extension* Vcb, UINT8* buf, UINT64 addr, UINT32 length, read_data_context* context, CHUNK_ITEM* ci,
312  device** devices, UINT64 generation, UINT64 offset) {
313  UINT64 i;
314 
315  for (i = 0; i < ci->num_stripes; i++) {
316  if (context->stripes[i].status == ReadDataStatus_Error) {
317  WARN("stripe %llu returned error %08x\n", i, context->stripes[i].iosb.Status);
319  return context->stripes[i].iosb.Status;
320  }
321  }
322 
323  if (context->tree) { // shouldn't happen, as trees shouldn't cross stripe boundaries
324  tree_header* th = (tree_header*)buf;
325  UINT32 crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum));
326 
327  if (crc32 != *((UINT32*)th->csum) || addr != th->address || (generation != 0 && generation != th->generation)) {
328  UINT64 off;
329  UINT16 stripe;
330 
331  get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes, &off, &stripe);
332 
333  ERR("unrecoverable checksum error at %llx, device %llx\n", addr, devices[stripe]->devitem.dev_id);
334 
335  if (crc32 != *((UINT32*)th->csum)) {
336  WARN("crc32 was %08x, expected %08x\n", crc32, *((UINT32*)th->csum));
338  return STATUS_CRC_ERROR;
339  } else if (addr != th->address) {
340  WARN("address of tree was %llx, not %llx as expected\n", th->address, addr);
342  return STATUS_CRC_ERROR;
343  } else if (generation != 0 && generation != th->generation) {
344  WARN("generation of tree was %llx, not %llx as expected\n", th->generation, generation);
346  return STATUS_CRC_ERROR;
347  }
348  }
349  } else if (context->csum) {
350  NTSTATUS Status;
351 #ifdef DEBUG_STATS
352  LARGE_INTEGER time1, time2;
353 
355 #endif
356  Status = check_csum(Vcb, buf, length / Vcb->superblock.sector_size, context->csum);
357 
358  if (Status == STATUS_CRC_ERROR) {
359  for (i = 0; i < length / Vcb->superblock.sector_size; i++) {
360  UINT32 crc32 = ~calc_crc32c(0xffffffff, buf + (i * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
361 
362  if (context->csum[i] != crc32) {
363  UINT64 off;
364  UINT16 stripe;
365 
366  get_raid0_offset(addr - offset + UInt32x32To64(i, Vcb->superblock.sector_size), ci->stripe_length, ci->num_stripes, &off, &stripe);
367 
368  ERR("unrecoverable checksum error at %llx, device %llx\n", addr, devices[stripe]->devitem.dev_id);
369 
371 
372  return Status;
373  }
374  }
375 
376  return Status;
377  } else if (!NT_SUCCESS(Status)) {
378  ERR("check_csum returned %08x\n", Status);
379  return Status;
380  }
381 #ifdef DEBUG_STATS
383 
384  Vcb->stats.read_csum_time += time2.QuadPart - time1.QuadPart;
385 #endif
386  }
387 
388  return STATUS_SUCCESS;
389 }
390 
391 static NTSTATUS read_data_raid10(device_extension* Vcb, UINT8* buf, UINT64 addr, UINT32 length, read_data_context* context, CHUNK_ITEM* ci,
392  device** devices, UINT64 generation, UINT64 offset) {
393  UINT64 i;
394  UINT16 j, stripe;
395  NTSTATUS Status;
396  BOOL checksum_error = FALSE;
397  CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&ci[1];
398 
399  for (j = 0; j < ci->num_stripes; j++) {
400  if (context->stripes[j].status == ReadDataStatus_Error) {
401  WARN("stripe %llu returned error %08x\n", j, context->stripes[j].iosb.Status);
403  return context->stripes[j].iosb.Status;
404  } else if (context->stripes[j].status == ReadDataStatus_Success)
405  stripe = j;
406  }
407 
408  if (context->tree) {
409  tree_header* th = (tree_header*)buf;
410  UINT32 crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum));
411 
412  if (crc32 != *((UINT32*)th->csum)) {
413  WARN("crc32 was %08x, expected %08x\n", crc32, *((UINT32*)th->csum));
414  checksum_error = TRUE;
416  } else if (addr != th->address) {
417  WARN("address of tree was %llx, not %llx as expected\n", th->address, addr);
418  checksum_error = TRUE;
420  } else if (generation != 0 && generation != th->generation) {
421  WARN("generation of tree was %llx, not %llx as expected\n", th->generation, generation);
422  checksum_error = TRUE;
424  }
425  } else if (context->csum) {
426 #ifdef DEBUG_STATS
427  LARGE_INTEGER time1, time2;
428 
430 #endif
431  Status = check_csum(Vcb, buf, length / Vcb->superblock.sector_size, context->csum);
432 
433  if (Status == STATUS_CRC_ERROR)
434  checksum_error = TRUE;
435  else if (!NT_SUCCESS(Status)) {
436  ERR("check_csum returned %08x\n", Status);
437  return Status;
438  }
439 #ifdef DEBUG_STATS
441 
442  Vcb->stats.read_csum_time += time2.QuadPart - time1.QuadPart;
443 #endif
444  }
445 
446  if (!checksum_error)
447  return STATUS_SUCCESS;
448 
449  if (context->tree) {
450  tree_header* t2;
451  UINT64 off;
452  UINT16 badsubstripe = 0;
453  BOOL recovered = FALSE;
454 
455  t2 = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.node_size, ALLOC_TAG);
456  if (!t2) {
457  ERR("out of memory\n");
458  return STATUS_INSUFFICIENT_RESOURCES;
459  }
460 
461  get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes / ci->sub_stripes, &off, &stripe);
462 
463  stripe *= ci->sub_stripes;
464 
465  for (j = 0; j < ci->sub_stripes; j++) {
466  if (context->stripes[stripe + j].status == ReadDataStatus_Success) {
467  badsubstripe = j;
468  break;
469  }
470  }
471 
472  for (j = 0; j < ci->sub_stripes; j++) {
473  if (context->stripes[stripe + j].status != ReadDataStatus_Success && devices[stripe + j] && devices[stripe + j]->devobj) {
474  Status = sync_read_phys(devices[stripe + j]->devobj, cis[stripe + j].offset + off,
475  Vcb->superblock.node_size, (UINT8*)t2, FALSE);
476  if (!NT_SUCCESS(Status)) {
477  WARN("sync_read_phys returned %08x\n", Status);
479  } else {
480  UINT32 crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&t2->fs_uuid, Vcb->superblock.node_size - sizeof(t2->csum));
481 
482  if (t2->address == addr && crc32 == *((UINT32*)t2->csum) && (generation == 0 || t2->generation == generation)) {
483  RtlCopyMemory(buf, t2, Vcb->superblock.node_size);
484  ERR("recovering from checksum error at %llx, device %llx\n", addr, devices[stripe + j]->devitem.dev_id);
485  recovered = TRUE;
486 
487  if (!Vcb->readonly && !devices[stripe + badsubstripe]->readonly && devices[stripe + badsubstripe]->devobj) { // write good data over bad
488  Status = write_data_phys(devices[stripe + badsubstripe]->devobj, cis[stripe + badsubstripe].offset + off,
489  t2, Vcb->superblock.node_size);
490  if (!NT_SUCCESS(Status)) {
491  WARN("write_data_phys returned %08x\n", Status);
493  }
494  }
495 
496  break;
497  } else if (t2->address != addr || crc32 != *((UINT32*)t2->csum))
498  log_device_error(Vcb, devices[stripe + j], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
499  else
500  log_device_error(Vcb, devices[stripe + j], BTRFS_DEV_STAT_GENERATION_ERRORS);
501  }
502  }
503  }
504 
505  if (!recovered) {
506  ERR("unrecoverable checksum error at %llx\n", addr);
507  ExFreePool(t2);
508  return STATUS_CRC_ERROR;
509  }
510 
511  ExFreePool(t2);
512  } else {
513  ULONG sectors = length / Vcb->superblock.sector_size;
514  UINT8* sector;
515 
516  sector = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.sector_size, ALLOC_TAG);
517  if (!sector) {
518  ERR("out of memory\n");
519  return STATUS_INSUFFICIENT_RESOURCES;
520  }
521 
522  for (i = 0; i < sectors; i++) {
523  UINT32 crc32 = ~calc_crc32c(0xffffffff, buf + (i * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
524 
525  if (context->csum[i] != crc32) {
526  UINT64 off;
527  UINT16 stripe2, badsubstripe = 0;
528  BOOL recovered = FALSE;
529 
530  get_raid0_offset(addr - offset + UInt32x32To64(i, Vcb->superblock.sector_size), ci->stripe_length,
531  ci->num_stripes / ci->sub_stripes, &off, &stripe2);
532 
533  stripe2 *= ci->sub_stripes;
534 
535  for (j = 0; j < ci->sub_stripes; j++) {
536  if (context->stripes[stripe2 + j].status == ReadDataStatus_Success) {
537  badsubstripe = j;
538  break;
539  }
540  }
541 
543 
544  for (j = 0; j < ci->sub_stripes; j++) {
545  if (context->stripes[stripe2 + j].status != ReadDataStatus_Success && devices[stripe2 + j] && devices[stripe2 + j]->devobj) {
546  Status = sync_read_phys(devices[stripe2 + j]->devobj, cis[stripe2 + j].offset + off,
547  Vcb->superblock.sector_size, sector, FALSE);
548  if (!NT_SUCCESS(Status)) {
549  WARN("sync_read_phys returned %08x\n", Status);
551  } else {
552  UINT32 crc32b = ~calc_crc32c(0xffffffff, sector, Vcb->superblock.sector_size);
553 
554  if (crc32b == context->csum[i]) {
555  RtlCopyMemory(buf + (i * Vcb->superblock.sector_size), sector, Vcb->superblock.sector_size);
556  ERR("recovering from checksum error at %llx, device %llx\n", addr + UInt32x32To64(i, Vcb->superblock.sector_size), devices[stripe2 + j]->devitem.dev_id);
557  recovered = TRUE;
558 
559  if (!Vcb->readonly && !devices[stripe2 + badsubstripe]->readonly && devices[stripe2 + badsubstripe]->devobj) { // write good data over bad
560  Status = write_data_phys(devices[stripe2 + badsubstripe]->devobj, cis[stripe2 + badsubstripe].offset + off,
561  sector, Vcb->superblock.sector_size);
562  if (!NT_SUCCESS(Status)) {
563  WARN("write_data_phys returned %08x\n", Status);
564  log_device_error(Vcb, devices[stripe2 + badsubstripe], BTRFS_DEV_STAT_READ_ERRORS);
565  }
566  }
567 
568  break;
569  } else
570  log_device_error(Vcb, devices[stripe2 + j], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
571  }
572  }
573  }
574 
575  if (!recovered) {
576  ERR("unrecoverable checksum error at %llx\n", addr + UInt32x32To64(i, Vcb->superblock.sector_size));
577  ExFreePool(sector);
578  return STATUS_CRC_ERROR;
579  }
580  }
581  }
582 
583  ExFreePool(sector);
584  }
585 
586  return STATUS_SUCCESS;
587 }
588 
589 static NTSTATUS read_data_raid5(device_extension* Vcb, UINT8* buf, UINT64 addr, UINT32 length, read_data_context* context, CHUNK_ITEM* ci,
590  device** devices, UINT64 offset, UINT64 generation, chunk* c, BOOL degraded) {
591  ULONG i;
592  NTSTATUS Status;
593  BOOL checksum_error = FALSE;
594  CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&ci[1];
595  UINT16 j, stripe;
596  BOOL no_success = TRUE;
597 
598  for (j = 0; j < ci->num_stripes; j++) {
599  if (context->stripes[j].status == ReadDataStatus_Error) {
600  WARN("stripe %u returned error %08x\n", j, context->stripes[j].iosb.Status);
602  return context->stripes[j].iosb.Status;
603  } else if (context->stripes[j].status == ReadDataStatus_Success) {
604  stripe = j;
605  no_success = FALSE;
606  }
607  }
608 
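// If part of this range is still sitting in the chunk's in-memory partial stripe cache (data
// accepted by the write path but not yet flushed as a complete RAID stripe), copy the cached
// sectors over what was just read, since the on-disk copy is not yet current.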
609  if (c) { // check partial stripes
610  LIST_ENTRY* le;
611  UINT64 ps_length = (ci->num_stripes - 1) * ci->stripe_length;
612 
613  ExAcquireResourceSharedLite(&c->partial_stripes_lock, TRUE);
614 
615  le = c->partial_stripes.Flink;
616  while (le != &c->partial_stripes) {
617  partial_stripe* ps = CONTAINING_RECORD(le, partial_stripe, list_entry);
618 
619  if (ps->address + ps_length > addr && ps->address < addr + length) {
620  ULONG runlength, index;
621 
622  runlength = RtlFindFirstRunClear(&ps->bmp, &index);
623 
624  while (runlength != 0) {
625  UINT64 runstart = ps->address + (index * Vcb->superblock.sector_size);
626  UINT64 runend = runstart + (runlength * Vcb->superblock.sector_size);
627  UINT64 start = max(runstart, addr);
628  UINT64 end = min(runend, addr + length);
629 
630  if (end > start)
631  RtlCopyMemory(buf + start - addr, &ps->data[start - ps->address], (ULONG)(end - start));
632 
633  runlength = RtlFindNextForwardRunClear(&ps->bmp, index + runlength, &index);
634  }
635  } else if (ps->address >= addr + length)
636  break;
637 
638  le = le->Flink;
639  }
640 
641  ExReleaseResourceLite(&c->partial_stripes_lock);
642  }
643 
644  if (context->tree) {
645  tree_header* th = (tree_header*)buf;
646  UINT32 crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum));
647 
648  if (addr != th->address || crc32 != *((UINT32*)th->csum)) {
649  checksum_error = TRUE;
650  if (!no_success && !degraded)
651  log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
652  } else if (generation != 0 && generation != th->generation) {
653  checksum_error = TRUE;
654  if (!no_success && !degraded)
655  log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_GENERATION_ERRORS);
656  }
657  } else if (context->csum) {
658 #ifdef DEBUG_STATS
659  LARGE_INTEGER time1, time2;
660 
662 #endif
663  Status = check_csum(Vcb, buf, length / Vcb->superblock.sector_size, context->csum);
664 
665  if (Status == STATUS_CRC_ERROR) {
666  if (!degraded)
667  WARN("checksum error\n");
668  checksum_error = TRUE;
669  } else if (!NT_SUCCESS(Status)) {
670  ERR("check_csum returned %08x\n", Status);
671  return Status;
672  }
673 
674 #ifdef DEBUG_STATS
676 
677  Vcb->stats.read_csum_time += time2.QuadPart - time1.QuadPart;
678 #endif
679  } else if (degraded)
680  checksum_error = TRUE;
681 
682  if (!checksum_error)
683  return STATUS_SUCCESS;
684 
685  if (context->tree) {
686  UINT16 parity;
687  UINT64 off;
688  BOOL recovered = FALSE, first = TRUE, failed = FALSE;
689  UINT8* t2;
690 
691  t2 = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.node_size * 2, ALLOC_TAG);
692  if (!t2) {
693  ERR("out of memory\n");
694  return STATUS_INSUFFICIENT_RESOURCES;
695  }
696 
697  get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes - 1, &off, &stripe);
698 
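// The device holding parity rotates with the stripe row, and the data stripes occupy the
// devices that follow it (wrapping around), so the logical stripe index returned by
// get_raid0_offset is shifted past the parity device to obtain the physical stripe below.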
699  parity = (((addr - offset) / ((ci->num_stripes - 1) * ci->stripe_length)) + ci->num_stripes - 1) % ci->num_stripes;
700 
701  stripe = (parity + stripe + 1) % ci->num_stripes;
702 
703  for (j = 0; j < ci->num_stripes; j++) {
704  if (j != stripe) {
705  if (devices[j] && devices[j]->devobj) {
706  if (first) {
707  Status = sync_read_phys(devices[j]->devobj, cis[j].offset + off, Vcb->superblock.node_size, t2, FALSE);
708  if (!NT_SUCCESS(Status)) {
709  ERR("sync_read_phys returned %08x\n", Status);
711  failed = TRUE;
712  break;
713  }
714 
715  first = FALSE;
716  } else {
717  Status = sync_read_phys(devices[j]->devobj, cis[j].offset + off, Vcb->superblock.node_size, t2 + Vcb->superblock.node_size, FALSE);
718  if (!NT_SUCCESS(Status)) {
719  ERR("sync_read_phys returned %08x\n", Status);
721  failed = TRUE;
722  break;
723  }
724 
725  do_xor(t2, t2 + Vcb->superblock.node_size, Vcb->superblock.node_size);
726  }
727  } else {
728  failed = TRUE;
729  break;
730  }
731  }
732  }
733 
734  if (!failed) {
735  tree_header* t3 = (tree_header*)t2;
736  UINT32 crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&t3->fs_uuid, Vcb->superblock.node_size - sizeof(t3->csum));
737 
738  if (t3->address == addr && crc32 == *((UINT32*)t3->csum) && (generation == 0 || t3->generation == generation)) {
739  RtlCopyMemory(buf, t2, Vcb->superblock.node_size);
740 
741  if (!degraded)
742  ERR("recovering from checksum error at %llx, device %llx\n", addr, devices[stripe]->devitem.dev_id);
743 
744  recovered = TRUE;
745 
746  if (!Vcb->readonly && devices[stripe] && !devices[stripe]->readonly && devices[stripe]->devobj) { // write good data over bad
747  Status = write_data_phys(devices[stripe]->devobj, cis[stripe].offset + off, t2, Vcb->superblock.node_size);
748  if (!NT_SUCCESS(Status)) {
749  WARN("write_data_phys returned %08x\n", Status);
751  }
752  }
753  }
754  }
755 
756  if (!recovered) {
757  ERR("unrecoverable checksum error at %llx\n", addr);
758  ExFreePool(t2);
759  return STATUS_CRC_ERROR;
760  }
761 
762  ExFreePool(t2);
763  } else {
764  ULONG sectors = length / Vcb->superblock.sector_size;
765  UINT8* sector;
766 
767  sector = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.sector_size * 2, ALLOC_TAG);
768  if (!sector) {
769  ERR("out of memory\n");
771  }
772 
773  for (i = 0; i < sectors; i++) {
774  UINT16 parity;
775  UINT64 off;
776  UINT32 crc32;
777 
778  if (context->csum)
779  crc32 = ~calc_crc32c(0xffffffff, buf + (i * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
780 
781  get_raid0_offset(addr - offset + UInt32x32To64(i, Vcb->superblock.sector_size), ci->stripe_length,
782  ci->num_stripes - 1, &off, &stripe);
783 
784  parity = (((addr - offset + UInt32x32To64(i, Vcb->superblock.sector_size)) / ((ci->num_stripes - 1) * ci->stripe_length)) + ci->num_stripes - 1) % ci->num_stripes;
785 
786  stripe = (parity + stripe + 1) % ci->num_stripes;
787 
788  if (!devices[stripe] || !devices[stripe]->devobj || (context->csum && context->csum[i] != crc32)) {
789  BOOL recovered = FALSE, first = TRUE, failed = FALSE;
790 
791  if (devices[stripe] && devices[stripe]->devobj)
792  log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
793 
794  for (j = 0; j < ci->num_stripes; j++) {
795  if (j != stripe) {
796  if (devices[j] && devices[j]->devobj) {
797  if (first) {
798  Status = sync_read_phys(devices[j]->devobj, cis[j].offset + off, Vcb->superblock.sector_size, sector, FALSE);
799  if (!NT_SUCCESS(Status)) {
800  ERR("sync_read_phys returned %08x\n", Status);
801  failed = TRUE;
803  break;
804  }
805 
806  first = FALSE;
807  } else {
808  Status = sync_read_phys(devices[j]->devobj, cis[j].offset + off, Vcb->superblock.sector_size, sector + Vcb->superblock.sector_size, FALSE);
809  if (!NT_SUCCESS(Status)) {
810  ERR("sync_read_phys returned %08x\n", Status);
811  failed = TRUE;
813  break;
814  }
815 
816  do_xor(sector, sector + Vcb->superblock.sector_size, Vcb->superblock.sector_size);
817  }
818  } else {
819  failed = TRUE;
820  break;
821  }
822  }
823  }
824 
825  if (!failed) {
826  if (context->csum)
827  crc32 = ~calc_crc32c(0xffffffff, sector, Vcb->superblock.sector_size);
828 
829  if (!context->csum || crc32 == context->csum[i]) {
830  RtlCopyMemory(buf + (i * Vcb->superblock.sector_size), sector, Vcb->superblock.sector_size);
831 
832  if (!degraded)
833  ERR("recovering from checksum error at %llx, device %llx\n", addr + UInt32x32To64(i, Vcb->superblock.sector_size), devices[stripe]->devitem.dev_id);
834 
835  recovered = TRUE;
836 
837  if (!Vcb->readonly && devices[stripe] && !devices[stripe]->readonly && devices[stripe]->devobj) { // write good data over bad
838  Status = write_data_phys(devices[stripe]->devobj, cis[stripe].offset + off,
839  sector, Vcb->superblock.sector_size);
840  if (!NT_SUCCESS(Status)) {
841  WARN("write_data_phys returned %08x\n", Status);
843  }
844  }
845  }
846  }
847 
848  if (!recovered) {
849  ERR("unrecoverable checksum error at %llx\n", addr + UInt32x32To64(i, Vcb->superblock.sector_size));
850  ExFreePool(sector);
851  return STATUS_CRC_ERROR;
852  }
853  }
854  }
855 
856  ExFreePool(sector);
857  }
858 
859  return STATUS_SUCCESS;
860 }
861 
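// Reconstructs up to two missing stripes from the survivors. The sectors buffer holds the data
// stripes first, then P (plain XOR parity) at index num_stripes - 2 and Q (Reed-Solomon parity
// over GF(2^8)) at num_stripes - 1. If one of the missing stripes is P itself, the lost data
// stripe is rebuilt from Q and the surviving data; otherwise both missing data stripes are
// solved for using P and Q together.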
862 void raid6_recover2(UINT8* sectors, UINT16 num_stripes, ULONG sector_size, UINT16 missing1, UINT16 missing2, UINT8* out) {
863  if (missing1 == num_stripes - 2 || missing2 == num_stripes - 2) { // reconstruct from q and data
864  UINT16 missing = missing1 == (num_stripes - 2) ? missing2 : missing1;
865  UINT16 stripe;
866 
867  stripe = num_stripes - 3;
868 
869  if (stripe == missing)
870  RtlZeroMemory(out, sector_size);
871  else
872  RtlCopyMemory(out, sectors + (stripe * sector_size), sector_size);
873 
874  do {
875  stripe--;
876 
877  galois_double(out, sector_size);
878 
879  if (stripe != missing)
880  do_xor(out, sectors + (stripe * sector_size), sector_size);
881  } while (stripe > 0);
882 
883  do_xor(out, sectors + ((num_stripes - 1) * sector_size), sector_size);
884 
885  if (missing != 0)
886  galois_divpower(out, (UINT8)missing, sector_size);
887  } else { // reconstruct from p and q
888  UINT16 x, y, stripe;
889  UINT8 gyx, gx, denom, a, b, *p, *q, *pxy, *qxy;
890  UINT32 j;
891 
892  stripe = num_stripes - 3;
893 
894  pxy = out + sector_size;
895  qxy = out;
896 
897  if (stripe == missing1 || stripe == missing2) {
898  RtlZeroMemory(qxy, sector_size);
899  RtlZeroMemory(pxy, sector_size);
900 
901  if (stripe == missing1)
902  x = stripe;
903  else
904  y = stripe;
905  } else {
906  RtlCopyMemory(qxy, sectors + (stripe * sector_size), sector_size);
907  RtlCopyMemory(pxy, sectors + (stripe * sector_size), sector_size);
908  }
909 
910  do {
911  stripe--;
912 
913  galois_double(qxy, sector_size);
914 
915  if (stripe != missing1 && stripe != missing2) {
916  do_xor(qxy, sectors + (stripe * sector_size), sector_size);
917  do_xor(pxy, sectors + (stripe * sector_size), sector_size);
918  } else if (stripe == missing1)
919  x = stripe;
920  else if (stripe == missing2)
921  y = stripe;
922  } while (stripe > 0);
923 
924  gyx = gpow2(y > x ? (y-x) : (255-x+y));
925  gx = gpow2(255-x);
926 
927  denom = gdiv(1, gyx ^ 1);
928  a = gmul(gyx, denom);
929  b = gmul(gx, denom);
930 
931  p = sectors + ((num_stripes - 2) * sector_size);
932  q = sectors + ((num_stripes - 1) * sector_size);
933 
934  for (j = 0; j < sector_size; j++) {
935  *qxy = gmul(a, *p ^ *pxy) ^ gmul(b, *q ^ *qxy);
936 
937  p++;
938  q++;
939  pxy++;
940  qxy++;
941  }
942 
943  do_xor(out + sector_size, out, sector_size);
944  do_xor(out + sector_size, sectors + ((num_stripes - 2) * sector_size), sector_size);
945  }
946 }
947 
948 static NTSTATUS read_data_raid6(device_extension* Vcb, UINT8* buf, UINT64 addr, UINT32 length, read_data_context* context, CHUNK_ITEM* ci,
949  device** devices, UINT64 offset, UINT64 generation, chunk* c, BOOL degraded) {
950  NTSTATUS Status;
951  ULONG i;
952  BOOL checksum_error = FALSE;
953  CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&ci[1];
954  UINT16 stripe, j;
955  BOOL no_success = TRUE;
956 
957  for (j = 0; j < ci->num_stripes; j++) {
958  if (context->stripes[j].status == ReadDataStatus_Error) {
959  WARN("stripe %u returned error %08x\n", j, context->stripes[j].iosb.Status);
960 
961  if (devices[j])
962  log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS);
963  return context->stripes[j].iosb.Status;
964  } else if (context->stripes[j].status == ReadDataStatus_Success) {
965  stripe = j;
966  no_success = FALSE;
967  }
968  }
969 
970  if (c) { // check partial stripes
971  LIST_ENTRY* le;
972  UINT64 ps_length = (ci->num_stripes - 2) * ci->stripe_length;
973 
974  ExAcquireResourceSharedLite(&c->partial_stripes_lock, TRUE);
975 
976  le = c->partial_stripes.Flink;
977  while (le != &c->partial_stripes) {
978  partial_stripe* ps = CONTAINING_RECORD(le, partial_stripe, list_entry);
979 
980  if (ps->address + ps_length > addr && ps->address < addr + length) {
981  ULONG runlength, index;
982 
983  runlength = RtlFindFirstRunClear(&ps->bmp, &index);
984 
985  while (runlength != 0) {
986  UINT64 runstart = ps->address + (index * Vcb->superblock.sector_size);
987  UINT64 runend = runstart + (runlength * Vcb->superblock.sector_size);
988  UINT64 start = max(runstart, addr);
989  UINT64 end = min(runend, addr + length);
990 
991  if (end > start)
992  RtlCopyMemory(buf + start - addr, &ps->data[start - ps->address], (ULONG)(end - start));
993 
994  runlength = RtlFindNextForwardRunClear(&ps->bmp, index + runlength, &index);
995  }
996  } else if (ps->address >= addr + length)
997  break;
998 
999  le = le->Flink;
1000  }
1001 
1002  ExReleaseResourceLite(&c->partial_stripes_lock);
1003  }
1004 
1005  if (context->tree) {
1006  tree_header* th = (tree_header*)buf;
1007  UINT32 crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum));
1008 
1009  if (addr != th->address || crc32 != *((UINT32*)th->csum)) {
1010  checksum_error = TRUE;
1011  if (!no_success && !degraded && devices[stripe])
1012  log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
1013  } else if (generation != 0 && generation != th->generation) {
1014  checksum_error = TRUE;
1015  if (!no_success && !degraded && devices[stripe])
1016  log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_GENERATION_ERRORS);
1017  }
1018  } else if (context->csum) {
1019 #ifdef DEBUG_STATS
1020  LARGE_INTEGER time1, time2;
1021 
1023 #endif
1024  Status = check_csum(Vcb, buf, length / Vcb->superblock.sector_size, context->csum);
1025 
1026  if (Status == STATUS_CRC_ERROR) {
1027  if (!degraded)
1028  WARN("checksum error\n");
1029  checksum_error = TRUE;
1030  } else if (!NT_SUCCESS(Status)) {
1031  ERR("check_csum returned %08x\n", Status);
1032  return Status;
1033  }
1034 #ifdef DEBUG_STATS
1036 
1037  Vcb->stats.read_csum_time += time2.QuadPart - time1.QuadPart;
1038 #endif
1039  } else if (degraded)
1040  checksum_error = TRUE;
1041 
1042  if (!checksum_error)
1043  return STATUS_SUCCESS;
1044 
1045  if (context->tree) {
1046  UINT8* sector;
1047  UINT16 k, physstripe, parity1, parity2, error_stripe;
1048  UINT64 off;
1049  BOOL recovered = FALSE, failed = FALSE;
1050  ULONG num_errors = 0;
1051 
1052  sector = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.node_size * (ci->num_stripes + 2), ALLOC_TAG);
1053  if (!sector) {
1054  ERR("out of memory\n");
1055  return STATUS_INSUFFICIENT_RESOURCES;
1056  }
1057 
1058  get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes - 2, &off, &stripe);
1059 
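// RAID6 keeps two parity stripes per row: P (parity1, plain XOR) and Q (parity2, Reed-Solomon)
// sit on consecutive devices that rotate with the row number, and the data stripes start on the
// device after Q; physstripe converts the logical data stripe index into a device index.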
1060  parity1 = (((addr - offset) / ((ci->num_stripes - 2) * ci->stripe_length)) + ci->num_stripes - 2) % ci->num_stripes;
1061  parity2 = (parity1 + 1) % ci->num_stripes;
1062 
1063  physstripe = (parity2 + stripe + 1) % ci->num_stripes;
1064 
1065  j = (parity2 + 1) % ci->num_stripes;
1066 
1067  for (k = 0; k < ci->num_stripes - 1; k++) {
1068  if (j != physstripe) {
1069  if (devices[j] && devices[j]->devobj) {
1070  Status = sync_read_phys(devices[j]->devobj, cis[j].offset + off, Vcb->superblock.node_size, sector + (k * Vcb->superblock.node_size), FALSE);
1071  if (!NT_SUCCESS(Status)) {
1072  ERR("sync_read_phys returned %08x\n", Status);
1074  num_errors++;
1075  error_stripe = k;
1076 
1077  if (num_errors > 1) {
1078  failed = TRUE;
1079  break;
1080  }
1081  }
1082  } else {
1083  num_errors++;
1084  error_stripe = k;
1085 
1086  if (num_errors > 1) {
1087  failed = TRUE;
1088  break;
1089  }
1090  }
1091  }
1092 
1093  j = (j + 1) % ci->num_stripes;
1094  }
1095 
1096  if (!failed) {
1097  if (num_errors == 0) {
1098  tree_header* th = (tree_header*)(sector + (stripe * Vcb->superblock.node_size));
1099  UINT32 crc32;
1100 
1101  RtlCopyMemory(sector + (stripe * Vcb->superblock.node_size), sector + ((ci->num_stripes - 2) * Vcb->superblock.node_size),
1102  Vcb->superblock.node_size);
1103 
1104  for (j = 0; j < ci->num_stripes - 2; j++) {
1105  if (j != stripe)
1106  do_xor(sector + (stripe * Vcb->superblock.node_size), sector + (j * Vcb->superblock.node_size), Vcb->superblock.node_size);
1107  }
1108 
1109  crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum));
1110 
1111  if (th->address == addr && crc32 == *((UINT32*)th->csum) && (generation == 0 || th->generation == generation)) {
1112  RtlCopyMemory(buf, sector + (stripe * Vcb->superblock.node_size), Vcb->superblock.node_size);
1113 
1114  if (devices[physstripe] && devices[physstripe]->devobj)
1115  ERR("recovering from checksum error at %llx, device %llx\n", addr, devices[physstripe]->devitem.dev_id);
1116 
1117  recovered = TRUE;
1118 
1119  if (!Vcb->readonly && devices[physstripe] && devices[physstripe]->devobj && !devices[physstripe]->readonly) { // write good data over bad
1120  Status = write_data_phys(devices[physstripe]->devobj, cis[physstripe].offset + off,
1121  sector + (stripe * Vcb->superblock.node_size), Vcb->superblock.node_size);
1122  if (!NT_SUCCESS(Status)) {
1123  WARN("write_data_phys returned %08x\n", Status);
1125  }
1126  }
1127  }
1128  }
1129 
1130  if (!recovered) {
1131  UINT32 crc32;
1132  tree_header* th = (tree_header*)(sector + (ci->num_stripes * Vcb->superblock.node_size));
1133  BOOL read_q = FALSE;
1134 
1135  if (devices[parity2] && devices[parity2]->devobj) {
1136  Status = sync_read_phys(devices[parity2]->devobj, cis[parity2].offset + off,
1137  Vcb->superblock.node_size, sector + ((ci->num_stripes - 1) * Vcb->superblock.node_size), FALSE);
1138  if (!NT_SUCCESS(Status)) {
1139  ERR("sync_read_phys returned %08x\n", Status);
1141  } else
1142  read_q = TRUE;
1143  }
1144 
1145  if (read_q) {
1146  if (num_errors == 1) {
1147  raid6_recover2(sector, ci->num_stripes, Vcb->superblock.node_size, stripe, error_stripe, sector + (ci->num_stripes * Vcb->superblock.node_size));
1148 
1149  crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum));
1150 
1151  if (th->address == addr && crc32 == *((UINT32*)th->csum) && (generation == 0 || th->generation == generation))
1152  recovered = TRUE;
1153  } else {
1154  for (j = 0; j < ci->num_stripes - 1; j++) {
1155  if (j != stripe) {
1156  raid6_recover2(sector, ci->num_stripes, Vcb->superblock.node_size, stripe, j, sector + (ci->num_stripes * Vcb->superblock.node_size));
1157 
1158  crc32 = ~calc_crc32c(0xffffffff, (UINT8*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum));
1159 
1160  if (th->address == addr && crc32 == *((UINT32*)th->csum) && (generation == 0 || th->generation == generation)) {
1161  recovered = TRUE;
1162  error_stripe = j;
1163  break;
1164  }
1165  }
1166  }
1167  }
1168  }
1169 
1170  if (recovered) {
1171  UINT16 error_stripe_phys = (parity2 + error_stripe + 1) % ci->num_stripes;
1172 
1173  if (devices[physstripe] && devices[physstripe]->devobj)
1174  ERR("recovering from checksum error at %llx, device %llx\n", addr, devices[physstripe]->devitem.dev_id);
1175 
1176  RtlCopyMemory(buf, sector + (ci->num_stripes * Vcb->superblock.node_size), Vcb->superblock.node_size);
1177 
1178  if (!Vcb->readonly && devices[physstripe] && devices[physstripe]->devobj && !devices[physstripe]->readonly) { // write good data over bad
1179  Status = write_data_phys(devices[physstripe]->devobj, cis[physstripe].offset + off,
1180  sector + (ci->num_stripes * Vcb->superblock.node_size), Vcb->superblock.node_size);
1181  if (!NT_SUCCESS(Status)) {
1182  WARN("write_data_phys returned %08x\n", Status);
1184  }
1185  }
1186 
1187  if (devices[error_stripe_phys] && devices[error_stripe_phys]->devobj) {
1188  if (error_stripe == ci->num_stripes - 2) {
1189  ERR("recovering from parity error at %llx, device %llx\n", addr, devices[error_stripe_phys]->devitem.dev_id);
1190 
1192 
1193  RtlZeroMemory(sector + ((ci->num_stripes - 2) * Vcb->superblock.node_size), Vcb->superblock.node_size);
1194 
1195  for (j = 0; j < ci->num_stripes - 2; j++) {
1196  if (j == stripe) {
1197  do_xor(sector + ((ci->num_stripes - 2) * Vcb->superblock.node_size), sector + (ci->num_stripes * Vcb->superblock.node_size),
1198  Vcb->superblock.node_size);
1199  } else {
1200  do_xor(sector + ((ci->num_stripes - 2) * Vcb->superblock.node_size), sector + (j * Vcb->superblock.node_size),
1201  Vcb->superblock.node_size);
1202  }
1203  }
1204  } else {
1205  ERR("recovering from checksum error at %llx, device %llx\n", addr + ((error_stripe - stripe) * ci->stripe_length),
1206  devices[error_stripe_phys]->devitem.dev_id);
1207 
1209 
1210  RtlCopyMemory(sector + (error_stripe * Vcb->superblock.node_size),
1211  sector + ((ci->num_stripes + 1) * Vcb->superblock.node_size), Vcb->superblock.node_size);
1212  }
1213  }
1214 
1215  if (!Vcb->readonly && devices[error_stripe_phys] && devices[error_stripe_phys]->devobj && !devices[error_stripe_phys]->readonly) { // write good data over bad
1216  Status = write_data_phys(devices[error_stripe_phys]->devobj, cis[error_stripe_phys].offset + off,
1217  sector + (error_stripe * Vcb->superblock.node_size), Vcb->superblock.node_size);
1218  if (!NT_SUCCESS(Status)) {
1219  WARN("write_data_phys returned %08x\n", Status);
1221  }
1222  }
1223  }
1224  }
1225  }
1226 
1227  if (!recovered) {
1228  ERR("unrecoverable checksum error at %llx\n", addr);
1229  ExFreePool(sector);
1230  return STATUS_CRC_ERROR;
1231  }
1232 
1233  ExFreePool(sector);
1234  } else {
1235  ULONG sectors = length / Vcb->superblock.sector_size;
1236  UINT8* sector;
1237 
1238  sector = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.sector_size * (ci->num_stripes + 2), ALLOC_TAG);
1239  if (!sector) {
1240  ERR("out of memory\n");
1241  return STATUS_INSUFFICIENT_RESOURCES;
1242  }
1243 
1244  for (i = 0; i < sectors; i++) {
1245  UINT64 off;
1246  UINT16 physstripe, parity1, parity2;
1247  UINT32 crc32;
1248 
1249  if (context->csum)
1250  crc32 = ~calc_crc32c(0xffffffff, buf + (i * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
1251 
1252  get_raid0_offset(addr - offset + UInt32x32To64(i, Vcb->superblock.sector_size), ci->stripe_length,
1253  ci->num_stripes - 2, &off, &stripe);
1254 
1255  parity1 = (((addr - offset + UInt32x32To64(i, Vcb->superblock.sector_size)) / ((ci->num_stripes - 2) * ci->stripe_length)) + ci->num_stripes - 2) % ci->num_stripes;
1256  parity2 = (parity1 + 1) % ci->num_stripes;
1257 
1258  physstripe = (parity2 + stripe + 1) % ci->num_stripes;
1259 
1260  if (!devices[physstripe] || !devices[physstripe]->devobj || (context->csum && context->csum[i] != crc32)) {
1261  UINT16 k, error_stripe;
1262  BOOL recovered = FALSE, failed = FALSE;
1263  ULONG num_errors = 0;
1264 
1265  if (devices[physstripe] && devices[physstripe]->devobj)
1266  log_device_error(Vcb, devices[physstripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
1267 
1268  j = (parity2 + 1) % ci->num_stripes;
1269 
1270  for (k = 0; k < ci->num_stripes - 1; k++) {
1271  if (j != physstripe) {
1272  if (devices[j] && devices[j]->devobj) {
1273  Status = sync_read_phys(devices[j]->devobj, cis[j].offset + off, Vcb->superblock.sector_size, sector + (k * Vcb->superblock.sector_size), FALSE);
1274  if (!NT_SUCCESS(Status)) {
1275  ERR("sync_read_phys returned %08x\n", Status);
1277  num_errors++;
1278  error_stripe = k;
1279 
1280  if (num_errors > 1) {
1281  failed = TRUE;
1282  break;
1283  }
1284  }
1285  } else {
1286  num_errors++;
1287  error_stripe = k;
1288 
1289  if (num_errors > 1) {
1290  failed = TRUE;
1291  break;
1292  }
1293  }
1294  }
1295 
1296  j = (j + 1) % ci->num_stripes;
1297  }
1298 
1299  if (!failed) {
1300  if (num_errors == 0) {
1301  RtlCopyMemory(sector + (stripe * Vcb->superblock.sector_size), sector + ((ci->num_stripes - 2) * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
1302 
1303  for (j = 0; j < ci->num_stripes - 2; j++) {
1304  if (j != stripe)
1305  do_xor(sector + (stripe * Vcb->superblock.sector_size), sector + (j * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
1306  }
1307 
1308  if (context->csum)
1309  crc32 = ~calc_crc32c(0xffffffff, sector + (stripe * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
1310 
1311  if (!context->csum || crc32 == context->csum[i]) {
1312  RtlCopyMemory(buf + (i * Vcb->superblock.sector_size), sector + (stripe * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
1313 
1314  if (devices[physstripe] && devices[physstripe]->devobj)
1315  ERR("recovering from checksum error at %llx, device %llx\n", addr + UInt32x32To64(i, Vcb->superblock.sector_size),
1316  devices[physstripe]->devitem.dev_id);
1317 
1318  recovered = TRUE;
1319 
1320  if (!Vcb->readonly && devices[physstripe] && devices[physstripe]->devobj && !devices[physstripe]->readonly) { // write good data over bad
1321  Status = write_data_phys(devices[physstripe]->devobj, cis[physstripe].offset + off,
1322  sector + (stripe * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
1323  if (!NT_SUCCESS(Status)) {
1324  WARN("write_data_phys returned %08x\n", Status);
1326  }
1327  }
1328  }
1329  }
1330 
1331  if (!recovered) {
1332  BOOL read_q = FALSE;
1333 
1334  if (devices[parity2] && devices[parity2]->devobj) {
1335  Status = sync_read_phys(devices[parity2]->devobj, cis[parity2].offset + off,
1336  Vcb->superblock.sector_size, sector + ((ci->num_stripes - 1) * Vcb->superblock.sector_size), FALSE);
1337  if (!NT_SUCCESS(Status)) {
1338  ERR("sync_read_phys returned %08x\n", Status);
1340  } else
1341  read_q = TRUE;
1342  }
1343 
1344  if (read_q) {
1345  if (num_errors == 1) {
1346  raid6_recover2(sector, ci->num_stripes, Vcb->superblock.sector_size, stripe, error_stripe, sector + (ci->num_stripes * Vcb->superblock.sector_size));
1347 
1348  if (!devices[physstripe] || !devices[physstripe]->devobj)
1349  recovered = TRUE;
1350  else {
1351  crc32 = ~calc_crc32c(0xffffffff, sector + (ci->num_stripes * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
1352 
1353  if (crc32 == context->csum[i])
1354  recovered = TRUE;
1355  }
1356  } else {
1357  for (j = 0; j < ci->num_stripes - 1; j++) {
1358  if (j != stripe) {
1359  raid6_recover2(sector, ci->num_stripes, Vcb->superblock.sector_size, stripe, j, sector + (ci->num_stripes * Vcb->superblock.sector_size));
1360 
1361  crc32 = ~calc_crc32c(0xffffffff, sector + (ci->num_stripes * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
1362 
1363  if (crc32 == context->csum[i]) {
1364  recovered = TRUE;
1365  error_stripe = j;
1366  break;
1367  }
1368  }
1369  }
1370  }
1371  }
1372 
1373  if (recovered) {
1374  UINT16 error_stripe_phys = (parity2 + error_stripe + 1) % ci->num_stripes;
1375 
1376  if (devices[physstripe] && devices[physstripe]->devobj)
1377  ERR("recovering from checksum error at %llx, device %llx\n",
1378  addr + UInt32x32To64(i, Vcb->superblock.sector_size), devices[physstripe]->devitem.dev_id);
1379 
1380  RtlCopyMemory(buf + (i * Vcb->superblock.sector_size), sector + (ci->num_stripes * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
1381 
1382  if (!Vcb->readonly && devices[physstripe] && devices[physstripe]->devobj && !devices[physstripe]->readonly) { // write good data over bad
1383  Status = write_data_phys(devices[physstripe]->devobj, cis[physstripe].offset + off,
1384  sector + (ci->num_stripes * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
1385  if (!NT_SUCCESS(Status)) {
1386  WARN("write_data_phys returned %08x\n", Status);
1388  }
1389  }
1390 
1391  if (devices[error_stripe_phys] && devices[error_stripe_phys]->devobj) {
1392  if (error_stripe == ci->num_stripes - 2) {
1393  ERR("recovering from parity error at %llx, device %llx\n", addr + UInt32x32To64(i, Vcb->superblock.sector_size),
1394  devices[error_stripe_phys]->devitem.dev_id);
1395 
1397 
1398  RtlZeroMemory(sector + ((ci->num_stripes - 2) * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
1399 
1400  for (j = 0; j < ci->num_stripes - 2; j++) {
1401  if (j == stripe) {
1402  do_xor(sector + ((ci->num_stripes - 2) * Vcb->superblock.sector_size), sector + (ci->num_stripes * Vcb->superblock.sector_size),
1403  Vcb->superblock.sector_size);
1404  } else {
1405  do_xor(sector + ((ci->num_stripes - 2) * Vcb->superblock.sector_size), sector + (j * Vcb->superblock.sector_size),
1406  Vcb->superblock.sector_size);
1407  }
1408  }
1409  } else {
1410  ERR("recovering from checksum error at %llx, device %llx\n",
1411  addr + UInt32x32To64(i, Vcb->superblock.sector_size) + ((error_stripe - stripe) * ci->stripe_length),
1412  devices[error_stripe_phys]->devitem.dev_id);
1413 
1415 
1416  RtlCopyMemory(sector + (error_stripe * Vcb->superblock.sector_size),
1417  sector + ((ci->num_stripes + 1) * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
1418  }
1419  }
1420 
1421  if (!Vcb->readonly && devices[error_stripe_phys] && devices[error_stripe_phys]->devobj && !devices[error_stripe_phys]->readonly) { // write good data over bad
1422  Status = write_data_phys(devices[error_stripe_phys]->devobj, cis[error_stripe_phys].offset + off,
1423  sector + (error_stripe * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
1424  if (!NT_SUCCESS(Status)) {
1425  WARN("write_data_phys returned %08x\n", Status);
1427  }
1428  }
1429  }
1430  }
1431  }
1432 
1433  if (!recovered) {
1434  ERR("unrecoverable checksum error at %llx\n", addr + UInt32x32To64(i, Vcb->superblock.sector_size));
1435  ExFreePool(sector);
1436  return STATUS_CRC_ERROR;
1437  }
1438  }
1439  }
1440 
1441  ExFreePool(sector);
1442  }
1443 
1444  return STATUS_SUCCESS;
1445 }
1446 
1447 NTSTATUS read_data(_In_ device_extension* Vcb, _In_ UINT64 addr, _In_ UINT32 length, _In_reads_bytes_opt_(length*sizeof(UINT32)/Vcb->superblock.sector_size) UINT32* csum,
1448  _In_ BOOL is_tree, _Out_writes_bytes_(length) UINT8* buf, _In_opt_ chunk* c, _Out_opt_ chunk** pc, _In_opt_ PIRP Irp, _In_ UINT64 generation, _In_ BOOL file_read,
1449  _In_ ULONG priority) {
1450  CHUNK_ITEM* ci;
1451  CHUNK_ITEM_STRIPE* cis;
1452  read_data_context context;
1453  UINT64 type, offset, total_reading = 0;
1454  NTSTATUS Status;
1455  device** devices = NULL;
1456  UINT16 i, startoffstripe, allowed_missing, missing_devices = 0;
1457  UINT8* dummypage = NULL;
1458  PMDL dummy_mdl = NULL;
1459  BOOL need_to_wait;
1460  UINT64 lockaddr, locklen;
1461 #ifdef DEBUG_STATS
1462  LARGE_INTEGER time1, time2;
1463 #endif
1464 
1465  if (Vcb->log_to_phys_loaded) {
1466  if (!c) {
1467  c = get_chunk_from_address(Vcb, addr);
1468 
1469  if (!c) {
1470  ERR("get_chunk_from_address failed\n");
1471  return STATUS_INTERNAL_ERROR;
1472  }
1473  }
1474 
1475  ci = c->chunk_item;
1476  offset = c->offset;
1477  devices = c->devices;
1478 
1479  if (pc)
1480  *pc = c;
1481  } else {
1482  LIST_ENTRY* le = Vcb->sys_chunks.Flink;
1483 
1484  ci = NULL;
1485 
1486  c = NULL;
1487  while (le != &Vcb->sys_chunks) {
1488  sys_chunk* sc = CONTAINING_RECORD(le, sys_chunk, list_entry);
1489 
1490  if (sc->key.obj_id == 0x100 && sc->key.obj_type == TYPE_CHUNK_ITEM && sc->key.offset <= addr) {
1491  CHUNK_ITEM* chunk_item = sc->data;
1492 
1493  if ((addr - sc->key.offset) < chunk_item->size && chunk_item->num_stripes > 0) {
1494  ci = chunk_item;
1495  offset = sc->key.offset;
1496  cis = (CHUNK_ITEM_STRIPE*)&chunk_item[1];
1497 
1498  devices = ExAllocatePoolWithTag(NonPagedPool, sizeof(device*) * ci->num_stripes, ALLOC_TAG);
1499  if (!devices) {
1500  ERR("out of memory\n");
1501  return STATUS_INSUFFICIENT_RESOURCES;
1502  }
1503 
1504  for (i = 0; i < ci->num_stripes; i++) {
1505  devices[i] = find_device_from_uuid(Vcb, &cis[i].dev_uuid);
1506  }
1507 
1508  break;
1509  }
1510  }
1511 
1512  le = le->Flink;
1513  }
1514 
1515  if (!ci) {
1516  ERR("could not find chunk for %llx in bootstrap\n", addr);
1517  return STATUS_INTERNAL_ERROR;
1518  }
1519 
1520  if (pc)
1521  *pc = NULL;
1522  }
1523 
1524  if (ci->type & BLOCK_FLAG_DUPLICATE) {
1525  type = BLOCK_FLAG_DUPLICATE;
1526  allowed_missing = ci->num_stripes - 1;
1527  } else if (ci->type & BLOCK_FLAG_RAID0) {
1528  type = BLOCK_FLAG_RAID0;
1529  allowed_missing = 0;
1530  } else if (ci->type & BLOCK_FLAG_RAID1) {
1531  type = BLOCK_FLAG_DUPLICATE;
1532  allowed_missing = 1;
1533  } else if (ci->type & BLOCK_FLAG_RAID10) {
1534  type = BLOCK_FLAG_RAID10;
1535  allowed_missing = 1;
1536  } else if (ci->type & BLOCK_FLAG_RAID5) {
1537  type = BLOCK_FLAG_RAID5;
1538  allowed_missing = 1;
1539  } else if (ci->type & BLOCK_FLAG_RAID6) {
1540  type = BLOCK_FLAG_RAID6;
1541  allowed_missing = 2;
1542  } else { // SINGLE
1543  type = BLOCK_FLAG_DUPLICATE;
1544  allowed_missing = 0;
1545  }
1546 
1547  cis = (CHUNK_ITEM_STRIPE*)&ci[1];
1548 
1549  RtlZeroMemory(&context, sizeof(read_data_context));
1550  KeInitializeEvent(&context.Event, NotificationEvent, FALSE);
1551 
1552  context.stripes = ExAllocatePoolWithTag(NonPagedPool, sizeof(read_data_stripe) * ci->num_stripes, ALLOC_TAG);
1553  if (!context.stripes) {
1554  ERR("out of memory\n");
1555  return STATUS_INSUFFICIENT_RESOURCES;
1556  }
1557 
1558  if (c && (type == BLOCK_FLAG_RAID5 || type == BLOCK_FLAG_RAID6)) {
1559  get_raid56_lock_range(c, addr, length, &lockaddr, &locklen);
1560  chunk_lock_range(Vcb, c, lockaddr, locklen);
1561  }
1562 
1563  RtlZeroMemory(context.stripes, sizeof(read_data_stripe) * ci->num_stripes);
1564 
1565  context.buflen = length;
1566  context.num_stripes = ci->num_stripes;
1567  context.stripes_left = context.num_stripes;
1568  context.sector_size = Vcb->superblock.sector_size;
1569  context.csum = csum;
1570  context.tree = is_tree;
1571  context.type = type;
1572 
1573  if (type == BLOCK_FLAG_RAID0) {
1574  UINT64 startoff, endoff;
1575  UINT16 endoffstripe, stripe;
1576  UINT32 *stripeoff, pos;
1577  PMDL master_mdl;
1578  PFN_NUMBER* pfns;
1579 
1580  // FIXME - test this still works if page size isn't the same as sector size
1581 
1582  // This relies on the fact that MDLs are followed in memory by the page file numbers,
1583  // so with a bit of jiggery-pokery you can trick your disks into deinterlacing your RAID0
1584  // data for you without doing a memcpy yourself.
1585  // MDLs are officially opaque, so this might very well break in future versions of Windows.
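// Concretely, the PFN array that follows the master MDL in memory is sliced up below: each
// per-stripe MDL receives the page frame numbers of just the pages belonging to its stripe, so
// the disk stack scatters the data straight into the caller's buffer in the right order.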
1586 
1587  get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes, &startoff, &startoffstripe);
1588  get_raid0_offset(addr + length - offset - 1, ci->stripe_length, ci->num_stripes, &endoff, &endoffstripe);
1589 
1590  if (file_read) {
1591  // Unfortunately we can't avoid doing at least one memcpy, as Windows can give us an MDL
1592  // with duplicated dummy PFNs, which confuse check_csum. Ah well.
1593  // See https://msdn.microsoft.com/en-us/library/windows/hardware/Dn614012.aspx if you're interested.
1594 
1595  context.va = ExAllocatePoolWithTag(NonPagedPool, length, ALLOC_TAG);
1596 
1597  if (!context.va) {
1598  ERR("out of memory\n");
1599  Status = STATUS_INSUFFICIENT_RESOURCES;
1600  goto exit;
1601  }
1602  } else
1603  context.va = buf;
1604 
1605  master_mdl = IoAllocateMdl(context.va, length, FALSE, FALSE, NULL);
1606  if (!master_mdl) {
1607  ERR("out of memory\n");
1608  Status = STATUS_INSUFFICIENT_RESOURCES;
1609  goto exit;
1610  }
1611 
1612  Status = STATUS_SUCCESS;
1613 
1614  _SEH2_TRY {
1615  MmProbeAndLockPages(master_mdl, KernelMode, IoWriteAccess);
1616  } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) {
1617  Status = _SEH2_GetExceptionCode();
1618  } _SEH2_END;
1619 
1620  if (!NT_SUCCESS(Status)) {
1621  ERR("MmProbeAndLockPages threw exception %08x\n", Status);
1622  IoFreeMdl(master_mdl);
1623  goto exit;
1624  }
1625 
1626  pfns = (PFN_NUMBER*)(master_mdl + 1);
1627 
1628  for (i = 0; i < ci->num_stripes; i++) {
1629  if (startoffstripe > i)
1630  context.stripes[i].stripestart = startoff - (startoff % ci->stripe_length) + ci->stripe_length;
1631  else if (startoffstripe == i)
1632  context.stripes[i].stripestart = startoff;
1633  else
1634  context.stripes[i].stripestart = startoff - (startoff % ci->stripe_length);
1635 
1636  if (endoffstripe > i)
1637  context.stripes[i].stripeend = endoff - (endoff % ci->stripe_length) + ci->stripe_length;
1638  else if (endoffstripe == i)
1639  context.stripes[i].stripeend = endoff + 1;
1640  else
1641  context.stripes[i].stripeend = endoff - (endoff % ci->stripe_length);
1642 
1643  if (context.stripes[i].stripestart != context.stripes[i].stripeend) {
1644  context.stripes[i].mdl = IoAllocateMdl(context.va, (ULONG)(context.stripes[i].stripeend - context.stripes[i].stripestart), FALSE, FALSE, NULL);
1645 
1646  if (!context.stripes[i].mdl) {
1647  ERR("IoAllocateMdl failed\n");
1648  MmUnlockPages(master_mdl);
1649  IoFreeMdl(master_mdl);
1651  goto exit;
1652  }
1653  }
1654  }
1655 
1656  stripeoff = ExAllocatePoolWithTag(NonPagedPool, sizeof(UINT32) * ci->num_stripes, ALLOC_TAG);
1657  if (!stripeoff) {
1658  ERR("out of memory\n");
1659  MmUnlockPages(master_mdl);
1660  IoFreeMdl(master_mdl);
1662  goto exit;
1663  }
1664 
1665  RtlZeroMemory(stripeoff, sizeof(UINT32) * ci->num_stripes);
1666 
1667  pos = 0;
1668  stripe = startoffstripe;
1669  while (pos < length) {
1670  PFN_NUMBER* stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1);
1671 
1672  if (pos == 0) {
1673  UINT32 readlen = (UINT32)min(context.stripes[stripe].stripeend - context.stripes[stripe].stripestart, ci->stripe_length - (context.stripes[stripe].stripestart % ci->stripe_length));
1674 
1675  RtlCopyMemory(stripe_pfns, pfns, readlen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
1676 
1677  stripeoff[stripe] += readlen;
1678  pos += readlen;
1679  } else if (length - pos < ci->stripe_length) {
1680  RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], (length - pos) * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
1681 
1682  pos = length;
1683  } else {
1684  RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], (ULONG)(ci->stripe_length * sizeof(PFN_NUMBER) >> PAGE_SHIFT));
1685 
1686  stripeoff[stripe] += (UINT32)ci->stripe_length;
1687  pos += (UINT32)ci->stripe_length;
1688  }
1689 
1690  stripe = (stripe + 1) % ci->num_stripes;
1691  }
1692 
1693  MmUnlockPages(master_mdl);
1694  IoFreeMdl(master_mdl);
1695 
1696  ExFreePool(stripeoff);
1697  } else if (type == BLOCK_FLAG_RAID10) {
1698  UINT64 startoff, endoff;
1699  UINT16 endoffstripe, j, stripe;
1700  ULONG orig_ls;
1701  PMDL master_mdl;
1702  PFN_NUMBER* pfns;
1703  UINT32* stripeoff, pos;
1704  read_data_stripe** stripes;
1705 
1706  if (c)
1707  orig_ls = c->last_stripe;
1708  else
1709  orig_ls = 0;
1710 
1711  get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes / ci->sub_stripes, &startoff, &startoffstripe);
1712  get_raid0_offset(addr + length - offset - 1, ci->stripe_length, ci->num_stripes / ci->sub_stripes, &endoff, &endoffstripe);
1713 
1714  if ((ci->num_stripes % ci->sub_stripes) != 0) {
1715  ERR("chunk %llx: num_stripes %x was not a multiple of sub_stripes %x!\n", offset, ci->num_stripes, ci->sub_stripes);
1717  goto exit;
1718  }
1719 
1720  if (file_read) {
1721  context.va = ExAllocatePoolWithTag(NonPagedPool, length, ALLOC_TAG);
1722 
1723  if (!context.va) {
1724  ERR("out of memory\n");
1726  goto exit;
1727  }
1728  } else
1729  context.va = buf;
1730 
1731  context.firstoff = (UINT16)((startoff % ci->stripe_length) / Vcb->superblock.sector_size);
1732  context.startoffstripe = startoffstripe;
1733  context.sectors_per_stripe = (UINT16)(ci->stripe_length / Vcb->superblock.sector_size);
1734 
1735  startoffstripe *= ci->sub_stripes;
1736  endoffstripe *= ci->sub_stripes;
1737 
1738  if (c)
1739  c->last_stripe = (orig_ls + 1) % ci->sub_stripes;
1740 
1741  master_mdl = IoAllocateMdl(context.va, length, FALSE, FALSE, NULL);
1742  if (!master_mdl) {
1743  ERR("out of memory\n");
1745  goto exit;
1746  }
1747 
1748  Status = STATUS_SUCCESS;
1749 
1750  _SEH2_TRY {
1751  MmProbeAndLockPages(master_mdl, KernelMode, IoWriteAccess);
1752  } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) {
1753  Status = _SEH2_GetExceptionCode();
1754  } _SEH2_END;
1755 
1756  if (!NT_SUCCESS(Status)) {
1757  ERR("MmProbeAndLockPages threw exception %08x\n", Status);
1758  IoFreeMdl(master_mdl);
1759  goto exit;
1760  }
1761 
1762  pfns = (PFN_NUMBER*)(master_mdl + 1);
1763 
1764  stripes = ExAllocatePoolWithTag(NonPagedPool, sizeof(read_data_stripe*) * ci->num_stripes / ci->sub_stripes, ALLOC_TAG);
1765  if (!stripes) {
1766  ERR("out of memory\n");
1767  MmUnlockPages(master_mdl);
1768  IoFreeMdl(master_mdl);
1770  goto exit;
1771  }
1772 
1773  RtlZeroMemory(stripes, sizeof(read_data_stripe*) * ci->num_stripes / ci->sub_stripes);
1774 
1775  for (i = 0; i < ci->num_stripes; i += ci->sub_stripes) {
1776  UINT64 sstart, send;
1777  BOOL stripeset = FALSE;
1778 
1779  if (startoffstripe > i)
1780  sstart = startoff - (startoff % ci->stripe_length) + ci->stripe_length;
1781  else if (startoffstripe == i)
1782  sstart = startoff;
1783  else
1784  sstart = startoff - (startoff % ci->stripe_length);
1785 
1786  if (endoffstripe > i)
1787  send = endoff - (endoff % ci->stripe_length) + ci->stripe_length;
1788  else if (endoffstripe == i)
1789  send = endoff + 1;
1790  else
1791  send = endoff - (endoff % ci->stripe_length);
1792 
1793  for (j = 0; j < ci->sub_stripes; j++) {
1794  if (j == orig_ls && devices[i+j] && devices[i+j]->devobj) {
1795  context.stripes[i+j].stripestart = sstart;
1796  context.stripes[i+j].stripeend = send;
1797  stripes[i / ci->sub_stripes] = &context.stripes[i+j];
1798 
1799  if (sstart != send) {
1800  context.stripes[i+j].mdl = IoAllocateMdl(context.va, (ULONG)(send - sstart), FALSE, FALSE, NULL);
1801 
1802  if (!context.stripes[i+j].mdl) {
1803  ERR("IoAllocateMdl failed\n");
1804  MmUnlockPages(master_mdl);
1805  IoFreeMdl(master_mdl);
1807  goto exit;
1808  }
1809  }
1810 
1811  stripeset = TRUE;
1812  } else
1813  context.stripes[i+j].status = ReadDataStatus_Skip;
1814  }
1815 
1816  if (!stripeset) {
1817  for (j = 0; j < ci->sub_stripes; j++) {
1818  if (devices[i+j] && devices[i+j]->devobj) {
1819  context.stripes[i+j].stripestart = sstart;
1820  context.stripes[i+j].stripeend = send;
1821  context.stripes[i+j].status = ReadDataStatus_Pending;
1822  stripes[i / ci->sub_stripes] = &context.stripes[i+j];
1823 
1824  if (sstart != send) {
1825  context.stripes[i+j].mdl = IoAllocateMdl(context.va, (ULONG)(send - sstart), FALSE, FALSE, NULL);
1826 
1827  if (!context.stripes[i+j].mdl) {
1828  ERR("IoAllocateMdl failed\n");
1829  MmUnlockPages(master_mdl);
1830  IoFreeMdl(master_mdl);
1832  goto exit;
1833  }
1834  }
1835 
1836  stripeset = TRUE;
1837  break;
1838  }
1839  }
1840 
1841  if (!stripeset) {
1842  ERR("could not find stripe to read\n");
1844  goto exit;
1845  }
1846  }
1847  }
1848 
1849  stripeoff = ExAllocatePoolWithTag(NonPagedPool, sizeof(UINT32) * ci->num_stripes / ci->sub_stripes, ALLOC_TAG);
1850  if (!stripeoff) {
1851  ERR("out of memory\n");
1852  MmUnlockPages(master_mdl);
1853  IoFreeMdl(master_mdl);
1855  goto exit;
1856  }
1857 
1858  RtlZeroMemory(stripeoff, sizeof(UINT32) * ci->num_stripes / ci->sub_stripes);
1859 
1860  pos = 0;
1861  stripe = startoffstripe / ci->sub_stripes;
1862  while (pos < length) {
1863  PFN_NUMBER* stripe_pfns = (PFN_NUMBER*)(stripes[stripe]->mdl + 1);
1864 
1865  if (pos == 0) {
1866  UINT32 readlen = (UINT32)min(stripes[stripe]->stripeend - stripes[stripe]->stripestart,
1867  ci->stripe_length - (stripes[stripe]->stripestart % ci->stripe_length));
1868 
1869  RtlCopyMemory(stripe_pfns, pfns, readlen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
1870 
1871  stripeoff[stripe] += readlen;
1872  pos += readlen;
1873  } else if (length - pos < ci->stripe_length) {
1874  RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], (length - pos) * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
1875 
1876  pos = length;
1877  } else {
1878  RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], (ULONG)(ci->stripe_length * sizeof(PFN_NUMBER) >> PAGE_SHIFT));
1879 
1880  stripeoff[stripe] += (ULONG)ci->stripe_length;
1881  pos += (ULONG)ci->stripe_length;
1882  }
1883 
1884  stripe = (stripe + 1) % (ci->num_stripes / ci->sub_stripes);
1885  }
1886 
1887  MmUnlockPages(master_mdl);
1888  IoFreeMdl(master_mdl);
1889 
1890  ExFreePool(stripeoff);
1891  ExFreePool(stripes);
1892  } else if (type == BLOCK_FLAG_DUPLICATE) {
1893  UINT64 orig_ls;
1894 
1895  if (c)
1896  orig_ls = i = c->last_stripe;
1897  else
1898  orig_ls = i = 0;
1899 
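 // DUP: all copies are identical, so just find the next present device,
 // starting from the chunk's last_stripe so successive reads rotate
 // between copies.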
1900  while (!devices[i] || !devices[i]->devobj) {
1901  i = (i + 1) % ci->num_stripes;
1902 
1903  if (i == orig_ls) {
1904  ERR("no devices available to service request\n");
1906  goto exit;
1907  }
1908  }
1909 
1910  if (c)
1911  c->last_stripe = (i + 1) % ci->num_stripes;
1912 
1913  context.stripes[i].stripestart = addr - offset;
1914  context.stripes[i].stripeend = context.stripes[i].stripestart + length;
1915 
1916  if (file_read) {
1917  context.va = ExAllocatePoolWithTag(NonPagedPool, length, ALLOC_TAG);
1918 
1919  if (!context.va) {
1920  ERR("out of memory\n");
1921  Status = STATUS_INSUFFICIENT_RESOURCES;
1922  goto exit;
1923  }
1924 
1925  context.stripes[i].mdl = IoAllocateMdl(context.va, length, FALSE, FALSE, NULL);
1926  if (!context.stripes[i].mdl) {
1927  ERR("IoAllocateMdl failed\n");
1928  Status = STATUS_INSUFFICIENT_RESOURCES;
1929  goto exit;
1930  }
1931 
1932  MmBuildMdlForNonPagedPool(context.stripes[i].mdl);
1933  } else {
1934  context.stripes[i].mdl = IoAllocateMdl(buf, length, FALSE, FALSE, NULL);
1935 
1936  if (!context.stripes[i].mdl) {
1937  ERR("IoAllocateMdl failed\n");
1938  Status = STATUS_INSUFFICIENT_RESOURCES;
1939  goto exit;
1940  }
1941 
1942  Status = STATUS_SUCCESS;
1943 
1944  _SEH2_TRY {
1945  MmProbeAndLockPages(context.stripes[i].mdl, KernelMode, IoWriteAccess);
1946  } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) {
1947  Status = _SEH2_GetExceptionCode();
1948  } _SEH2_END;
1949 
1950  if (!NT_SUCCESS(Status)) {
1951  ERR("MmProbeAndLockPages threw exception %08x\n", Status);
1952  goto exit;
1953  }
1954  }
1955  } else if (type == BLOCK_FLAG_RAID5) {
1956  UINT64 startoff, endoff;
1957  UINT16 endoffstripe, parity;
1958  UINT32 *stripeoff, pos;
1959  PMDL master_mdl;
1960  PFN_NUMBER *pfns, dummy;
1961  BOOL need_dummy = FALSE;
1962 
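 // RAID5: data is striped across num_stripes - 1 devices per row, with the
 // parity stripe rotating; map the start and end of the read as if it were
 // RAID0 over the data stripes only.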
1963  get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes - 1, &startoff, &startoffstripe);
1964  get_raid0_offset(addr + length - offset - 1, ci->stripe_length, ci->num_stripes - 1, &endoff, &endoffstripe);
1965 
1966  if (file_read) {
1967  context.va = ExAllocatePoolWithTag(NonPagedPool, length, ALLOC_TAG);
1968 
1969  if (!context.va) {
1970  ERR("out of memory\n");
1971  Status = STATUS_INSUFFICIENT_RESOURCES;
1972  goto exit;
1973  }
1974  } else
1975  context.va = buf;
1976 
1977  master_mdl = IoAllocateMdl(context.va, length, FALSE, FALSE, NULL);
1978  if (!master_mdl) {
1979  ERR("out of memory\n");
1980  Status = STATUS_INSUFFICIENT_RESOURCES;
1981  goto exit;
1982  }
1983 
1984  Status = STATUS_SUCCESS;
1985 
1986  _SEH2_TRY {
1987  MmProbeAndLockPages(master_mdl, KernelMode, IoWriteAccess);
1988  } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) {
1989  Status = _SEH2_GetExceptionCode();
1990  } _SEH2_END;
1991 
1992  if (!NT_SUCCESS(Status)) {
1993  ERR("MmProbeAndLockPages threw exception %08x\n", Status);
1994  IoFreeMdl(master_mdl);
1995  goto exit;
1996  }
1997 
1998  pfns = (PFN_NUMBER*)(master_mdl + 1);
1999 
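 // First pass: work out the byte range (stripestart/stripeend) to read from
 // each device, and whether any full-width rows will need the dummy page for
 // their parity blocks.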
2000  pos = 0;
2001  while (pos < length) {
2002  parity = (((addr - offset + pos) / ((ci->num_stripes - 1) * ci->stripe_length)) + ci->num_stripes - 1) % ci->num_stripes;
2003 
2004  if (pos == 0) {
2005  UINT16 stripe = (parity + startoffstripe + 1) % ci->num_stripes;
2006  ULONG skip, readlen;
2007 
2008  i = startoffstripe;
2009  while (stripe != parity) {
2010  if (i == startoffstripe) {
2011  readlen = min(length, (ULONG)(ci->stripe_length - (startoff % ci->stripe_length)));
2012 
2013  context.stripes[stripe].stripestart = startoff;
2014  context.stripes[stripe].stripeend = startoff + readlen;
2015 
2016  pos += readlen;
2017 
2018  if (pos == length)
2019  break;
2020  } else {
2021  readlen = min(length - pos, (ULONG)ci->stripe_length);
2022 
2023  context.stripes[stripe].stripestart = startoff - (startoff % ci->stripe_length);
2024  context.stripes[stripe].stripeend = context.stripes[stripe].stripestart + readlen;
2025 
2026  pos += readlen;
2027 
2028  if (pos == length)
2029  break;
2030  }
2031 
2032  i++;
2033  stripe = (stripe + 1) % ci->num_stripes;
2034  }
2035 
2036  if (pos == length)
2037  break;
2038 
2039  for (i = 0; i < startoffstripe; i++) {
2040  UINT16 stripe2 = (parity + i + 1) % ci->num_stripes;
2041 
2042  context.stripes[stripe2].stripestart = context.stripes[stripe2].stripeend = startoff - (startoff % ci->stripe_length) + ci->stripe_length;
2043  }
2044 
2045  context.stripes[parity].stripestart = context.stripes[parity].stripeend = startoff - (startoff % ci->stripe_length) + ci->stripe_length;
2046 
2047  if (length - pos > ci->num_stripes * (ci->num_stripes - 1) * ci->stripe_length) {
2048  skip = (ULONG)(((length - pos) / (ci->num_stripes * (ci->num_stripes - 1) * ci->stripe_length)) - 1);
2049 
2050  for (i = 0; i < ci->num_stripes; i++) {
2051  context.stripes[i].stripeend += skip * ci->num_stripes * ci->stripe_length;
2052  }
2053 
2054  pos += (UINT32)(skip * (ci->num_stripes - 1) * ci->num_stripes * ci->stripe_length);
2055  need_dummy = TRUE;
2056  }
2057  } else if (length - pos >= ci->stripe_length * (ci->num_stripes - 1)) {
2058  for (i = 0; i < ci->num_stripes; i++) {
2059  context.stripes[i].stripeend += ci->stripe_length;
2060  }
2061 
2062  pos += (UINT32)(ci->stripe_length * (ci->num_stripes - 1));
2063  need_dummy = TRUE;
2064  } else {
2065  UINT16 stripe = (parity + 1) % ci->num_stripes;
2066 
2067  i = 0;
2068  while (stripe != parity) {
2069  if (endoffstripe == i) {
2070  context.stripes[stripe].stripeend = endoff + 1;
2071  break;
2072  } else if (endoffstripe > i)
2073  context.stripes[stripe].stripeend = endoff - (endoff % ci->stripe_length) + ci->stripe_length;
2074 
2075  i++;
2076  stripe = (stripe + 1) % ci->num_stripes;
2077  }
2078 
2079  break;
2080  }
2081  }
2082 
2083  for (i = 0; i < ci->num_stripes; i++) {
2084  if (context.stripes[i].stripestart != context.stripes[i].stripeend) {
2085  context.stripes[i].mdl = IoAllocateMdl(context.va, (ULONG)(context.stripes[i].stripeend - context.stripes[i].stripestart),
2086  FALSE, FALSE, NULL);
2087 
2088  if (!context.stripes[i].mdl) {
2089  ERR("IoAllocateMdl failed\n");
2090  MmUnlockPages(master_mdl);
2091  IoFreeMdl(master_mdl);
2092  Status = STATUS_INSUFFICIENT_RESOURCES;
2093  goto exit;
2094  }
2095  }
2096  }
2097 
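 // The per-device ranges are contiguous, so they include the parity blocks;
 // point those at a single throwaway page rather than the caller's buffer.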
2098  if (need_dummy) {
2099  dummypage = ExAllocatePoolWithTag(NonPagedPool, PAGE_SIZE, ALLOC_TAG);
2100  if (!dummypage) {
2101  ERR("out of memory\n");
2102  MmUnlockPages(master_mdl);
2103  IoFreeMdl(master_mdl);
2104  Status = STATUS_INSUFFICIENT_RESOURCES;
2105  goto exit;
2106  }
2107 
2108  dummy_mdl = IoAllocateMdl(dummypage, PAGE_SIZE, FALSE, FALSE, NULL);
2109  if (!dummy_mdl) {
2110  ERR("IoAllocateMdl failed\n");
2111  MmUnlockPages(master_mdl);
2112  IoFreeMdl(master_mdl);
2113  Status = STATUS_INSUFFICIENT_RESOURCES;
2114  goto exit;
2115  }
2116 
2117  MmBuildMdlForNonPagedPool(dummy_mdl);
2118 
2119  dummy = *(PFN_NUMBER*)(dummy_mdl + 1);
2120  }
2121 
2122  stripeoff = ExAllocatePoolWithTag(NonPagedPool, sizeof(UINT32) * ci->num_stripes, ALLOC_TAG);
2123  if (!stripeoff) {
2124  ERR("out of memory\n");
2125  MmUnlockPages(master_mdl);
2126  IoFreeMdl(master_mdl);
2127  Status = STATUS_INSUFFICIENT_RESOURCES;
2128  goto exit;
2129  }
2130 
2131  RtlZeroMemory(stripeoff, sizeof(UINT32) * ci->num_stripes);
2132 
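 // Second pass: distribute the caller's PFNs across the per-device MDLs,
 // substituting the dummy PFN wherever a parity block falls.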
2133  pos = 0;
2134 
2135  while (pos < length) {
2136  PFN_NUMBER* stripe_pfns;
2137 
2138  parity = (((addr - offset + pos) / ((ci->num_stripes - 1) * ci->stripe_length)) + ci->num_stripes - 1) % ci->num_stripes;
2139 
2140  if (pos == 0) {
2141  UINT16 stripe = (parity + startoffstripe + 1) % ci->num_stripes;
2142  UINT32 readlen = min(length - pos, (UINT32)min(context.stripes[stripe].stripeend - context.stripes[stripe].stripestart,
2143  ci->stripe_length - (context.stripes[stripe].stripestart % ci->stripe_length)));
2144 
2145  stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1);
2146 
2147  RtlCopyMemory(stripe_pfns, pfns, readlen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
2148 
2149  stripeoff[stripe] = readlen;
2150  pos += readlen;
2151 
2152  stripe = (stripe + 1) % ci->num_stripes;
2153 
2154  while (stripe != parity) {
2155  stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1);
2156  readlen = min(length - pos, (UINT32)min(context.stripes[stripe].stripeend - context.stripes[stripe].stripestart, ci->stripe_length));
2157 
2158  if (readlen == 0)
2159  break;
2160 
2161  RtlCopyMemory(stripe_pfns, &pfns[pos >> PAGE_SHIFT], readlen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
2162 
2163  stripeoff[stripe] = readlen;
2164  pos += readlen;
2165 
2166  stripe = (stripe + 1) % ci->num_stripes;
2167  }
2168  } else if (length - pos >= ci->stripe_length * (ci->num_stripes - 1)) {
2169  UINT16 stripe = (parity + 1) % ci->num_stripes;
2170  ULONG k;
2171 
2172  while (stripe != parity) {
2173  stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1);
2174 
2175  RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], (ULONG)(ci->stripe_length * sizeof(PFN_NUMBER) >> PAGE_SHIFT));
2176 
2177  stripeoff[stripe] += (UINT32)ci->stripe_length;
2178  pos += (UINT32)ci->stripe_length;
2179 
2180  stripe = (stripe + 1) % ci->num_stripes;
2181  }
2182 
2183  stripe_pfns = (PFN_NUMBER*)(context.stripes[parity].mdl + 1);
2184 
2185  for (k = 0; k < ci->stripe_length >> PAGE_SHIFT; k++) {
2186  stripe_pfns[stripeoff[parity] >> PAGE_SHIFT] = dummy;
2187  stripeoff[parity] += PAGE_SIZE;
2188  }
2189  } else {
2190  UINT16 stripe = (parity + 1) % ci->num_stripes;
2191  UINT32 readlen;
2192 
2193  while (pos < length) {
2194  stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1);
2195  readlen = min(length - pos, (ULONG)min(context.stripes[stripe].stripeend - context.stripes[stripe].stripestart, ci->stripe_length));
2196 
2197  if (readlen == 0)
2198  break;
2199 
2200  RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], readlen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
2201 
2202  stripeoff[stripe] += readlen;
2203  pos += readlen;
2204 
2205  stripe = (stripe + 1) % ci->num_stripes;
2206  }
2207  }
2208  }
2209 
2210  MmUnlockPages(master_mdl);
2211  IoFreeMdl(master_mdl);
2212 
2213  ExFreePool(stripeoff);
2214  } else if (type == BLOCK_FLAG_RAID6) {
2215  UINT64 startoff, endoff;
2216  UINT16 endoffstripe, parity1;
2217  UINT32 *stripeoff, pos;
2218  PMDL master_mdl;
2219  PFN_NUMBER *pfns, dummy;
2220  BOOL need_dummy = FALSE;
2221 
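 // RAID6: same as RAID5, but each row has two rotating parity stripes
 // (P and Q), so only num_stripes - 2 devices hold data.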
2222  get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes - 2, &startoff, &startoffstripe);
2223  get_raid0_offset(addr + length - offset - 1, ci->stripe_length, ci->num_stripes - 2, &endoff, &endoffstripe);
2224 
2225  if (file_read) {
2226  context.va = ExAllocatePoolWithTag(NonPagedPool, length, ALLOC_TAG);
2227 
2228  if (!context.va) {
2229  ERR("out of memory\n");
2230  Status = STATUS_INSUFFICIENT_RESOURCES;
2231  goto exit;
2232  }
2233  } else
2234  context.va = buf;
2235 
2236  master_mdl = IoAllocateMdl(context.va, length, FALSE, FALSE, NULL);
2237  if (!master_mdl) {
2238  ERR("out of memory\n");
2239  Status = STATUS_INSUFFICIENT_RESOURCES;
2240  goto exit;
2241  }
2242 
2243  Status = STATUS_SUCCESS;
2244 
2245  _SEH2_TRY {
2246  MmProbeAndLockPages(master_mdl, KernelMode, IoWriteAccess);
2247  } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) {
2248  Status = _SEH2_GetExceptionCode();
2249  } _SEH2_END;
2250 
2251  if (!NT_SUCCESS(Status)) {
2252  ERR("MmProbeAndLockPages threw exception %08x\n", Status);
2253  IoFreeMdl(master_mdl);
2254  goto exit;
2255  }
2256 
2257  pfns = (PFN_NUMBER*)(master_mdl + 1);
2258 
2259  pos = 0;
2260  while (pos < length) {
2261  parity1 = (((addr - offset + pos) / ((ci->num_stripes - 2) * ci->stripe_length)) + ci->num_stripes - 2) % ci->num_stripes;
2262 
2263  if (pos == 0) {
2264  UINT16 stripe = (parity1 + startoffstripe + 2) % ci->num_stripes, parity2;
2265  ULONG skip, readlen;
2266 
2267  i = startoffstripe;
2268  while (stripe != parity1) {
2269  if (i == startoffstripe) {
2270  readlen = (ULONG)min(length, ci->stripe_length - (startoff % ci->stripe_length));
2271 
2272  context.stripes[stripe].stripestart = startoff;
2273  context.stripes[stripe].stripeend = startoff + readlen;
2274 
2275  pos += readlen;
2276 
2277  if (pos == length)
2278  break;
2279  } else {
2280  readlen = min(length - pos, (ULONG)ci->stripe_length);
2281 
2282  context.stripes[stripe].stripestart = startoff - (startoff % ci->stripe_length);
2283  context.stripes[stripe].stripeend = context.stripes[stripe].stripestart + readlen;
2284 
2285  pos += readlen;
2286 
2287  if (pos == length)
2288  break;
2289  }
2290 
2291  i++;
2292  stripe = (stripe + 1) % ci->num_stripes;
2293  }
2294 
2295  if (pos == length)
2296  break;
2297 
2298  for (i = 0; i < startoffstripe; i++) {
2299  UINT16 stripe2 = (parity1 + i + 2) % ci->num_stripes;
2300 
2301  context.stripes[stripe2].stripestart = context.stripes[stripe2].stripeend = startoff - (startoff % ci->stripe_length) + ci->stripe_length;
2302  }
2303 
2304  context.stripes[parity1].stripestart = context.stripes[parity1].stripeend = startoff - (startoff % ci->stripe_length) + ci->stripe_length;
2305 
2306  parity2 = (parity1 + 1) % ci->num_stripes;
2307  context.stripes[parity2].stripestart = context.stripes[parity2].stripeend = startoff - (startoff % ci->stripe_length) + ci->stripe_length;
2308 
2309  if (length - pos > ci->num_stripes * (ci->num_stripes - 2) * ci->stripe_length) {
2310  skip = (ULONG)(((length - pos) / (ci->num_stripes * (ci->num_stripes - 2) * ci->stripe_length)) - 1);
2311 
2312  for (i = 0; i < ci->num_stripes; i++) {
2313  context.stripes[i].stripeend += skip * ci->num_stripes * ci->stripe_length;
2314  }
2315 
2316  pos += (UINT32)(skip * (ci->num_stripes - 2) * ci->num_stripes * ci->stripe_length);
2317  need_dummy = TRUE;
2318  }
2319  } else if (length - pos >= ci->stripe_length * (ci->num_stripes - 2)) {
2320  for (i = 0; i < ci->num_stripes; i++) {
2321  context.stripes[i].stripeend += ci->stripe_length;
2322  }
2323 
2324  pos += (UINT32)(ci->stripe_length * (ci->num_stripes - 2));
2325  need_dummy = TRUE;
2326  } else {
2327  UINT16 stripe = (parity1 + 2) % ci->num_stripes;
2328 
2329  i = 0;
2330  while (stripe != parity1) {
2331  if (endoffstripe == i) {
2332  context.stripes[stripe].stripeend = endoff + 1;
2333  break;
2334  } else if (endoffstripe > i)
2335  context.stripes[stripe].stripeend = endoff - (endoff % ci->stripe_length) + ci->stripe_length;
2336 
2337  i++;
2338  stripe = (stripe + 1) % ci->num_stripes;
2339  }
2340 
2341  break;
2342  }
2343  }
2344 
2345  for (i = 0; i < ci->num_stripes; i++) {
2346  if (context.stripes[i].stripestart != context.stripes[i].stripeend) {
2347  context.stripes[i].mdl = IoAllocateMdl(context.va, (ULONG)(context.stripes[i].stripeend - context.stripes[i].stripestart), FALSE, FALSE, NULL);
2348 
2349  if (!context.stripes[i].mdl) {
2350  ERR("IoAllocateMdl failed\n");
2351  MmUnlockPages(master_mdl);
2352  IoFreeMdl(master_mdl);
2353  Status = STATUS_INSUFFICIENT_RESOURCES;
2354  goto exit;
2355  }
2356  }
2357  }
2358 
2359  if (need_dummy) {
2360  dummypage = ExAllocatePoolWithTag(NonPagedPool, PAGE_SIZE, ALLOC_TAG);
2361  if (!dummypage) {
2362  ERR("out of memory\n");
2363  MmUnlockPages(master_mdl);
2364  IoFreeMdl(master_mdl);
2365  Status = STATUS_INSUFFICIENT_RESOURCES;
2366  goto exit;
2367  }
2368 
2369  dummy_mdl = IoAllocateMdl(dummypage, PAGE_SIZE, FALSE, FALSE, NULL);
2370  if (!dummy_mdl) {
2371  ERR("IoAllocateMdl failed\n");
2372  MmUnlockPages(master_mdl);
2373  IoFreeMdl(master_mdl);
2374  Status = STATUS_INSUFFICIENT_RESOURCES;
2375  goto exit;
2376  }
2377 
2378  MmBuildMdlForNonPagedPool(dummy_mdl);
2379 
2380  dummy = *(PFN_NUMBER*)(dummy_mdl + 1);
2381  }
2382 
2383  stripeoff = ExAllocatePoolWithTag(NonPagedPool, sizeof(UINT32) * ci->num_stripes, ALLOC_TAG);
2384  if (!stripeoff) {
2385  ERR("out of memory\n");
2386  MmUnlockPages(master_mdl);
2387  IoFreeMdl(master_mdl);
2388  Status = STATUS_INSUFFICIENT_RESOURCES;
2389  goto exit;
2390  }
2391 
2392  RtlZeroMemory(stripeoff, sizeof(UINT32) * ci->num_stripes);
2393 
2394  pos = 0;
2395 
2396  while (pos < length) {
2397  PFN_NUMBER* stripe_pfns;
2398 
2399  parity1 = (((addr - offset + pos) / ((ci->num_stripes - 2) * ci->stripe_length)) + ci->num_stripes - 2) % ci->num_stripes;
2400 
2401  if (pos == 0) {
2402  UINT16 stripe = (parity1 + startoffstripe + 2) % ci->num_stripes;
2403  UINT32 readlen = min(length - pos, (UINT32)min(context.stripes[stripe].stripeend - context.stripes[stripe].stripestart,
2404  ci->stripe_length - (context.stripes[stripe].stripestart % ci->stripe_length)));
2405 
2406  stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1);
2407 
2408  RtlCopyMemory(stripe_pfns, pfns, readlen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
2409 
2410  stripeoff[stripe] = readlen;
2411  pos += readlen;
2412 
2413  stripe = (stripe + 1) % ci->num_stripes;
2414 
2415  while (stripe != parity1) {
2416  stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1);
2417  readlen = (UINT32)min(length - pos, min(context.stripes[stripe].stripeend - context.stripes[stripe].stripestart, ci->stripe_length));
2418 
2419  if (readlen == 0)
2420  break;
2421 
2422  RtlCopyMemory(stripe_pfns, &pfns[pos >> PAGE_SHIFT], readlen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
2423 
2424  stripeoff[stripe] = readlen;
2425  pos += readlen;
2426 
2427  stripe = (stripe + 1) % ci->num_stripes;
2428  }
2429  } else if (length - pos >= ci->stripe_length * (ci->num_stripes - 2)) {
2430  UINT16 stripe = (parity1 + 2) % ci->num_stripes;
2431  UINT16 parity2 = (parity1 + 1) % ci->num_stripes;
2432  ULONG k;
2433 
2434  while (stripe != parity1) {
2435  stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1);
2436 
2437  RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], (ULONG)(ci->stripe_length * sizeof(PFN_NUMBER) >> PAGE_SHIFT));
2438 
2439  stripeoff[stripe] += (UINT32)ci->stripe_length;
2440  pos += (UINT32)ci->stripe_length;
2441 
2442  stripe = (stripe + 1) % ci->num_stripes;
2443  }
2444 
2445  stripe_pfns = (PFN_NUMBER*)(context.stripes[parity1].mdl + 1);
2446 
2447  for (k = 0; k < ci->stripe_length >> PAGE_SHIFT; k++) {
2448  stripe_pfns[stripeoff[parity1] >> PAGE_SHIFT] = dummy;
2449  stripeoff[parity1] += PAGE_SIZE;
2450  }
2451 
2452  stripe_pfns = (PFN_NUMBER*)(context.stripes[parity2].mdl + 1);
2453 
2454  for (k = 0; k < ci->stripe_length >> PAGE_SHIFT; k++) {
2455  stripe_pfns[stripeoff[parity2] >> PAGE_SHIFT] = dummy;
2456  stripeoff[parity2] += PAGE_SIZE;
2457  }
2458  } else {
2459  UINT16 stripe = (parity1 + 2) % ci->num_stripes;
2460  UINT32 readlen;
2461 
2462  while (pos < length) {
2463  stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1);
2464  readlen = (UINT32)min(length - pos, min(context.stripes[stripe].stripeend - context.stripes[stripe].stripestart, ci->stripe_length));
2465 
2466  if (readlen == 0)
2467  break;
2468 
2469  RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], readlen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
2470 
2471  stripeoff[stripe] += readlen;
2472  pos += readlen;
2473 
2474  stripe = (stripe + 1) % ci->num_stripes;
2475  }
2476  }
2477  }
2478 
2479  MmUnlockPages(master_mdl);
2480  IoFreeMdl(master_mdl);
2481 
2482  ExFreePool(stripeoff);
2483  }
2484 
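 // Mark stripes with nothing to read or no device as missing; how many
 // devices may be absent without failing the read depends on the RAID level
 // (allowed_missing).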
2485  context.address = addr;
2486 
2487  for (i = 0; i < ci->num_stripes; i++) {
2488  if (!devices[i] || !devices[i]->devobj || context.stripes[i].stripestart == context.stripes[i].stripeend) {
2489  context.stripes[i].status = ReadDataStatus_MissingDevice;
2490  context.stripes_left--;
2491 
2492  if (!devices[i] || !devices[i]->devobj)
2493  missing_devices++;
2494  }
2495  }
2496 
2497  if (missing_devices > allowed_missing) {
2498  ERR("not enough devices to service request (%u missing)\n", missing_devices);
2500  goto exit;
2501  }
2502 
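 // Build one read IRP per remaining stripe (an associated IRP if we were
 // given a master IRP), pointing it at the stripe's MDL and at the right
 // offset within the device; read_data_completion counts them back in.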
2503  for (i = 0; i < ci->num_stripes; i++) {
2504  PIO_STACK_LOCATION IrpSp;
2505 
2506  if (devices[i] && devices[i]->devobj && context.stripes[i].stripestart != context.stripes[i].stripeend && context.stripes[i].status != ReadDataStatus_Skip) {
2507  context.stripes[i].context = (struct read_data_context*)&context;
2508 
2509  if (type == BLOCK_FLAG_RAID10) {
2510  context.stripes[i].stripenum = i / ci->sub_stripes;
2511  }
2512 
2513  if (!Irp) {
2514  context.stripes[i].Irp = IoAllocateIrp(devices[i]->devobj->StackSize, FALSE);
2515 
2516  if (!context.stripes[i].Irp) {
2517  ERR("IoAllocateIrp failed\n");
2518  Status = STATUS_INSUFFICIENT_RESOURCES;
2519  goto exit;
2520  }
2521  } else {
2522  context.stripes[i].Irp = IoMakeAssociatedIrp(Irp, devices[i]->devobj->StackSize);
2523 
2524  if (!context.stripes[i].Irp) {
2525  ERR("IoMakeAssociatedIrp failed\n");
2526  Status = STATUS_INSUFFICIENT_RESOURCES;
2527  goto exit;
2528  }
2529  }
2530 
2531  IrpSp = IoGetNextIrpStackLocation(context.stripes[i].Irp);
2532  IrpSp->MajorFunction = IRP_MJ_READ;
2533 
2534  if (devices[i]->devobj->Flags & DO_BUFFERED_IO) {
2535  context.stripes[i].Irp->AssociatedIrp.SystemBuffer = ExAllocatePoolWithTag(NonPagedPool, (ULONG)(context.stripes[i].stripeend - context.stripes[i].stripestart), ALLOC_TAG);
2536  if (!context.stripes[i].Irp->AssociatedIrp.SystemBuffer) {
2537  ERR("out of memory\n");
2538  Status = STATUS_INSUFFICIENT_RESOURCES;
2539  goto exit;
2540  }
2541 
2542  context.stripes[i].Irp->Flags |= IRP_BUFFERED_IO | IRP_DEALLOCATE_BUFFER | IRP_INPUT_OPERATION;
2543 
2544  context.stripes[i].Irp->UserBuffer = MmGetSystemAddressForMdlSafe(context.stripes[i].mdl, priority);
2545  } else if (devices[i]->devobj->Flags & DO_DIRECT_IO)
2546  context.stripes[i].Irp->MdlAddress = context.stripes[i].mdl;
2547  else
2548  context.stripes[i].Irp->UserBuffer = MmGetSystemAddressForMdlSafe(context.stripes[i].mdl, priority);
2549 
2550  IrpSp->Parameters.Read.Length = (ULONG)(context.stripes[i].stripeend - context.stripes[i].stripestart);
2551  IrpSp->Parameters.Read.ByteOffset.QuadPart = context.stripes[i].stripestart + cis[i].offset;
2552 
2553  total_reading += IrpSp->Parameters.Read.Length;
2554 
2555  context.stripes[i].Irp->UserIosb = &context.stripes[i].iosb;
2556 
2557  IoSetCompletionRoutine(context.stripes[i].Irp, read_data_completion, &context.stripes[i], TRUE, TRUE, TRUE);
2558 
2559  context.stripes[i].status = ReadDataStatus_Pending;
2560  }
2561  }
2562 
2563 #ifdef DEBUG_STATS
2564  if (!is_tree)
2565  time1 = KeQueryPerformanceCounter(NULL);
2566 #endif
2567 
2568  need_to_wait = FALSE;
2569  for (i = 0; i < ci->num_stripes; i++) {
2570  if (context.stripes[i].status != ReadDataStatus_MissingDevice && context.stripes[i].status != ReadDataStatus_Skip) {
2571  IoCallDriver(devices[i]->devobj, context.stripes[i].Irp);
2572  need_to_wait = TRUE;
2573  }
2574  }
2575 
2576  if (need_to_wait)
2577  KeWaitForSingleObject(&context.Event, Executive, KernelMode, FALSE, NULL);
2578 
2579 #ifdef DEBUG_STATS
2580  if (!is_tree) {
2581  time2 = KeQueryPerformanceCounter(NULL);
2582 
2583  Vcb->stats.read_disk_time += time2.QuadPart - time1.QuadPart;
2584  }
2585 #endif
2586 
2587  if (diskacc)
2588  fFsRtlUpdateDiskCounters(total_reading, 0);
2589 
2590  // check if any of the devices return a "user-induced" error
2591 
2592  for (i = 0; i < ci->num_stripes; i++) {
2593  if (context.stripes[i].status == ReadDataStatus_Error && IoIsErrorUserInduced(context.stripes[i].iosb.Status)) {
2594  Status = context.stripes[i].iosb.Status;
2595  goto exit;
2596  }
2597  }
2598 
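 // All the IRPs have completed; hand over to the per-profile routine, which
 // verifies checksums and repairs from another copy or from parity where it
 // can. For file reads the bounce buffer is then copied into buf and freed.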
2599  if (type == BLOCK_FLAG_RAID0) {
2600  Status = read_data_raid0(Vcb, file_read ? context.va : buf, addr, length, &context, ci, devices, generation, offset);
2601  if (!NT_SUCCESS(Status)) {
2602  ERR("read_data_raid0 returned %08x\n", Status);
2603 
2604  if (file_read)
2605  ExFreePool(context.va);
2606 
2607  goto exit;
2608  }
2609 
2610  if (file_read) {
2611  RtlCopyMemory(buf, context.va, length);
2612  ExFreePool(context.va);
2613  }
2614  } else if (type == BLOCK_FLAG_RAID10) {
2615  Status = read_data_raid10(Vcb, file_read ? context.va : buf, addr, length, &context, ci, devices, generation, offset);
2616 
2617  if (!NT_SUCCESS(Status)) {
2618  ERR("read_data_raid10 returned %08x\n", Status);
2619 
2620  if (file_read)
2621  ExFreePool(context.va);
2622 
2623  goto exit;
2624  }
2625 
2626  if (file_read) {
2627  RtlCopyMemory(buf, context.va, length);
2628  ExFreePool(context.va);
2629  }
2630  } else if (type == BLOCK_FLAG_DUPLICATE) {
2631  Status = read_data_dup(Vcb, file_read ? context.va : buf, addr, &context, ci, devices, generation);
2632  if (!NT_SUCCESS(Status)) {
2633  ERR("read_data_dup returned %08x\n", Status);
2634 
2635  if (file_read)
2636  ExFreePool(context.va);
2637 
2638  goto exit;
2639  }
2640 
2641  if (file_read) {
2642  RtlCopyMemory(buf, context.va, length);
2643  ExFreePool(context.va);
2644  }
2645  } else if (type == BLOCK_FLAG_RAID5) {
2646  Status = read_data_raid5(Vcb, file_read ? context.va : buf, addr, length, &context, ci, devices, offset, generation, c, missing_devices > 0 ? TRUE : FALSE);
2647  if (!NT_SUCCESS(Status)) {
2648  ERR("read_data_raid5 returned %08x\n", Status);
2649 
2650  if (file_read)
2651  ExFreePool(context.va);
2652 
2653  goto exit;
2654  }
2655 
2656  if (file_read) {
2657  RtlCopyMemory(buf, context.va, length);
2658  ExFreePool(context.va);
2659  }
2660  } else if (type == BLOCK_FLAG_RAID6) {
2661  Status = read_data_raid6(Vcb, file_read ? context.va : buf, addr, length, &context, ci, devices, offset, generation, c, missing_devices > 0 ? TRUE : FALSE);
2662  if (!NT_SUCCESS(Status)) {
2663  ERR("read_data_raid6 returned %08x\n", Status);
2664 
2665  if (file_read)
2666  ExFreePool(context.va);
2667 
2668  goto exit;
2669  }
2670 
2671  if (file_read) {
2672  RtlCopyMemory(buf, context.va, length);
2673  ExFreePool(context.va);
2674  }
2675  }
2676 
2677 exit:
2678  if (c && (type == BLOCK_FLAG_RAID5 || type == BLOCK_FLAG_RAID6))
2679  chunk_unlock_range(Vcb, c, lockaddr, locklen);
2680 
2681  if (dummy_mdl)
2682  IoFreeMdl(dummy_mdl);
2683 
2684  if (dummypage)
2685  ExFreePool(dummypage);
2686 
2687  for (i = 0; i < ci->num_stripes; i++) {
2688  if (context.stripes[i].mdl) {
2689  if (context.stripes[i].mdl->MdlFlags & MDL_PAGES_LOCKED)
2690  MmUnlockPages(context.stripes[i].mdl);
2691 
2692  IoFreeMdl(context.stripes[i].mdl);
2693  }
2694 
2695  if (context.stripes[i].Irp)
2696  IoFreeIrp(context.stripes[i].Irp);
2697  }
2698 
2699  ExFreePool(context.stripes);
2700 
2701  if (!Vcb->log_to_phys_loaded)
2702  ExFreePool(devices);
2703 
2704  return Status;
2705 }
2706 
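 // Reads from an alternate data stream are served directly from the copy
 // held in memory in fcb->adsdata.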
2707 NTSTATUS read_stream(fcb* fcb, UINT8* data, UINT64 start, ULONG length, ULONG* pbr) {
2708  ULONG readlen;
2709 
2710  TRACE("(%p, %p, %llx, %llx, %p)\n", fcb, data, start, length, pbr);
2711 
2712  if (pbr) *pbr = 0;
2713 
2714  if (start >= fcb->adsdata.Length) {
2715  TRACE("tried to read beyond end of stream\n");
2716  return STATUS_END_OF_FILE;
2717  }
2718 
2719  if (length == 0) {
2720  WARN("tried to read zero bytes\n");
2721  return STATUS_SUCCESS;
2722  }
2723 
2724  if (start + length < fcb->adsdata.Length)
2725  readlen = length;
2726  else
2727  readlen = fcb->adsdata.Length - (ULONG)start;
2728 
2729  if (readlen > 0)
2730  RtlCopyMemory(data + start, fcb->adsdata.Buffer, readlen);
2731 
2732  if (pbr) *pbr = readlen;
2733 
2734  return STATUS_SUCCESS;
2735 }
2736 
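 // read_file walks the fcb's extent list, zero-filling any holes and handling
 // inline, regular (possibly compressed) and preallocated extents until the
 // requested range has been satisfied.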
2737 NTSTATUS read_file(fcb* fcb, UINT8* data, UINT64 start, UINT64 length, ULONG* pbr, PIRP Irp) {
2738  NTSTATUS Status;
2739  EXTENT_DATA* ed;
2740  UINT32 bytes_read = 0;
2741  UINT64 last_end;
2742  LIST_ENTRY* le;
2743 #ifdef DEBUG_STATS
2744  LARGE_INTEGER time1, time2;
2745 #endif
2746 
2747  TRACE("(%p, %p, %llx, %llx, %p)\n", fcb, data, start, length, pbr);
2748 
2749  if (pbr)
2750  *pbr = 0;
2751 
2752  if (start >= fcb->inode_item.st_size) {
2753  WARN("Tried to read beyond end of file\n");
2754  Status = STATUS_END_OF_FILE;
2755  goto exit;
2756  }
2757 
2758 #ifdef DEBUG_STATS
2759  time1 = KeQueryPerformanceCounter(NULL);
2760 #endif
2761 
2762  le = fcb->extents.Flink;
2763 
2764  last_end = start;
2765 
2766  while (le != &fcb->extents) {
2767  UINT64 len;
2768  extent* ext = CONTAINING_RECORD(le, extent, list_entry);
2769  EXTENT_DATA2* ed2;
2770 
2771  if (!ext->ignore) {
2772  ed = &ext->extent_data;
2773 
2774  ed2 = (ed->type == EXTENT_TYPE_REGULAR || ed->type == EXTENT_TYPE_PREALLOC) ? (EXTENT_DATA2*)ed->data : NULL;
2775 
2776  len = ed2 ? ed2->num_bytes : ed->decoded_size;
2777 
2778  if (ext->offset + len <= start) {
2779  last_end = ext->offset + len;
2780  goto nextitem;
2781  }
2782 
2783  if (ext->offset > last_end && ext->offset > start + bytes_read) {
2784  UINT32 read = (UINT32)min(length, ext->offset - max(start, last_end));
2785 
2786  RtlZeroMemory(data + bytes_read, read);
2787  bytes_read += read;
2788  length -= read;
2789  }
2790 
2791  if (length == 0 || ext->offset > start + bytes_read + length)
2792  break;
2793 
2794  if (ed->encryption != BTRFS_ENCRYPTION_NONE) {
2795  WARN("Encryption not supported\n");
2797  goto exit;
2798  }
2799 
2800  if (ed->encoding != BTRFS_ENCODING_NONE) {
2801  WARN("Other encodings not supported\n");
2803  goto exit;
2804  }
2805 
2806  switch (ed->type) {
2807  case EXTENT_TYPE_INLINE:
2808  {
2809  UINT64 off = start + bytes_read - ext->offset;
2810  UINT32 read;
2811 
2812  if (ed->compression == BTRFS_COMPRESSION_NONE) {
2813  read = (UINT32)min(min(len, ext->datalen) - off, length);
2814 
2815  RtlCopyMemory(data + bytes_read, &ed->data[off], read);
2816  } else if (ed->compression == BTRFS_COMPRESSION_ZLIB || ed->compression == BTRFS_COMPRESSION_LZO || ed->compression == BTRFS_COMPRESSION_ZSTD) {
2817  UINT8* decomp;
2818  BOOL decomp_alloc;
2819  UINT16 inlen = ext->datalen - (UINT16)offsetof(EXTENT_DATA, data[0]);
2820 
2821  if (ed->decoded_size == 0 || ed->decoded_size > 0xffffffff) {
2822  ERR("ed->decoded_size was invalid (%llx)\n", ed->decoded_size);
2824  goto exit;
2825  }
2826 
2827  read = (UINT32)min(ed->decoded_size - off, length);
2828 
2829  if (off > 0) {
2831  if (!decomp) {
2832  ERR("out of memory\n");
2833  Status = STATUS_INSUFFICIENT_RESOURCES;
2834  goto exit;
2835  }
2836 
2837  decomp_alloc = TRUE;
2838  } else {
2839  decomp = data + bytes_read;
2840  decomp_alloc = FALSE;
2841  }
2842 
2843  if (ed->compression == BTRFS_COMPRESSION_ZLIB) {
2844  Status = zlib_decompress(ed->data, inlen, decomp, (UINT32)(read + off));
2845  if (!NT_SUCCESS(Status)) {
2846  ERR("zlib_decompress returned %08x\n", Status);
2847  if (decomp_alloc) ExFreePool(decomp);
2848  goto exit;
2849  }
2850  } else if (ed->compression == BTRFS_COMPRESSION_LZO) {
2851  if (inlen < sizeof(UINT32)) {
2852  ERR("extent data was truncated\n");
2854  if (decomp_alloc) ExFreePool(decomp);
2855  goto exit;
2856  } else
2857  inlen -= sizeof(UINT32);
2858 
2859  Status = lzo_decompress(ed->data + sizeof(UINT32), inlen, decomp, (UINT32)(read + off), sizeof(UINT32));
2860  if (!NT_SUCCESS(Status)) {
2861  ERR("lzo_decompress returned %08x\n", Status);
2862  if (decomp_alloc) ExFreePool(decomp);
2863  goto exit;
2864  }
2865  } else if (ed->compression == BTRFS_COMPRESSION_ZSTD) {
2866  Status = zstd_decompress(ed->data, inlen, decomp, (UINT32)(read + off));
2867  if (!NT_SUCCESS(Status)) {
2868  ERR("zstd_decompress returned %08x\n", Status);
2869  if (decomp_alloc) ExFreePool(decomp);
2870  goto exit;
2871  }
2872  }
2873 
2874  if (decomp_alloc) {
2875  RtlCopyMemory(data + bytes_read, decomp + off, read);
2876  ExFreePool(decomp);
2877  }
2878  } else {
2879  ERR("unhandled compression type %x\n", ed->compression);
2881  goto exit;
2882  }
2883 
2884  bytes_read += read;
2885  length -= read;
2886 
2887  break;
2888  }
2889 
2890  case EXTENT_TYPE_REGULAR:
2891  {
2892  UINT64 off = start + bytes_read - ext->offset;
2893  UINT32 to_read, read;
2894  UINT8* buf;
2895  BOOL mdl = (Irp && Irp->MdlAddress) ? TRUE : FALSE;
2896  BOOL buf_free;
2897  UINT32 bumpoff = 0, *csum;
2898  UINT64 addr;
2899  chunk* c;
2900 
2901  read = (UINT32)(len - off);
2902  if (read > length) read = (UINT32)length;
2903 
2904  if (ed->compression == BTRFS_COMPRESSION_NONE) {
2905  addr = ed2->address + ed2->offset + off;
2906  to_read = (UINT32)sector_align(read, fcb->Vcb->superblock.sector_size);
2907 
2908  if (addr % fcb->Vcb->superblock.sector_size > 0) {
2909  bumpoff = addr % fcb->Vcb->superblock.sector_size;
2910  addr -= bumpoff;
2911  to_read = (UINT32)sector_align(read + bumpoff, fcb->Vcb->superblock.sector_size);
2912  }
2913  } else {
2914  addr = ed2->address;
2915  to_read = (UINT32)sector_align(ed2->size, fcb->Vcb->superblock.sector_size);
2916  }
2917 
2918  if (ed->compression == BTRFS_COMPRESSION_NONE && start % fcb->Vcb->superblock.sector_size == 0 &&
2919  length % fcb->Vcb->superblock.sector_size == 0) {
2920  buf = data + bytes_read;
2921  buf_free = FALSE;
2922  } else {
2923  buf = ExAllocatePoolWithTag(PagedPool, to_read, ALLOC_TAG);
2924  buf_free = TRUE;
2925 
2926  if (!buf) {
2927  ERR("out of memory\n");
2928  Status = STATUS_INSUFFICIENT_RESOURCES;
2929  goto exit;
2930  }
2931 
2932  mdl = FALSE;
2933  }
2934 
2935  c = get_chunk_from_address(fcb->Vcb, addr);
2936 
2937  if (!c) {
2938  ERR("get_chunk_from_address(%llx) failed\n", addr);
2939 
2940  if (buf_free)
2941  ExFreePool(buf);
2942 
2943  goto exit;
2944  }
2945 
2946  if (ext->csum) {
2947  if (ed->compression == BTRFS_COMPRESSION_NONE)
2948  csum = &ext->csum[off / fcb->Vcb->superblock.sector_size];
2949  else
2950  csum = ext->csum;
2951  } else
2952  csum = NULL;
2953 
2954  Status = read_data(fcb->Vcb, addr, to_read, csum, FALSE, buf, c, NULL, Irp, 0, mdl,
2955  fcb->Header.Flags2 & FSRTL_FLAG2_IS_PAGING_FILE ? HighPagePriority : NormalPagePriority);
2956  if (!NT_SUCCESS(Status)) {
2957  ERR("read_data returned %08x\n", Status);
2958 
2959  if (buf_free)
2960  ExFreePool(buf);
2961 
2962  goto exit;
2963  }
2964 
2965  if (ed->compression == BTRFS_COMPRESSION_NONE) {
2966  if (buf_free)
2967  RtlCopyMemory(data + bytes_read, buf + bumpoff, read);
2968  } else {
2969  UINT8 *decomp = NULL, *buf2;
2970  ULONG outlen, inlen, off2;
2971  UINT32 inpageoff = 0;
2972 
2973  off2 = (ULONG)(ed2->offset + off);
2974  buf2 = buf;
2975  inlen = (ULONG)ed2->size;
2976 
2977  if (ed->compression == BTRFS_COMPRESSION_LZO) {
2978  ULONG inoff = sizeof(UINT32);
2979 
2980  inlen -= sizeof(UINT32);
2981 
2982  // If reading a few sectors in, skip to the interesting bit
2983  while (off2 > LINUX_PAGE_SIZE) {
2984  UINT32 partlen;
2985 
2986  if (inlen < sizeof(UINT32))
2987  break;
2988 
2989  partlen = *(UINT32*)(buf2 + inoff);
2990 
2991  if (partlen < inlen) {
2992  off2 -= LINUX_PAGE_SIZE;
2993  inoff += partlen + sizeof(UINT32);
2994  inlen -= partlen + sizeof(UINT32);
2995 
2996  if (LINUX_PAGE_SIZE - (inoff % LINUX_PAGE_SIZE) < sizeof(UINT32))
2997  inoff = ((inoff / LINUX_PAGE_SIZE) + 1) * LINUX_PAGE_SIZE;
2998  } else
2999  break;
3000  }
3001 
3002  buf2 = &buf2[inoff];
3003  inpageoff = inoff % LINUX_PAGE_SIZE;
3004  }
3005 
3006  if (off2 != 0) {
3007  outlen = off2 + min(read, (UINT32)(ed2->num_bytes - off));
3008 
3009  decomp = ExAllocatePoolWithTag(PagedPool, outlen, ALLOC_TAG);
3010  if (!decomp) {
3011  ERR("out of memory\n");
3012  ExFreePool(buf);
3013  Status = STATUS_INSUFFICIENT_RESOURCES;
3014  goto exit;
3015  }
3016  } else
3017  outlen = min(read, (UINT32)(ed2->num_bytes - off));
3018 
3019  if (ed->compression == BTRFS_COMPRESSION_ZLIB) {
3020  Status = zlib_decompress(buf2, inlen, decomp ? decomp : (data + bytes_read), outlen);
3021 
3022  if (!NT_SUCCESS(Status)) {
3023  ERR("zlib_decompress returned %08x\n", Status);
3024  ExFreePool(buf);
3025 
3026  if (decomp)
3027  ExFreePool(decomp);
3028 
3029  goto exit;
3030  }
3031  } else if (ed->compression == BTRFS_COMPRESSION_LZO) {
3032  Status = lzo_decompress(buf2, inlen, decomp ? decomp : (data + bytes_read), outlen, inpageoff);
3033 
3034  if (!NT_SUCCESS(Status)) {
3035  ERR("lzo_decompress returned %08x\n", Status);
3036  ExFreePool(buf);
3037 
3038  if (decomp)
3039  ExFreePool(decomp);
3040 
3041  goto exit;
3042  }
3043  } else if (ed->compression == BTRFS_COMPRESSION_ZSTD) {
3044  Status = zstd_decompress(buf2, inlen, decomp ? decomp : (data + bytes_read), outlen);
3045 
3046  if (!NT_SUCCESS(Status)) {
3047  ERR("zstd_decompress returned %08x\n", Status);
3048  ExFreePool(buf);
3049 
3050  if (decomp)
3051  ExFreePool(decomp);
3052 
3053  goto exit;
3054  }
3055  } else {
3056  ERR("unsupported compression type %x\n", ed->compression);
3058 
3059  ExFreePool(buf);
3060 
3061  if (decomp)
3062  ExFreePool(decomp);
3063 
3064  goto exit;
3065  }
3066 
3067  if (decomp) {
3068  RtlCopyMemory(data + bytes_read, decomp + off2, (size_t)min(read, ed2->num_bytes - off));
3069  ExFreePool(decomp);
3070  }
3071  }
3072 
3073  if (buf_free)
3074  ExFreePool(buf);
3075 
3076  bytes_read += read;
3077  length -= read;
3078 
3079  break;
3080  }
3081 
3082  case EXTENT_TYPE_PREALLOC:
3083  {
3084  UINT64 off = start + bytes_read - ext->offset;
3085  UINT32 read = (UINT32)(len - off);
3086 
3087  if (read > length) read = (UINT32)length;
3088 
3089  RtlZeroMemory(data + bytes_read, read);
3090 
3091  bytes_read += read;
3092  length -= read;
3093 
3094  break;
3095  }
3096 
3097  default:
3098  WARN("Unsupported extent data type %u\n", ed->type);
3100  goto exit;
3101  }
3102 
3103  last_end = ext->offset + len;
3104 
3105  if (length == 0)
3106  break;
3107  }
3108 
3109 nextitem:
3110  le = le->Flink;
3111  }
3112 
3113  if (length > 0 && start + bytes_read < fcb->inode_item.st_size) {
3114  UINT32 read = (UINT32)min(fcb->inode_item.st_size - start - bytes_read, length);
3115 
3116  RtlZeroMemory(data + bytes_read, read);
3117 
3118  bytes_read += read;
3119  length -= read;
3120  }
3121 
3122  Status = STATUS_SUCCESS;
3123  if (pbr)
3124  *pbr = bytes_read;
3125 
3126 #ifdef DEBUG_STATS
3127  time2 = KeQueryPerformanceCounter(NULL);
3128 
3129  fcb->Vcb->stats.num_reads++;
3130  fcb->Vcb->stats.data_read += bytes_read;
3131  fcb->Vcb->stats.read_total_time += time2.QuadPart - time1.QuadPart;
3132 #endif
3133 
3134 exit:
3135  return Status;
3136 }
3137 
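 // do_read services a read IRP: cached reads go through the cache manager
 // (CcCopyRead/CcCopyReadEx or an MDL read), while non-cached reads go to
 // read_file or read_stream directly.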
3138 NTSTATUS do_read(PIRP Irp, BOOLEAN wait, ULONG* bytes_read) {
3139  PIO_STACK_LOCATION IrpSp = IoGetCurrentIrpStackLocation(Irp);
3140  PFILE_OBJECT FileObject = IrpSp->FileObject;
3141  fcb* fcb = FileObject->FsContext;
3142  UINT8* data = NULL;
3143  ULONG length = IrpSp->Parameters.Read.Length, addon = 0;
3144  UINT64 start = IrpSp->Parameters.Read.ByteOffset.QuadPart;
3145 
3146  *bytes_read = 0;
3147 
3148  if (!fcb || !fcb->Vcb || !fcb->subvol)
3149  return STATUS_INTERNAL_ERROR;
3150 
3151  TRACE("file = %S (fcb = %p)\n", file_desc(FileObject), fcb);
3152  TRACE("offset = %llx, length = %x\n", start, length);
3153  TRACE("paging_io = %s, no cache = %s\n", Irp->Flags & IRP_PAGING_IO ? "TRUE" : "FALSE", Irp->Flags & IRP_NOCACHE ? "TRUE" : "FALSE");
3154 
3155  if (!fcb->ads && fcb->type == BTRFS_TYPE_DIRECTORY)
3156  return STATUS_INVALID_DEVICE_REQUEST;
3157 
3158  if (!(Irp->Flags & IRP_PAGING_IO) && !FsRtlCheckLockForReadAccess(&fcb->lock, Irp)) {
3159  WARN("tried to read locked region\n");
3160  return STATUS_FILE_LOCK_CONFLICT;
3161  }
3162 
3163  if (length == 0) {
3164  TRACE("tried to read zero bytes\n");
3165  return STATUS_SUCCESS;
3166  }
3167 
3168  if (start >= (UINT64)fcb->Header.FileSize.QuadPart) {
3169  TRACE("tried to read with offset after file end (%llx >= %llx)\n", start, fcb->Header.FileSize.QuadPart);
3170  return STATUS_END_OF_FILE;
3171  }
3172 
3173  TRACE("FileObject %p fcb %p FileSize = %llx st_size = %llx (%p)\n", FileObject, fcb, fcb->Header.FileSize.QuadPart, fcb->inode_item.st_size, &fcb->inode_item.st_size);
3174 
3175  if (Irp->Flags & IRP_NOCACHE || !(IrpSp->MinorFunction & IRP_MN_MDL)) {
3177 
3178  if (Irp->MdlAddress && !data) {
3179  ERR("MmGetSystemAddressForMdlSafe returned NULL\n");
3180  return STATUS_INSUFFICIENT_RESOURCES;
3181  }
3182 
3183  if (start >= (UINT64)fcb->Header.ValidDataLength.QuadPart) {
3184  length = (ULONG)min(length, min(start + length, (UINT64)fcb->Header.FileSize.QuadPart) - fcb->Header.ValidDataLength.QuadPart);
3185  RtlZeroMemory(data, length);
3186  Irp->IoStatus.Information = *bytes_read = length;
3187  return STATUS_SUCCESS;
3188  }
3189 
3190  if (length + start > (UINT64)fcb->Header.ValidDataLength.QuadPart) {
3191  addon = (ULONG)(min(start + length, (UINT64)fcb->Header.FileSize.QuadPart) - fcb->Header.ValidDataLength.QuadPart);
3192  RtlZeroMemory(data + (fcb->Header.ValidDataLength.QuadPart - start), addon);
3193  length = (ULONG)(fcb->Header.ValidDataLength.QuadPart - start);
3194  }
3195  }
3196 
3197  if (!(Irp->Flags & IRP_NOCACHE)) {
3198  NTSTATUS Status;
3199 
3200  _SEH2_TRY {
3201  if (!FileObject->PrivateCacheMap) {
3202  CC_FILE_SIZES ccfs;
3203 
3204  ccfs.AllocationSize = fcb->Header.AllocationSize;
3205  ccfs.FileSize = fcb->Header.FileSize;
3206  ccfs.ValidDataLength = fcb->Header.ValidDataLength;
3207 
3208  init_file_cache(FileObject, &ccfs);
3209  }
3210 
3211  if (IrpSp->MinorFunction & IRP_MN_MDL) {
3212  CcMdlRead(FileObject,&IrpSp->Parameters.Read.ByteOffset, length, &Irp->MdlAddress, &Irp->IoStatus);
3213  } else {
3214  if (fCcCopyReadEx) {
3215  TRACE("CcCopyReadEx(%p, %llx, %x, %u, %p, %p, %p, %p)\n", FileObject, IrpSp->Parameters.Read.ByteOffset.QuadPart,
3216  length, wait, data, &Irp->IoStatus, Irp->Tail.Overlay.Thread);
3217  TRACE("sizes = %llx, %llx, %llx\n", fcb->Header.AllocationSize, fcb->Header.FileSize, fcb->Header.ValidDataLength);
3218  if (!fCcCopyReadEx(FileObject, &IrpSp->Parameters.Read.ByteOffset, length, wait, data, &Irp->IoStatus, Irp->Tail.Overlay.Thread)) {
3219  TRACE("CcCopyReadEx could not wait\n");
3220 
3221  IoMarkIrpPending(Irp);
3222  return STATUS_PENDING;
3223  }
3224  TRACE("CcCopyReadEx finished\n");
3225  } else {
3226  TRACE("CcCopyRead(%p, %llx, %x, %u, %p, %p)\n", FileObject, IrpSp->Parameters.Read.ByteOffset.QuadPart, length, wait, data, &Irp->IoStatus);
3227  TRACE("sizes = %llx, %llx, %llx\n", fcb->Header.AllocationSize, fcb->Header.FileSize, fcb->Header.ValidDataLength);
3228  if (!CcCopyRead(FileObject, &IrpSp->Parameters.Read.ByteOffset, length, wait, data, &Irp->IoStatus)) {
3229  TRACE("CcCopyRead could not wait\n");
3230 
3231  IoMarkIrpPending(Irp);
3232  return STATUS_PENDING;
3233  }
3234  TRACE("CcCopyRead finished\n");
3235  }
3236  }
3239  } _SEH2_END;
3240 
3241  if (NT_SUCCESS(Status)) {
3242  Status = Irp->IoStatus.Status;
3243  Irp->IoStatus.Information += addon;
3244  *bytes_read = (ULONG)Irp->IoStatus.Information;
3245  } else
3246  ERR("EXCEPTION - %08x\n", Status);
3247 
3248  return Status;
3249  } else {
3250  NTSTATUS Status;
3251 
3252  if (!wait) {
3253  IoMarkIrpPending(Irp);
3254  return STATUS_PENDING;
3255  }
3256 
3257  if (fcb->ads)
3258  Status = read_stream(fcb, data, start, length, bytes_read);
3259  else
3260  Status = read_file(fcb, data, start, length, bytes_read, Irp);
3261 
3262  *bytes_read += addon;
3263  TRACE("read %u bytes\n", *bytes_read);
3264 
3265  Irp->IoStatus.Information = *bytes_read;
3266 
3267  if (diskacc && Status != STATUS_PENDING) {
3268  PETHREAD thread = NULL;
3269 
3270  if (Irp->Tail.Overlay.Thread && !IoIsSystemThread(Irp->Tail.Overlay.Thread))
3271  thread = Irp->Tail.Overlay.Thread;
3272  else if (!IoIsSystemThread(PsGetCurrentThread()))
3276 
3277  if (thread)
3278  fPsUpdateDiskCounters(PsGetThreadProcess(thread), *bytes_read, 0, 1, 0, 0);
3279  }
3280 
3281  return Status;
3282  }
3283 }
3284 
3285 _Dispatch_type_(IRP_MJ_READ)
3286 _Function_class_(DRIVER_DISPATCH)
3287 NTSTATUS NTAPI drv_read(PDEVICE_OBJECT DeviceObject, PIRP Irp) {
3288  device_extension* Vcb = DeviceObject->DeviceExtension;
3289  PIO_STACK_LOCATION IrpSp = IoGetCurrentIrpStackLocation(Irp);
3290  PFILE_OBJECT FileObject = IrpSp->FileObject;
3291  ULONG bytes_read = 0;
3292  NTSTATUS Status;
3293  BOOL top_level;
3294  fcb* fcb;
3295  ccb* ccb;
3296  BOOLEAN fcb_lock = FALSE, wait;
3297 
3298  FsRtlEnterFileSystem();
3299 
3300  top_level = is_top_level(Irp);
3301 
3302  TRACE("read\n");
3303 
3304  if (Vcb && Vcb->type == VCB_TYPE_VOLUME) {
3305  Status = vol_read(DeviceObject, Irp);
3306  goto exit2;
3307  } else if (!Vcb || Vcb->type != VCB_TYPE_FS) {
3308  Status = STATUS_INVALID_PARAMETER;
3309  goto end;
3310  }
3311 
3312  Irp->IoStatus.Information = 0;
3313 
3314  if (IrpSp->MinorFunction & IRP_MN_COMPLETE) {
3315  CcMdlReadComplete(IrpSp->FileObject, Irp->MdlAddress);
3316 
3317  Irp->MdlAddress = NULL;
3318  Status = STATUS_SUCCESS;
3319 
3320  goto exit;
3321  }
3322 
3323  fcb = FileObject->FsContext;
3324 
3325  if (!fcb) {
3326  ERR("fcb was NULL\n");
3328  goto exit;
3329  }
3330 
3331  ccb = FileObject->FsContext2;
3332 
3333  if (!ccb) {
3334  ERR("ccb was NULL\n");
3336  goto exit;
3337  }
3338 
3339  if (Irp->RequestorMode == UserMode && !(ccb->access & FILE_READ_DATA)) {
3340  WARN("insufficient privileges\n");
3341  Status = STATUS_ACCESS_DENIED;
3342  goto exit;
3343  }
3344 
3345  if (fcb == Vcb->volume_fcb) {
3346  TRACE("reading volume FCB\n");
3347 
3348  IoSkipCurrentIrpStackLocation(Irp);
3349 
3350  Status = IoCallDriver(Vcb->Vpb->RealDevice, Irp);
3351 
3352  goto exit2;
3353  }
3354 
3355  wait = IoIsOperationSynchronous(Irp);
3356 
3357  // Don't offload jobs when doing paging IO - otherwise this can lead to
3358  // deadlocks in CcCopyRead.
3359  if (Irp->Flags & IRP_PAGING_IO)
3360  wait = TRUE;
3361 
3362  if (!(Irp->Flags & IRP_PAGING_IO) && FileObject->SectionObjectPointer->DataSectionObject) {
3363  IO_STATUS_BLOCK iosb;
3364 
3365  CcFlushCache(FileObject->SectionObjectPointer, &IrpSp->Parameters.Read.ByteOffset, IrpSp->Parameters.Read.Length, &iosb);
3366  if (!NT_SUCCESS(iosb.Status)) {
3367  ERR("CcFlushCache returned %08x\n", iosb.Status);
3368  return iosb.Status;
3369  }
3370  }
3371 
3372  if (!ExIsResourceAcquiredSharedLite(fcb->Header.Resource)) {
3373  if (!ExAcquireResourceSharedLite(fcb->Header.Resource, wait)) {
3374  Status = STATUS_PENDING;
3375  IoMarkIrpPending(Irp);
3376  goto exit;
3377  }
3378 
3379  fcb_lock = TRUE;
3380  }
3381 
3382  Status = do_read(Irp, wait, &bytes_read);
3383 
3384  if (fcb_lock)
3385  ExReleaseResourceLite(fcb->Header.Resource);
3386 
3387 exit:
3388  if (FileObject->Flags & FO_SYNCHRONOUS_IO && !(Irp->Flags & IRP_PAGING_IO))
3389  FileObject->CurrentByteOffset.QuadPart = IrpSp->Parameters.Read.ByteOffset.QuadPart + (NT_SUCCESS(Status) ? bytes_read : 0);
3390 
3391 end:
3392  Irp->IoStatus.Status = Status;
3393 
3394  TRACE("Irp->IoStatus.Status = %08x\n", Irp->IoStatus.Status);
3395  TRACE("Irp->IoStatus.Information = %lu\n", Irp->IoStatus.Information);
3396  TRACE("returning %08x\n", Status);
3397 
3398  if (Status != STATUS_PENDING)
3399  IoCompleteRequest(Irp, IO_NO_INCREMENT);
3400  else {
3401  if (!add_thread_job(Vcb, Irp))
3402  do_read_job(Irp);
3403  }
3404 
3405 exit2:
3406  if (top_level)
3407  IoSetTopLevelIrp(NULL);
3408 
3409  FsRtlExitFileSystem();
3410 
3411  return Status;
3412 }