ReactOS  0.4.15-dev-5153-gfece68b
write.c
Go to the documentation of this file.
1 /* Copyright (c) Mark Harmstone 2016-17
2  *
3  * This file is part of WinBtrfs.
4  *
5  * WinBtrfs is free software: you can redistribute it and/or modify
6  * it under the terms of the GNU Lesser General Public Licence as published by
7  * the Free Software Foundation, either version 3 of the Licence, or
8  * (at your option) any later version.
9  *
10  * WinBtrfs is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  * GNU Lesser General Public Licence for more details.
14  *
15  * You should have received a copy of the GNU Lesser General Public Licence
16  * along with WinBtrfs. If not, see <http://www.gnu.org/licenses/>. */
17 
18 #include "btrfs_drv.h"
19 
/* Per-stripe descriptor handed to the prepare_*_write helpers.
 * The member declarations are not present in this listing.
 * prepare_raid5_write accesses `start` and `end` through a write_stripe*.
 * NOTE(review): the RAID0/RAID10 helpers also touch `mdl` and `data` on their
 * `stripes` argument; presumably those are members of this struct - confirm
 * against the full source. */
20 typedef struct {
26 } write_stripe;
27 
28 _Function_class_(IO_COMPLETION_ROUTINE)
29 static NTSTATUS __stdcall write_data_completion(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID conptr);
30 
31 static void remove_fcb_extent(fcb* fcb, extent* ext, LIST_ENTRY* rollback) __attribute__((nonnull(1, 2, 3)));
32 
36 extern bool diskacc;
37 
/* Looks for a free-space entry in chunk `c` that can hold `length` bytes and,
 * on success, stores its start address in *address and returns true.
 *
 * The signature line is missing from this listing; the body uses Vcb, c,
 * length and address.
 *
 * Returns false when:
 *   - length exceeds chunk_item->size minus c->used,
 *   - loading the free-space cache fails (see the ERR message below),
 *   - c->space_size is empty, or
 *   - no entry is large enough.
 *
 * NOTE(review): the walk below only makes sense if c->space_size is ordered
 * from largest to smallest entry - confirm against the code that maintains
 * that list. */
38 __attribute__((nonnull(1, 2, 4)))
40  LIST_ENTRY* le;
41  space* s;
42 
43  TRACE("(%p, %I64x, %I64x, %p)\n", Vcb, c->offset, length, address);
44 
45  if (length > c->chunk_item->size - c->used)
46  return false;
47 
48  if (!c->cache_loaded) {
    // The statement that declares/assigns Status is missing from this listing.
50 
51  if (!NT_SUCCESS(Status)) {
52  ERR("load_cache_chunk returned %08lx\n", Status);
53  return false;
54  }
55  }
56 
57  if (IsListEmpty(&c->space_size))
58  return false;
59 
60  le = c->space_size.Flink;
61  while (le != &c->space_size) {
62  s = CONTAINING_RECORD(le, space, list_entry_size);
63 
    // Exact fit: use it. First entry that is too small: fall back to the
    // entry just before it, or fail if there is no previous entry.
64  if (s->size == length) {
65  *address = s->address;
66  return true;
67  } else if (s->size < length) {
68  if (le == c->space_size.Flink)
69  return false;
70 
71  s = CONTAINING_RECORD(le->Blink, space, list_entry_size);
72 
73  *address = s->address;
74  return true;
75  }
76 
77  le = le->Flink;
78  }
79 
    // Every entry was strictly larger than length; take the last one in the list.
80  s = CONTAINING_RECORD(c->space_size.Blink, space, list_entry_size);
81 
82  if (s->size > length) {
83  *address = s->address;
84  return true;
85  }
86 
87  return false;
88 }
89 
/* Returns the chunk in Vcb->chunks whose range
 * [c->offset, c->offset + c->chunk_item->size) contains `address`, or NULL
 * if no chunk covers it.
 *
 * The signature line and the line that derives `c` from le2 are missing from
 * this listing.
 *
 * Vcb->chunk_lock is held shared for the duration of the walk and is
 * released before every return. */
90 __attribute__((nonnull(1)))
92  LIST_ENTRY* le2;
93 
94  ExAcquireResourceSharedLite(&Vcb->chunk_lock, true);
95 
96  le2 = Vcb->chunks.Flink;
97  while (le2 != &Vcb->chunks) {
99 
100  if (address >= c->offset && address < c->offset + c->chunk_item->size) {
101  ExReleaseResourceLite(&Vcb->chunk_lock);
102  return c;
103  }
104 
105  le2 = le2->Flink;
106  }
107 
108  ExReleaseResourceLite(&Vcb->chunk_lock);
109 
110  return NULL;
111 }
112 
/* Candidate placement for one stripe of a chunk being allocated.
 * The member declarations are not present in this listing; find_new_stripe,
 * find_new_dup_stripes and alloc_chunk read and write `device` and `dh`
 * (a free-space hole on that device) on elements of this type. */
113 typedef struct {
116 } stripe;
117 
/* Picks a logical address for a new chunk of `size` bytes.
 *
 * Starts from 0xc00000 and walks Vcb->chunks. It returns the first candidate
 * address that leaves at least `size` bytes before the next chunk's offset.
 * If no such gap exists, it returns the address just past the last chunk
 * visited.
 *
 * The walk relies on Vcb->chunks being ordered by offset; alloc_chunk inserts
 * new chunks in offset order. The line that derives `c` from `le` is missing
 * from this listing. */
118 __attribute__((nonnull(1)))
119 static uint64_t find_new_chunk_address(device_extension* Vcb, uint64_t size) {
120  uint64_t lastaddr;
121  LIST_ENTRY* le;
122 
123  lastaddr = 0xc00000;
124 
125  le = Vcb->chunks.Flink;
126  while (le != &Vcb->chunks) {
128 
    // Gap between the previous chunk's end and this chunk is big enough.
129  if (c->offset >= lastaddr + size)
130  return lastaddr;
131 
132  lastaddr = c->offset + c->chunk_item->size;
133 
134  le = le->Flink;
135  }
136 
137  return lastaddr;
138 }
139 
/* Chooses one device and two holes on it (or one hole used twice) for a
 * two-stripe duplicate chunk. Fills stripes[0] and stripes[1].
 *
 * Vcb             - volume whose device list is searched.
 * stripes         - output; entries 0 and 1 get the same device, with dh set
 *                   to the chosen holes.
 * max_stripe_size - preferred size of each stripe.
 * full_size       - when true, fail rather than settle for smaller holes.
 *
 * Returns true if a placement was found, false otherwise.
 *
 * First pass: only devices that are not readonly, not being relocated and
 * have a devobj are considered, and only if their usage ratio
 * (bytes_used * 4096 / num_bytes) is lower than the best found so far.
 * Second pass (only when the first found nothing and full_size is false):
 * pick the device offering the largest usable stripe size.
 *
 * The lines that derive `dev` and `dh` from the list entries are missing from
 * this listing. */
140 __attribute__((nonnull(1,2)))
141 static bool find_new_dup_stripes(device_extension* Vcb, stripe* stripes, uint64_t max_stripe_size, bool full_size) {
142  uint64_t devusage = 0xffffffffffffffff;
143  space *devdh1 = NULL, *devdh2 = NULL;
144  LIST_ENTRY* le;
145  device* dev2 = NULL;
146 
147  le = Vcb->devices.Flink;
148 
149  while (le != &Vcb->devices) {
151 
152  if (!dev->readonly && !dev->reloc && dev->devobj) {
153  uint64_t usage = (dev->devitem.bytes_used * 4096) / dev->devitem.num_bytes;
154 
155  // favour devices which have been used the least
156  if (usage < devusage) {
157  if (!IsListEmpty(&dev->space)) {
158  LIST_ENTRY* le2;
159  space *dh1 = NULL, *dh2 = NULL;
160 
161  le2 = dev->space.Flink;
162  while (le2 != &dev->space) {
164 
    // A hole of at least max_stripe_size becomes dh1 (the old dh1 moves to
    // dh2) while fewer than two are recorded, or when it is smaller than dh1.
165  if (dh->size >= max_stripe_size && (!dh1 || !dh2 || dh->size < dh1->size)) {
166  dh2 = dh1;
167  dh1 = dh;
168  }
169 
170  le2 = le2->Flink;
171  }
172 
    // Accept two separate holes, or a single hole big enough for both copies.
173  if (dh1 && (dh2 || dh1->size >= 2 * max_stripe_size)) {
174  dev2 = dev;
175  devusage = usage;
176  devdh1 = dh1;
177  devdh2 = dh2 ? dh2 : dh1;
178  }
179  }
180  }
181  }
182 
183  le = le->Flink;
184  }
185 
186  if (!devdh1) {
187  uint64_t size = 0;
188 
189  // Can't find hole of at least max_stripe_size; look for the largest one we can find
190 
191  if (full_size)
192  return false;
193 
194  le = Vcb->devices.Flink;
195  while (le != &Vcb->devices) {
197 
    // NOTE(review): unlike the first pass, this test does not check dev->devobj - confirm intent.
198  if (!dev->readonly && !dev->reloc) {
199  if (!IsListEmpty(&dev->space)) {
200  LIST_ENTRY* le2;
201  space *dh1 = NULL, *dh2 = NULL;
202 
203  le2 = dev->space.Flink;
204  while (le2 != &dev->space) {
206 
207  if (!dh1 || !dh2 || dh->size < dh1->size) {
208  dh2 = dh1;
209  dh1 = dh;
210  }
211 
212  le2 = le2->Flink;
213  }
214 
215  if (dh1) {
216  uint64_t devsize;
217 
    // Achievable stripe size on this device: either half of dh1 (one hole
    // split in two) or the smaller of the two holes, whichever is larger.
218  if (dh2)
219  devsize = max(dh1->size / 2, min(dh1->size, dh2->size));
220  else
221  devsize = dh1->size / 2;
222 
223  if (devsize > size) {
224  dev2 = dev;
225  devdh1 = dh1;
226 
227  if (dh2 && min(dh1->size, dh2->size) > dh1->size / 2)
228  devdh2 = dh2;
229  else
230  devdh2 = dh1;
231 
232  size = devsize;
233  }
234  }
235  }
236  }
237 
238  le = le->Flink;
239  }
240 
241  if (!devdh1)
242  return false;
243  }
244 
245  stripes[0].device = stripes[1].device = dev2;
246  stripes[0].dh = devdh1;
247  stripes[1].dh = devdh2;
248 
249  return true;
250 }
251 
/* Chooses a device and a free-space hole for stripe number `i` of a new
 * chunk and records them in stripes[i].
 *
 * Vcb             - volume whose device list is searched.
 * stripes         - in/out; entries 0..i-1 name devices that must not be
 *                   reused, entry i receives the result.
 * i               - index of the stripe being placed.
 * max_stripe_size - preferred stripe size.
 * allow_missing   - when false, devices without a devobj are skipped.
 * full_size       - when true, fail rather than settle for a smaller hole.
 *
 * Returns true if stripes[i] was filled in, false otherwise.
 *
 * Devices that are readonly or being relocated are always skipped.
 * First pass: among devices with a lower usage ratio than the best so far,
 * take a hole of at least max_stripe_size, preferring the smaller hole within
 * one device. Second pass (only if the first found nothing and full_size is
 * false): take the largest hole on any eligible device.
 *
 * The lines that derive `dev` and `dh` from the list entries are missing from
 * this listing. */
252 __attribute__((nonnull(1,2)))
253 static bool find_new_stripe(device_extension* Vcb, stripe* stripes, uint16_t i, uint64_t max_stripe_size, bool allow_missing, bool full_size) {
254  uint64_t k, devusage = 0xffffffffffffffff;
255  space* devdh = NULL;
256  LIST_ENTRY* le;
257  device* dev2 = NULL;
258 
259  le = Vcb->devices.Flink;
260  while (le != &Vcb->devices) {
262  uint64_t usage;
263  bool skip = false;
264 
265  if (dev->readonly || dev->reloc || (!dev->devobj && !allow_missing)) {
266  le = le->Flink;
267  continue;
268  }
269 
270  // skip this device if it already has a stripe
271  if (i > 0) {
272  for (k = 0; k < i; k++) {
273  if (stripes[k].device == dev) {
274  skip = true;
275  break;
276  }
277  }
278  }
279 
280  if (!skip) {
281  usage = (dev->devitem.bytes_used * 4096) / dev->devitem.num_bytes;
282 
283  // favour devices which have been used the least
284  if (usage < devusage) {
285  if (!IsListEmpty(&dev->space)) {
286  LIST_ENTRY* le2;
287 
288  le2 = dev->space.Flink;
289  while (le2 != &dev->space) {
291 
    // The first big-enough hole on a newly considered device is taken; later
    // holes on that same device replace it only if they are smaller.
292  if ((dev2 != dev && dh->size >= max_stripe_size) ||
293  (dev2 == dev && dh->size >= max_stripe_size && dh->size < devdh->size)
294  ) {
295  devdh = dh;
296  dev2 = dev;
297  devusage = usage;
298  }
299 
300  le2 = le2->Flink;
301  }
302  }
303  }
304  }
305 
306  le = le->Flink;
307  }
308 
309  if (!devdh) {
310  // Can't find hole of at least max_stripe_size; look for the largest one we can find
311 
312  if (full_size)
313  return false;
314 
315  le = Vcb->devices.Flink;
316  while (le != &Vcb->devices) {
318  bool skip = false;
319 
320  if (dev->readonly || dev->reloc || (!dev->devobj && !allow_missing)) {
321  le = le->Flink;
322  continue;
323  }
324 
325  // skip this device if it already has a stripe
326  if (i > 0) {
327  for (k = 0; k < i; k++) {
328  if (stripes[k].device == dev) {
329  skip = true;
330  break;
331  }
332  }
333  }
334 
335  if (!skip) {
336  if (!IsListEmpty(&dev->space)) {
337  LIST_ENTRY* le2;
338 
339  le2 = dev->space.Flink;
340  while (le2 != &dev->space) {
342 
343  if (!devdh || devdh->size < dh->size) {
344  devdh = dh;
345  dev2 = dev;
346  }
347 
348  le2 = le2->Flink;
349  }
350  }
351  }
352 
353  le = le->Flink;
354  }
355 
356  if (!devdh)
357  return false;
358  }
359 
360  stripes[i].dh = devdh;
361  stripes[i].device = dev2;
362 
363  return true;
364 }
365 
/* Allocates a new chunk with the block-group flags in `flags`, links it into
 * Vcb->chunks and returns it through *pc.
 *
 * Vcb       - volume to allocate on.
 * flags     - block-group flags: one of DATA/METADATA/SYSTEM selects the size
 *             limits, and the RAID/DUPLICATE bits select the stripe layout.
 * pc        - receives the new chunk; written only on the success path.
 * full_size - passed to the stripe finders; when true they refuse holes
 *             smaller than the computed stripe size.
 *
 * Returns an NTSTATUS. STATUS_INTERNAL_ERROR is returned when none of DATA,
 * METADATA or SYSTEM is set. The other error codes are assigned on lines that
 * are missing from this listing.
 *
 * Outline:
 *   1. Sum the size of all devices and derive max stripe/chunk sizes.
 *   2. Pick stripe-count limits for the RAID profile.
 *   3. Find a device and hole for each stripe, optionally including missing
 *      devices when degraded mounts are allowed.
 *   4. Build the CHUNK_ITEM and its stripe array, and choose a logical address.
 *   5. Initialise the in-memory chunk, add one free-space entry spanning it,
 *      and subtract the stripes from each device's free space.
 *   6. On success, insert the chunk into Vcb->chunks in offset order.
 *
 * Several statements are missing from this listing: the Status declaration,
 * the `type` assignments for non-SINGLE profiles, the allocations of `c` and
 * `s`, and others.
 *
 * NOTE(review): if the `s` allocation fails, the cleanup path frees `c` after
 * ExInitializeResourceLite has been called on four of its members, and no
 * ExDeleteResourceLite call is visible - confirm whether that is handled on
 * the elided lines. */
366 __attribute__((nonnull(1,3)))
367 NTSTATUS alloc_chunk(device_extension* Vcb, uint64_t flags, chunk** pc, bool full_size) {
369  uint64_t max_stripe_size, max_chunk_size, stripe_size, stripe_length, factor;
370  uint64_t total_size = 0, logaddr;
371  uint16_t i, type, num_stripes, sub_stripes, max_stripes, min_stripes, allowed_missing;
372  stripe* stripes = NULL;
373  uint16_t cisize;
374  CHUNK_ITEM_STRIPE* cis;
375  chunk* c = NULL;
376  space* s = NULL;
377  LIST_ENTRY* le;
378 
379  le = Vcb->devices.Flink;
380  while (le != &Vcb->devices) {
382  total_size += dev->devitem.num_bytes;
383 
384  le = le->Flink;
385  }
386 
387  TRACE("total_size = %I64x\n", total_size);
388 
389  // We purposely check for DATA first - mixed blocks have the same size
390  // as DATA ones.
391  if (flags & BLOCK_FLAG_DATA) {
392  max_stripe_size = 0x40000000; // 1 GB
393  max_chunk_size = 10 * max_stripe_size;
394  } else if (flags & BLOCK_FLAG_METADATA) {
395  if (total_size > 0xC80000000) // 50 GB
396  max_stripe_size = 0x40000000; // 1 GB
397  else
398  max_stripe_size = 0x10000000; // 256 MB
399 
400  max_chunk_size = max_stripe_size;
401  } else if (flags & BLOCK_FLAG_SYSTEM) {
402  max_stripe_size = 0x2000000; // 32 MB
403  max_chunk_size = 2 * max_stripe_size;
404  } else {
405  ERR("unknown chunk type\n");
406  return STATUS_INTERNAL_ERROR;
407  }
408 
    // Per-profile limits. The `type` assignment of every branch except SINGLE
    // is on a line missing from this listing.
409  if (flags & BLOCK_FLAG_DUPLICATE) {
410  min_stripes = 2;
411  max_stripes = 2;
412  sub_stripes = 0;
414  allowed_missing = 0;
415  } else if (flags & BLOCK_FLAG_RAID0) {
416  min_stripes = 2;
417  max_stripes = (uint16_t)min(0xffff, Vcb->superblock.num_devices);
418  sub_stripes = 0;
420  allowed_missing = 0;
421  } else if (flags & BLOCK_FLAG_RAID1) {
422  min_stripes = 2;
423  max_stripes = 2;
424  sub_stripes = 1;
426  allowed_missing = 1;
427  } else if (flags & BLOCK_FLAG_RAID10) {
428  min_stripes = 4;
429  max_stripes = (uint16_t)min(0xffff, Vcb->superblock.num_devices);
430  sub_stripes = 2;
432  allowed_missing = 1;
433  } else if (flags & BLOCK_FLAG_RAID5) {
434  min_stripes = 3;
435  max_stripes = (uint16_t)min(0xffff, Vcb->superblock.num_devices);
436  sub_stripes = 1;
438  allowed_missing = 1;
439  } else if (flags & BLOCK_FLAG_RAID6) {
440  min_stripes = 4;
441  max_stripes = 257;
442  sub_stripes = 1;
444  allowed_missing = 2;
445  } else if (flags & BLOCK_FLAG_RAID1C3) {
446  min_stripes = 3;
447  max_stripes = 3;
448  sub_stripes = 1;
450  allowed_missing = 2;
451  } else if (flags & BLOCK_FLAG_RAID1C4) {
452  min_stripes = 4;
453  max_stripes = 4;
454  sub_stripes = 1;
456  allowed_missing = 3;
457  } else { // SINGLE
458  min_stripes = 1;
459  max_stripes = 1;
460  sub_stripes = 1;
461  type = 0;
462  allowed_missing = 0;
463  }
464 
465  if (max_chunk_size > total_size / 10) { // cap at 10%
466  max_chunk_size = total_size / 10;
467  max_stripe_size = max_chunk_size / min_stripes;
468  }
469 
470  if (max_stripe_size > total_size / (10 * min_stripes))
471  max_stripe_size = total_size / (10 * min_stripes);
472 
473  TRACE("would allocate a new chunk of %I64x bytes and stripe %I64x\n", max_chunk_size, max_stripe_size);
474 
475  stripes = ExAllocatePoolWithTag(PagedPool, sizeof(stripe) * max_stripes, ALLOC_TAG);
476  if (!stripes) {
477  ERR("out of memory\n");
479  goto end;
480  }
481 
482  num_stripes = 0;
483 
    // DUPLICATE places both stripes on one device; every other profile places
    // each stripe on a different device, stopping at the first failure.
484  if (type == BLOCK_FLAG_DUPLICATE) {
485  if (!find_new_dup_stripes(Vcb, stripes, max_stripe_size, full_size)) {
487  goto end;
488  } else
489  num_stripes = max_stripes;
490  } else {
491  for (i = 0; i < max_stripes; i++) {
492  if (!find_new_stripe(Vcb, stripes, i, max_stripe_size, false, full_size))
493  break;
494  else
495  num_stripes++;
496  }
497  }
498 
    // Degraded allocation: top up with stripes on missing devices, at most
    // allowed_missing of them.
499  if (num_stripes < min_stripes && Vcb->options.allow_degraded && allowed_missing > 0) {
500  uint16_t added_missing = 0;
501 
502  for (i = num_stripes; i < max_stripes; i++) {
503  if (!find_new_stripe(Vcb, stripes, i, max_stripe_size, true, full_size))
504  break;
505  else {
506  added_missing++;
507  if (added_missing >= allowed_missing)
508  break;
509  }
510  }
511 
512  num_stripes += added_missing;
513  }
514 
515  // for RAID10, round down to an even number of stripes
516  if (type == BLOCK_FLAG_RAID10 && (num_stripes % sub_stripes) != 0) {
517  num_stripes -= num_stripes % sub_stripes;
518  }
519 
520  if (num_stripes < min_stripes) {
521  WARN("found %u stripes, needed at least %u\n", num_stripes, min_stripes);
523  goto end;
524  }
525 
    // The allocation of `c` is on a line missing from this listing.
527  if (!c) {
528  ERR("out of memory\n");
530  goto end;
531  }
532 
533  c->devices = NULL;
534 
535  cisize = sizeof(CHUNK_ITEM) + (num_stripes * sizeof(CHUNK_ITEM_STRIPE));
536  c->chunk_item = ExAllocatePoolWithTag(NonPagedPool, cisize, ALLOC_TAG);
537  if (!c->chunk_item) {
538  ERR("out of memory\n");
540  goto end;
541  }
542 
543  stripe_length = 0x10000; // FIXME? BTRFS_STRIPE_LEN in kernel
544 
    // Stripe size: half the hole when DUPLICATE shares a single hole, otherwise
    // the smallest chosen hole; both are capped at max_stripe_size.
545  if (type == BLOCK_FLAG_DUPLICATE && stripes[1].dh == stripes[0].dh)
546  stripe_size = min(stripes[0].dh->size / 2, max_stripe_size);
547  else {
548  stripe_size = max_stripe_size;
549  for (i = 0; i < num_stripes; i++) {
550  if (stripes[i].dh->size < stripe_size)
551  stripe_size = stripes[i].dh->size;
552  }
553  }
554 
    // factor = number of stripes' worth of logical space the chunk provides.
555  if (type == BLOCK_FLAG_RAID0)
556  factor = num_stripes;
557  else if (type == BLOCK_FLAG_RAID10)
558  factor = num_stripes / sub_stripes;
559  else if (type == BLOCK_FLAG_RAID5)
560  factor = num_stripes - 1;
561  else if (type == BLOCK_FLAG_RAID6)
562  factor = num_stripes - 2;
563  else
564  factor = 1; // SINGLE, DUPLICATE, RAID1, RAID1C3, RAID1C4
565 
566  if (stripe_size * factor > max_chunk_size)
567  stripe_size = max_chunk_size / factor;
568 
    // Round down to a whole number of stripe_length units.
569  if (stripe_size % stripe_length > 0)
570  stripe_size -= stripe_size % stripe_length;
571 
572  if (stripe_size == 0) {
573  ERR("not enough free space found (stripe_size == 0)\n");
575  goto end;
576  }
577 
578  c->chunk_item->size = stripe_size * factor;
579  c->chunk_item->root_id = Vcb->extent_root->id;
580  c->chunk_item->stripe_length = stripe_length;
581  c->chunk_item->type = flags;
582  c->chunk_item->opt_io_alignment = (uint32_t)c->chunk_item->stripe_length;
583  c->chunk_item->opt_io_width = (uint32_t)c->chunk_item->stripe_length;
584  c->chunk_item->sector_size = stripes[0].device->devitem.minimal_io_size;
585  c->chunk_item->num_stripes = num_stripes;
586  c->chunk_item->sub_stripes = sub_stripes;
587 
588  c->devices = ExAllocatePoolWithTag(NonPagedPool, sizeof(device*) * num_stripes, ALLOC_TAG);
589  if (!c->devices) {
590  ERR("out of memory\n");
592  goto end;
593  }
594 
    // The CHUNK_ITEM_STRIPE array sits directly after the CHUNK_ITEM header in
    // the same allocation (see cisize above).
595  cis = (CHUNK_ITEM_STRIPE*)&c->chunk_item[1];
596  for (i = 0; i < num_stripes; i++) {
597  cis[i].dev_id = stripes[i].device->devitem.dev_id;
598 
    // The second DUPLICATE copy goes right after the first when both share a hole.
599  if (type == BLOCK_FLAG_DUPLICATE && i == 1 && stripes[i].dh == stripes[0].dh)
600  cis[i].offset = stripes[0].dh->address + stripe_size;
601  else
602  cis[i].offset = stripes[i].dh->address;
603 
604  cis[i].dev_uuid = stripes[i].device->devitem.device_uuid;
605 
606  c->devices[i] = stripes[i].device;
607  }
608 
609  logaddr = find_new_chunk_address(Vcb, c->chunk_item->size);
610 
611  Vcb->superblock.chunk_root_generation = Vcb->superblock.generation;
612 
613  c->size = cisize;
614  c->offset = logaddr;
615  c->used = c->oldused = 0;
616  c->cache = c->old_cache = NULL;
617  c->readonly = false;
618  c->reloc = false;
619  c->last_alloc_set = false;
620  c->last_stripe = 0;
621  c->cache_loaded = true;
622  c->changed = false;
623  c->space_changed = false;
624  c->balance_num = 0;
625 
626  InitializeListHead(&c->space);
627  InitializeListHead(&c->space_size);
628  InitializeListHead(&c->deleting);
629  InitializeListHead(&c->changed_extents);
630 
631  InitializeListHead(&c->range_locks);
632  ExInitializeResourceLite(&c->range_locks_lock);
633  KeInitializeEvent(&c->range_locks_event, NotificationEvent, false);
634 
635  InitializeListHead(&c->partial_stripes);
636  ExInitializeResourceLite(&c->partial_stripes_lock);
637 
638  ExInitializeResourceLite(&c->lock);
639  ExInitializeResourceLite(&c->changed_extents_lock);
640 
    // The allocation of `s` is on a line missing from this listing.
642  if (!s) {
643  ERR("out of memory\n");
645  goto end;
646  }
647 
    // A single free-space entry covering the whole new chunk.
648  s->address = c->offset;
649  s->size = c->chunk_item->size;
650  InsertTailList(&c->space, &s->list_entry);
651  InsertTailList(&c->space_size, &s->list_entry_size);
652 
654 
    // Charge each device for its stripe and remove the range from its free space.
655  for (i = 0; i < num_stripes; i++) {
656  stripes[i].device->devitem.bytes_used += stripe_size;
657 
658  space_list_subtract2(&stripes[i].device->space, NULL, cis[i].offset, stripe_size, NULL, NULL);
659  }
660 
662 
    // The condition guarding this flag update is on a line missing from this listing.
664  Vcb->superblock.incompat_flags |= BTRFS_INCOMPAT_FLAGS_RAID56;
665 
666 end:
667  if (stripes)
668  ExFreePool(stripes);
669 
670  if (!NT_SUCCESS(Status)) {
671  if (c) {
672  if (c->devices)
673  ExFreePool(c->devices);
674 
675  if (c->chunk_item)
676  ExFreePool(c->chunk_item);
677 
678  ExFreePool(c);
679  }
680 
681  if (s) ExFreePool(s);
682  } else {
683  bool done = false;
684 
    // Keep Vcb->chunks ordered by offset: insert before the first chunk with a
    // larger offset, or at the tail if there is none.
685  le = Vcb->chunks.Flink;
686  while (le != &Vcb->chunks) {
688 
689  if (c2->offset > c->offset) {
690  InsertHeadList(le->Blink, &c->list_entry);
691  done = true;
692  break;
693  }
694 
695  le = le->Flink;
696  }
697 
698  if (!done)
699  InsertTailList(&Vcb->chunks, &c->list_entry);
700 
701  c->created = true;
702  c->changed = true;
703  c->space_changed = true;
704  c->list_entry_balance.Flink = NULL;
705 
706  *pc = c;
707  }
708 
709  return Status;
710 }
711 
/* Splits a write of `length` bytes at logical `address` in chunk `c` across
 * the chunk's stripes: computes each stripe's [start, end) range, allocates
 * an MDL per non-empty stripe and fills those MDLs' PFN arrays from a
 * "master" MDL describing the source buffer.
 *
 * The second line of the parameter list is missing from this listing; the
 * body uses length, stripes, Irp, irp_offset and wtc in addition to the
 * parameters shown.
 *
 * The master MDL comes from one of three places:
 *   - Irp->MdlAddress, when the IRP has an MDL whose ByteOffset is 0 (its PFN
 *     array is indexed from irp_offset >> PAGE_SHIFT);
 *   - a new MDL over wtc->scratch, when `data` is not page-aligned (data is
 *     copied into the scratch buffer first);
 *   - a new MDL built directly over `data` otherwise.
 * In the last two cases the MDL is stored in wtc->mdl.
 *
 * Returns STATUS_SUCCESS on the visible success path. The status values
 * returned on failure are on lines missing from this listing.
 *
 * NOTE(review): the visible early return after the MmProbeAndLockPages
 * failure frees master_mdl but not `stripeoff`; the other early error exits
 * are elided, so check them for the same leak. */
712 __attribute__((nonnull(1,3,5,8)))
713 static NTSTATUS prepare_raid0_write(_Pre_satisfies_(_Curr_->chunk_item->num_stripes>0) _In_ chunk* c, _In_ uint64_t address, _In_reads_bytes_(length) void* data,
715  uint64_t startoff, endoff;
716  uint16_t startoffstripe, endoffstripe, stripenum;
717  uint64_t pos, *stripeoff;
718  uint32_t i;
719  bool file_write = Irp && Irp->MdlAddress && (Irp->MdlAddress->ByteOffset == 0);
720  PMDL master_mdl;
721  PFN_NUMBER* pfns;
722 
    // stripeoff[n] tracks how many bytes have been mapped into stripe n so far.
723  stripeoff = ExAllocatePoolWithTag(PagedPool, sizeof(uint64_t) * c->chunk_item->num_stripes, ALLOC_TAG);
724  if (!stripeoff) {
725  ERR("out of memory\n");
727  }
728 
    // Stripe index and in-stripe offset of the first and last byte of the write.
729  get_raid0_offset(address - c->offset, c->chunk_item->stripe_length, c->chunk_item->num_stripes, &startoff, &startoffstripe);
730  get_raid0_offset(address + length - c->offset - 1, c->chunk_item->stripe_length, c->chunk_item->num_stripes, &endoff, &endoffstripe);
731 
732  if (file_write) {
733  master_mdl = Irp->MdlAddress;
734 
    // The memory immediately after the MDL header is treated as its PFN array.
735  pfns = (PFN_NUMBER*)(Irp->MdlAddress + 1);
736  pfns = &pfns[irp_offset >> PAGE_SHIFT];
737  } else if (((ULONG_PTR)data % PAGE_SIZE) != 0) {
    // The allocation of wtc->scratch is on a line missing from this listing.
739  if (!wtc->scratch) {
740  ERR("out of memory\n");
742  }
743 
744  RtlCopyMemory(wtc->scratch, data, length);
745 
746  master_mdl = IoAllocateMdl(wtc->scratch, length, false, false, NULL);
747  if (!master_mdl) {
748  ERR("out of memory\n");
750  }
751 
752  MmBuildMdlForNonPagedPool(master_mdl);
753 
754  wtc->mdl = master_mdl;
755 
756  pfns = (PFN_NUMBER*)(master_mdl + 1);
757  } else {
759 
760  master_mdl = IoAllocateMdl(data, length, false, false, NULL);
761  if (!master_mdl) {
762  ERR("out of memory\n");
764  }
765 
    // The body of this SEH block is missing from this listing; per the error
    // message below it wraps MmProbeAndLockPages.
766  _SEH2_TRY {
770  } _SEH2_END;
771 
772  if (!NT_SUCCESS(Status)) {
773  ERR("MmProbeAndLockPages threw exception %08lx\n", Status);
774  IoFreeMdl(master_mdl);
775  return Status;
776  }
777 
778  wtc->mdl = master_mdl;
779 
780  pfns = (PFN_NUMBER*)(master_mdl + 1);
781  }
782 
    // Work out each stripe's byte range on disk, depending on whether it lies
    // before, at or after the stripe holding the first/last byte.
783  for (i = 0; i < c->chunk_item->num_stripes; i++) {
784  if (startoffstripe > i)
785  stripes[i].start = startoff - (startoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length;
786  else if (startoffstripe == i)
787  stripes[i].start = startoff;
788  else
789  stripes[i].start = startoff - (startoff % c->chunk_item->stripe_length);
790 
791  if (endoffstripe > i)
792  stripes[i].end = endoff - (endoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length;
793  else if (endoffstripe == i)
794  stripes[i].end = endoff + 1;
795  else
796  stripes[i].end = endoff - (endoff % c->chunk_item->stripe_length);
797 
798  if (stripes[i].start != stripes[i].end) {
799  stripes[i].mdl = IoAllocateMdl(NULL, (ULONG)(stripes[i].end - stripes[i].start), false, false, NULL);
800  if (!stripes[i].mdl) {
801  ERR("IoAllocateMdl failed\n");
802  ExFreePool(stripeoff);
804  }
805  }
806  }
807 
808  pos = 0;
809  RtlZeroMemory(stripeoff, sizeof(uint64_t) * c->chunk_item->num_stripes);
810 
811  stripenum = startoffstripe;
812 
    // Deal the source pages out to the stripes in round-robin order, one
    // stripe_length unit at a time. The first and last pieces may be shorter.
813  while (pos < length) {
814  PFN_NUMBER* stripe_pfns = (PFN_NUMBER*)(stripes[stripenum].mdl + 1);
815 
816  if (pos == 0) {
817  uint32_t writelen = (uint32_t)min(stripes[stripenum].end - stripes[stripenum].start,
818  c->chunk_item->stripe_length - (stripes[stripenum].start % c->chunk_item->stripe_length));
819 
820  RtlCopyMemory(stripe_pfns, pfns, writelen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
821 
822  stripeoff[stripenum] += writelen;
823  pos += writelen;
824  } else if (length - pos < c->chunk_item->stripe_length) {
825  RtlCopyMemory(&stripe_pfns[stripeoff[stripenum] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], (ULONG)((length - pos) * sizeof(PFN_NUMBER) >> PAGE_SHIFT));
826  break;
827  } else {
828  RtlCopyMemory(&stripe_pfns[stripeoff[stripenum] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], (ULONG)(c->chunk_item->stripe_length * sizeof(PFN_NUMBER) >> PAGE_SHIFT));
829 
830  stripeoff[stripenum] += c->chunk_item->stripe_length;
831  pos += c->chunk_item->stripe_length;
832  }
833 
834  stripenum = (stripenum + 1) % c->chunk_item->num_stripes;
835  }
836 
837  ExFreePool(stripeoff);
838 
839  return STATUS_SUCCESS;
840 }
841 
/* RAID10 counterpart of prepare_raid0_write. The write is striped across
 * num_stripes / sub_stripes groups; within a group, every member gets the
 * same start, end, data and MDL pointer as the group's first stripe.
 *
 * One line of the parameter list is missing from this listing; the body uses
 * address, data, length and stripes in addition to the parameters shown.
 *
 * The master MDL is selected as follows: the IRP's MDL when it has
 * ByteOffset 0; a new MDL over wtc->scratch when `data` is not page-aligned;
 * a new MDL built over `data` otherwise. In the last two cases the MDL is
 * stored in wtc->mdl.
 *
 * Returns STATUS_SUCCESS on the visible success path. The status values
 * returned on failure are on lines missing from this listing.
 *
 * NOTE(review): unlike the RAID0 helper, the per-group MDL is allocated even
 * when start == end. Cleanup of already-allocated stripe MDLs on the error
 * exits is not visible here - confirm that the caller handles it. */
842 __attribute__((nonnull(1,3,5,8)))
843 static NTSTATUS prepare_raid10_write(_Pre_satisfies_(_Curr_->chunk_item->sub_stripes>0&&_Curr_->chunk_item->num_stripes>=_Curr_->chunk_item->sub_stripes) _In_ chunk* c,
845  _In_ PIRP Irp, _In_ uint64_t irp_offset, _In_ write_data_context* wtc) {
846  uint64_t startoff, endoff;
847  uint16_t startoffstripe, endoffstripe, stripenum;
848  uint64_t pos, *stripeoff;
849  uint32_t i;
850  bool file_write = Irp && Irp->MdlAddress && (Irp->MdlAddress->ByteOffset == 0);
851  PMDL master_mdl;
852  PFN_NUMBER* pfns;
853 
    // Offsets are computed over groups (num_stripes / sub_stripes), not over
    // individual stripes.
854  get_raid0_offset(address - c->offset, c->chunk_item->stripe_length, c->chunk_item->num_stripes / c->chunk_item->sub_stripes, &startoffstripe == NULL ? &startoff : &startoff, &startoffstripe);
855  get_raid0_offset(address + length - c->offset - 1, c->chunk_item->stripe_length, c->chunk_item->num_stripes / c->chunk_item->sub_stripes, &endoff, &endoffstripe);
856 
    // stripenum stays a group index; the *offstripe values become indices into
    // the stripes[] array (first member of the group).
857  stripenum = startoffstripe;
858  startoffstripe *= c->chunk_item->sub_stripes;
859  endoffstripe *= c->chunk_item->sub_stripes;
860 
861  if (file_write) {
862  master_mdl = Irp->MdlAddress;
863 
864  pfns = (PFN_NUMBER*)(Irp->MdlAddress + 1);
865  pfns = &pfns[irp_offset >> PAGE_SHIFT];
866  } else if (((ULONG_PTR)data % PAGE_SIZE) != 0) {
    // The allocation of wtc->scratch is on a line missing from this listing.
868  if (!wtc->scratch) {
869  ERR("out of memory\n");
871  }
872 
873  RtlCopyMemory(wtc->scratch, data, length);
874 
875  master_mdl = IoAllocateMdl(wtc->scratch, length, false, false, NULL);
876  if (!master_mdl) {
877  ERR("out of memory\n");
879  }
880 
881  MmBuildMdlForNonPagedPool(master_mdl);
882 
883  wtc->mdl = master_mdl;
884 
885  pfns = (PFN_NUMBER*)(master_mdl + 1);
886  } else {
888 
889  master_mdl = IoAllocateMdl(data, length, false, false, NULL);
890  if (!master_mdl) {
891  ERR("out of memory\n");
893  }
894 
    // The body of this SEH block is missing from this listing; per the error
    // message below it wraps MmProbeAndLockPages.
895  _SEH2_TRY {
899  } _SEH2_END;
900 
901  if (!NT_SUCCESS(Status)) {
902  ERR("MmProbeAndLockPages threw exception %08lx\n", Status);
903  IoFreeMdl(master_mdl);
904  return Status;
905  }
906 
907  wtc->mdl = master_mdl;
908 
909  pfns = (PFN_NUMBER*)(master_mdl + 1);
910  }
911 
    // Visit the first stripe of each group, then copy its settings to the
    // remaining members of that group.
912  for (i = 0; i < c->chunk_item->num_stripes; i += c->chunk_item->sub_stripes) {
913  uint16_t j;
914 
915  if (startoffstripe > i)
916  stripes[i].start = startoff - (startoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length;
917  else if (startoffstripe == i)
918  stripes[i].start = startoff;
919  else
920  stripes[i].start = startoff - (startoff % c->chunk_item->stripe_length);
921 
922  if (endoffstripe > i)
923  stripes[i].end = endoff - (endoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length;
924  else if (endoffstripe == i)
925  stripes[i].end = endoff + 1;
926  else
927  stripes[i].end = endoff - (endoff % c->chunk_item->stripe_length);
928 
929  stripes[i].mdl = IoAllocateMdl(NULL, (ULONG)(stripes[i].end - stripes[i].start), false, false, NULL);
930  if (!stripes[i].mdl) {
931  ERR("IoAllocateMdl failed\n");
933  }
934 
935  for (j = 1; j < c->chunk_item->sub_stripes; j++) {
936  stripes[i+j].start = stripes[i].start;
937  stripes[i+j].end = stripes[i].end;
938  stripes[i+j].data = stripes[i].data;
939  stripes[i+j].mdl = stripes[i].mdl;
940  }
941  }
942 
943  pos = 0;
944 
    // One running byte offset per group.
945  stripeoff = ExAllocatePoolWithTag(PagedPool, sizeof(uint64_t) * c->chunk_item->num_stripes / c->chunk_item->sub_stripes, ALLOC_TAG);
946  if (!stripeoff) {
947  ERR("out of memory\n");
949  }
950 
951  RtlZeroMemory(stripeoff, sizeof(uint64_t) * c->chunk_item->num_stripes / c->chunk_item->sub_stripes);
952 
    // Deal the source pages out to the groups in round-robin order, one
    // stripe_length unit at a time. The first and last pieces may be shorter.
953  while (pos < length) {
954  PFN_NUMBER* stripe_pfns = (PFN_NUMBER*)(stripes[stripenum * c->chunk_item->sub_stripes].mdl + 1);
955 
956  if (pos == 0) {
957  uint32_t writelen = (uint32_t)min(stripes[stripenum * c->chunk_item->sub_stripes].end - stripes[stripenum * c->chunk_item->sub_stripes].start,
958  c->chunk_item->stripe_length - (stripes[stripenum * c->chunk_item->sub_stripes].start % c->chunk_item->stripe_length));
959 
960  RtlCopyMemory(stripe_pfns, pfns, writelen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
961 
962  stripeoff[stripenum] += writelen;
963  pos += writelen;
964  } else if (length - pos < c->chunk_item->stripe_length) {
965  RtlCopyMemory(&stripe_pfns[stripeoff[stripenum] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], (ULONG)((length - pos) * sizeof(PFN_NUMBER) >> PAGE_SHIFT));
966  break;
967  } else {
968  RtlCopyMemory(&stripe_pfns[stripeoff[stripenum] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], (ULONG)(c->chunk_item->stripe_length * sizeof(PFN_NUMBER) >> PAGE_SHIFT));
969 
970  stripeoff[stripenum] += c->chunk_item->stripe_length;
971  pos += c->chunk_item->stripe_length;
972  }
973 
974  stripenum = (stripenum + 1) % (c->chunk_item->num_stripes / c->chunk_item->sub_stripes);
975  }
976 
977  ExFreePool(stripeoff);
978 
979  return STATUS_SUCCESS;
980 }
981 
/* Records `length` bytes of `data`, destined for logical `address`, in the
 * chunk's list of partially written full stripes.
 *
 * Vcb     - volume; sector_shift converts byte counts to sector counts.
 * c       - chunk that owns the partial_stripes list. The parity count is 1
 *           when BLOCK_FLAG_RAID5 is set in its type, otherwise 2.
 * address - logical address of the data.
 * length  - number of bytes in `data`.
 * data    - bytes to store.
 *
 * Each partial_stripe holds a buffer for one full stripe's worth of data
 * (num_data_stripes * stripe_length bytes) and a bitmap with one bit per
 * sector. A new entry starts with every bit set, and bits are cleared for the
 * sectors supplied here. When all bits of an existing entry become clear, the
 * stripe is flushed with flush_partial_stripe and the entry is freed.
 *
 * c->partial_stripes_lock is held exclusively throughout and is released at
 * the `end` label. The list is kept ordered by address: the search stops at
 * the first entry beyond stripe_addr and a new entry is inserted before it.
 *
 * Returns an NTSTATUS. The Status declaration, its success and
 * out-of-memory assignments, the line that derives `ps` from `le`, and the
 * statement between the flush and the frees are all on lines missing from
 * this listing. */
982 __attribute__((nonnull(1,2,5)))
983 static NTSTATUS add_partial_stripe(device_extension* Vcb, chunk* c, uint64_t address, uint32_t length, void* data) {
985  LIST_ENTRY* le;
986  partial_stripe* ps;
987  uint64_t stripe_addr;
988  uint16_t num_data_stripes;
989 
990  num_data_stripes = c->chunk_item->num_stripes - (c->chunk_item->type & BLOCK_FLAG_RAID5 ? 1 : 2);
    // Round address down to the start of its full stripe, measured from the
    // chunk's offset.
991  stripe_addr = address - ((address - c->offset) % (num_data_stripes * c->chunk_item->stripe_length));
992 
993  ExAcquireResourceExclusiveLite(&c->partial_stripes_lock, true);
994 
995  le = c->partial_stripes.Flink;
996  while (le != &c->partial_stripes) {
998 
999  if (ps->address == stripe_addr) {
1000  // update existing entry
1001 
1002  RtlCopyMemory(ps->data + address - stripe_addr, data, length);
1003  RtlClearBits(&ps->bmp, (ULONG)((address - stripe_addr) >> Vcb->sector_shift), length >> Vcb->sector_shift);
1004 
1005  // if now filled, flush
1006  if (RtlAreBitsClear(&ps->bmp, 0, (ULONG)((num_data_stripes * c->chunk_item->stripe_length) >> Vcb->sector_shift))) {
1007  Status = flush_partial_stripe(Vcb, c, ps);
1008  if (!NT_SUCCESS(Status)) {
1009  ERR("flush_partial_stripe returned %08lx\n", Status);
1010  goto end;
1011  }
1012 
1014 
1015  if (ps->bmparr)
1016  ExFreePool(ps->bmparr);
1017 
1018  ExFreePool(ps);
1019  }
1020 
1022  goto end;
1023  } else if (ps->address > stripe_addr)
1024  break;
1025 
1026  le = le->Flink;
1027  }
1028 
1029  // add new entry
1030 
    // Header plus room for one full stripe of data.
1031  ps = ExAllocatePoolWithTag(NonPagedPool, offsetof(partial_stripe, data[0]) + (ULONG)(num_data_stripes * c->chunk_item->stripe_length), ALLOC_TAG);
1032  if (!ps) {
1033  ERR("out of memory\n");
1035  goto end;
1036  }
1037 
1038  ps->bmplen = (ULONG)(num_data_stripes * c->chunk_item->stripe_length) >> Vcb->sector_shift;
1039 
1040  ps->address = stripe_addr;
1041  ps->bmparr = ExAllocatePoolWithTag(NonPagedPool, (size_t)sector_align(((ps->bmplen / 8) + 1), sizeof(ULONG)), ALLOC_TAG);
1042  if (!ps->bmparr) {
1043  ERR("out of memory\n");
1044  ExFreePool(ps);
1046  goto end;
1047  }
1048 
    // Start with every sector marked as missing, then clear the ones just supplied.
1049  RtlInitializeBitMap(&ps->bmp, ps->bmparr, ps->bmplen);
1050  RtlSetAllBits(&ps->bmp);
1051 
1052  RtlCopyMemory(ps->data + address - stripe_addr, data, length);
1053  RtlClearBits(&ps->bmp, (ULONG)((address - stripe_addr) >> Vcb->sector_shift), length >> Vcb->sector_shift);
1054 
    // Insert before `le`, the first entry past stripe_addr or the list head.
1055  InsertHeadList(le->Blink, &ps->list_entry);
1056 
1058 
1059 end:
1060  ExReleaseResourceLite(&c->partial_stripes_lock);
1061 
1062  return Status;
1063 }
1064 
/* Helper record used by prepare_raid5_write, which declares a local
 * `log_stripe* log_stripes`. The member declarations are not present in this
 * listing. */
1065 typedef struct {
1068 } log_stripe;
1069 
1070 __attribute__((nonnull(1,2,4,6,10)))
1071 static NTSTATUS prepare_raid5_write(device_extension* Vcb, chunk* c, uint64_t address, void* data, uint32_t length, write_stripe* stripes, PIRP Irp,
1072  uint64_t irp_offset, ULONG priority, write_data_context* wtc) {
1073  uint64_t startoff, endoff, parity_start, parity_end;
1074  uint16_t startoffstripe, endoffstripe, parity, num_data_stripes = c->chunk_item->num_stripes - 1;
1075  uint64_t pos, parity_pos, *stripeoff = NULL;
1076  uint32_t i;
1077  bool file_write = Irp && Irp->MdlAddress && (Irp->MdlAddress->ByteOffset == 0);
1078  PMDL master_mdl;
1079  NTSTATUS Status;
1080  PFN_NUMBER *pfns, *parity_pfns;
1081  log_stripe* log_stripes = NULL;
1082 
1083  if ((address + length - c->offset) % (num_data_stripes * c->chunk_item->stripe_length) > 0) {
1084  uint64_t delta = (address + length - c->offset) % (num_data_stripes * c->chunk_item->stripe_length);
1085 
1086  delta = min(length, delta);
1087  Status = add_partial_stripe(Vcb, c, address + length - delta, (uint32_t)delta, (uint8_t*)data + length - delta);
1088  if (!NT_SUCCESS(Status)) {
1089  ERR("add_partial_stripe returned %08lx\n", Status);
1090  goto exit;
1091  }
1092 
1093  length -= (uint32_t)delta;
1094  }
1095 
1096  if (length > 0 && (address - c->offset) % (num_data_stripes * c->chunk_item->stripe_length) > 0) {
1097  uint64_t delta = (num_data_stripes * c->chunk_item->stripe_length) - ((address - c->offset) % (num_data_stripes * c->chunk_item->stripe_length));
1098 
1099  Status = add_partial_stripe(Vcb, c, address, (uint32_t)delta, data);
1100  if (!NT_SUCCESS(Status)) {
1101  ERR("add_partial_stripe returned %08lx\n", Status);
1102  goto exit;
1103  }
1104 
1105  address += delta;
1106  length -= (uint32_t)delta;
1107  irp_offset += delta;
1108  data = (uint8_t*)data + delta;
1109  }
1110 
1111  if (length == 0) {
1113  goto exit;
1114  }
1115 
1116  get_raid0_offset(address - c->offset, c->chunk_item->stripe_length, num_data_stripes, &startoff, &startoffstripe);
1117  get_raid0_offset(address + length - c->offset - 1, c->chunk_item->stripe_length, num_data_stripes, &endoff, &endoffstripe);
1118 
1119  pos = 0;
1120  while (pos < length) {
1121  parity = (((address - c->offset + pos) / (num_data_stripes * c->chunk_item->stripe_length)) + num_data_stripes) % c->chunk_item->num_stripes;
1122 
1123  if (pos == 0) {
1124  uint16_t stripe = (parity + startoffstripe + 1) % c->chunk_item->num_stripes;
1125  ULONG skip, writelen;
1126 
1127  i = startoffstripe;
1128  while (stripe != parity) {
1129  if (i == startoffstripe) {
1130  writelen = (ULONG)min(length, c->chunk_item->stripe_length - (startoff % c->chunk_item->stripe_length));
1131 
1132  stripes[stripe].start = startoff;
1133  stripes[stripe].end = startoff + writelen;
1134 
1135  pos += writelen;
1136 
1137  if (pos == length)
1138  break;
1139  } else {
1140  writelen = (ULONG)min(length - pos, c->chunk_item->stripe_length);
1141 
1142  stripes[stripe].start = startoff - (startoff % c->chunk_item->stripe_length);
1143  stripes[stripe].end = stripes[stripe].start + writelen;
1144 
1145  pos += writelen;
1146 
1147  if (pos == length)
1148  break;
1149  }
1150 
1151  i++;
1152  stripe = (stripe + 1) % c->chunk_item->num_stripes;
1153  }
1154 
1155  if (pos == length)
1156  break;
1157 
1158  for (i = 0; i < startoffstripe; i++) {
1159  stripe = (parity + i + 1) % c->chunk_item->num_stripes;
1160 
1161  stripes[stripe].start = stripes[stripe].end = startoff - (startoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length;
1162  }
1163 
1164  stripes[parity].start = stripes[parity].end = startoff - (startoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length;
1165 
1166  if (length - pos > c->chunk_item->num_stripes * num_data_stripes * c->chunk_item->stripe_length) {
1167  skip = (ULONG)(((length - pos) / (c->chunk_item->num_stripes * num_data_stripes * c->chunk_item->stripe_length)) - 1);
1168 
1169  for (i = 0; i < c->chunk_item->num_stripes; i++) {
1170  stripes[i].end += skip * c->chunk_item->num_stripes * c->chunk_item->stripe_length;
1171  }
1172 
1173  pos += skip * num_data_stripes * c->chunk_item->num_stripes * c->chunk_item->stripe_length;
1174  }
1175  } else if (length - pos >= c->chunk_item->stripe_length * num_data_stripes) {
1176  for (i = 0; i < c->chunk_item->num_stripes; i++) {
1177  stripes[i].end += c->chunk_item->stripe_length;
1178  }
1179 
1180  pos += c->chunk_item->stripe_length * num_data_stripes;
1181  } else {
1182  uint16_t stripe = (parity + 1) % c->chunk_item->num_stripes;
1183 
1184  i = 0;
1185  while (stripe != parity) {
1186  if (endoffstripe == i) {
1187  stripes[stripe].end = endoff + 1;
1188  break;
1189  } else if (endoffstripe > i)
1190  stripes[stripe].end = endoff - (endoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length;
1191 
1192  i++;
1193  stripe = (stripe + 1) % c->chunk_item->num_stripes;
1194  }
1195 
1196  break;
1197  }
1198  }
1199 
1200  parity_start = 0xffffffffffffffff;
1201  parity_end = 0;
1202 
1203  for (i = 0; i < c->chunk_item->num_stripes; i++) {
1204  if (stripes[i].start != 0 || stripes[i].end != 0) {
1205  parity_start = min(stripes[i].start, parity_start);
1206  parity_end = max(stripes[i].end, parity_end);
1207  }
1208  }
1209 
1210  if (parity_end == parity_start) {
1212  goto exit;
1213  }
1214 
1215  parity = (((address - c->offset) / (num_data_stripes * c->chunk_item->stripe_length)) + num_data_stripes) % c->chunk_item->num_stripes;
1216  stripes[parity].start = parity_start;
1217 
1218  parity = (((address - c->offset + length - 1) / (num_data_stripes * c->chunk_item->stripe_length)) + num_data_stripes) % c->chunk_item->num_stripes;
1219  stripes[parity].end = parity_end;
1220 
1221  log_stripes = ExAllocatePoolWithTag(NonPagedPool, sizeof(log_stripe) * num_data_stripes, ALLOC_TAG);
1222  if (!log_stripes) {
1223  ERR("out of memory\n");
1225  goto exit;
1226  }
1227 
1228  RtlZeroMemory(log_stripes, sizeof(log_stripe) * num_data_stripes);
1229 
1230  for (i = 0; i < num_data_stripes; i++) {
1231  log_stripes[i].mdl = IoAllocateMdl(NULL, (ULONG)(parity_end - parity_start), false, false, NULL);
1232  if (!log_stripes[i].mdl) {
1233  ERR("out of memory\n");
1235  goto exit;
1236  }
1237 
1238  log_stripes[i].mdl->MdlFlags |= MDL_PARTIAL;
1239  log_stripes[i].pfns = (PFN_NUMBER*)(log_stripes[i].mdl + 1);
1240  }
1241 
1242  wtc->parity1 = ExAllocatePoolWithTag(NonPagedPool, (ULONG)(parity_end - parity_start), ALLOC_TAG);
1243  if (!wtc->parity1) {
1244  ERR("out of memory\n");
1246  goto exit;
1247  }
1248 
1249  wtc->parity1_mdl = IoAllocateMdl(wtc->parity1, (ULONG)(parity_end - parity_start), false, false, NULL);
1250  if (!wtc->parity1_mdl) {
1251  ERR("out of memory\n");
1253  goto exit;
1254  }
1255 
1256  MmBuildMdlForNonPagedPool(wtc->parity1_mdl);
1257 
1258  if (file_write)
1259  master_mdl = Irp->MdlAddress;
1260  else if (((ULONG_PTR)data % PAGE_SIZE) != 0) {
1262  if (!wtc->scratch) {
1263  ERR("out of memory\n");
1265  goto exit;
1266  }
1267 
1268  RtlCopyMemory(wtc->scratch, data, length);
1269 
1270  master_mdl = IoAllocateMdl(wtc->scratch, length, false, false, NULL);
1271  if (!master_mdl) {
1272  ERR("out of memory\n");
1274  goto exit;
1275  }
1276 
1277  MmBuildMdlForNonPagedPool(master_mdl);
1278 
1279  wtc->mdl = master_mdl;
1280  } else {
1281  master_mdl = IoAllocateMdl(data, length, false, false, NULL);
1282  if (!master_mdl) {
1283  ERR("out of memory\n");
1285  goto exit;
1286  }
1287 
1289 
1290  _SEH2_TRY {
1294  } _SEH2_END;
1295 
1296  if (!NT_SUCCESS(Status)) {
1297  ERR("MmProbeAndLockPages threw exception %08lx\n", Status);
1298  IoFreeMdl(master_mdl);
1299  return Status;
1300  }
1301 
1302  wtc->mdl = master_mdl;
1303  }
1304 
1305  pfns = (PFN_NUMBER*)(master_mdl + 1);
1306  parity_pfns = (PFN_NUMBER*)(wtc->parity1_mdl + 1);
1307 
1308  if (file_write)
1309  pfns = &pfns[irp_offset >> PAGE_SHIFT];
1310 
1311  for (i = 0; i < c->chunk_item->num_stripes; i++) {
1312  if (stripes[i].start != stripes[i].end) {
1313  stripes[i].mdl = IoAllocateMdl((uint8_t*)MmGetMdlVirtualAddress(master_mdl) + irp_offset, (ULONG)(stripes[i].end - stripes[i].start), false, false, NULL);
1314  if (!stripes[i].mdl) {
1315  ERR("IoAllocateMdl failed\n");
1317  goto exit;
1318  }
1319  }
1320  }
1321 
1322  stripeoff = ExAllocatePoolWithTag(PagedPool, sizeof(uint64_t) * c->chunk_item->num_stripes, ALLOC_TAG);
1323  if (!stripeoff) {
1324  ERR("out of memory\n");
1326  goto exit;
1327  }
1328 
1329  RtlZeroMemory(stripeoff, sizeof(uint64_t) * c->chunk_item->num_stripes);
1330 
1331  pos = 0;
1332  parity_pos = 0;
1333 
1334  while (pos < length) {
1335  PFN_NUMBER* stripe_pfns;
1336 
1337  parity = (((address - c->offset + pos) / (num_data_stripes * c->chunk_item->stripe_length)) + num_data_stripes) % c->chunk_item->num_stripes;
1338 
1339  if (pos == 0) {
1340  uint16_t stripe = (parity + startoffstripe + 1) % c->chunk_item->num_stripes;
1341  uint32_t writelen = (uint32_t)min(length - pos, min(stripes[stripe].end - stripes[stripe].start,
1342  c->chunk_item->stripe_length - (stripes[stripe].start % c->chunk_item->stripe_length)));
1343  uint32_t maxwritelen = writelen;
1344 
1345  stripe_pfns = (PFN_NUMBER*)(stripes[stripe].mdl + 1);
1346 
1347  RtlCopyMemory(stripe_pfns, pfns, writelen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
1348 
1349  RtlCopyMemory(log_stripes[startoffstripe].pfns, pfns, writelen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
1350  log_stripes[startoffstripe].pfns += writelen >> PAGE_SHIFT;
1351 
1352  stripeoff[stripe] = writelen;
1353  pos += writelen;
1354 
1355  stripe = (stripe + 1) % c->chunk_item->num_stripes;
1356  i = startoffstripe + 1;
1357 
1358  while (stripe != parity) {
1359  stripe_pfns = (PFN_NUMBER*)(stripes[stripe].mdl + 1);
1360  writelen = (uint32_t)min(length - pos, min(stripes[stripe].end - stripes[stripe].start, c->chunk_item->stripe_length));
1361 
1362  if (writelen == 0)
1363  break;
1364 
1365  if (writelen > maxwritelen)
1366  maxwritelen = writelen;
1367 
1368  RtlCopyMemory(stripe_pfns, &pfns[pos >> PAGE_SHIFT], writelen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
1369 
1370  RtlCopyMemory(log_stripes[i].pfns, &pfns[pos >> PAGE_SHIFT], writelen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
1371  log_stripes[i].pfns += writelen >> PAGE_SHIFT;
1372 
1373  stripeoff[stripe] = writelen;
1374  pos += writelen;
1375 
1376  stripe = (stripe + 1) % c->chunk_item->num_stripes;
1377  i++;
1378  }
1379 
1380  stripe_pfns = (PFN_NUMBER*)(stripes[parity].mdl + 1);
1381 
1382  RtlCopyMemory(stripe_pfns, parity_pfns, maxwritelen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
1383  stripeoff[parity] = maxwritelen;
1384  parity_pos = maxwritelen;
1385  } else if (length - pos >= c->chunk_item->stripe_length * num_data_stripes) {
1386  uint16_t stripe = (parity + 1) % c->chunk_item->num_stripes;
1387 
1388  i = 0;
1389  while (stripe != parity) {
1390  stripe_pfns = (PFN_NUMBER*)(stripes[stripe].mdl + 1);
1391 
1392  RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], (ULONG)(c->chunk_item->stripe_length * sizeof(PFN_NUMBER) >> PAGE_SHIFT));
1393 
1394  RtlCopyMemory(log_stripes[i].pfns, &pfns[pos >> PAGE_SHIFT], (ULONG)(c->chunk_item->stripe_length * sizeof(PFN_NUMBER) >> PAGE_SHIFT));
1395  log_stripes[i].pfns += c->chunk_item->stripe_length >> PAGE_SHIFT;
1396 
1397  stripeoff[stripe] += c->chunk_item->stripe_length;
1398  pos += c->chunk_item->stripe_length;
1399 
1400  stripe = (stripe + 1) % c->chunk_item->num_stripes;
1401  i++;
1402  }
1403 
1404  stripe_pfns = (PFN_NUMBER*)(stripes[parity].mdl + 1);
1405 
1406  RtlCopyMemory(&stripe_pfns[stripeoff[parity] >> PAGE_SHIFT], &parity_pfns[parity_pos >> PAGE_SHIFT], (ULONG)(c->chunk_item->stripe_length * sizeof(PFN_NUMBER) >> PAGE_SHIFT));
1407  stripeoff[parity] += c->chunk_item->stripe_length;
1408  parity_pos += c->chunk_item->stripe_length;
1409  } else {
1410  uint16_t stripe = (parity + 1) % c->chunk_item->num_stripes;
1411  uint32_t writelen, maxwritelen = 0;
1412 
1413  i = 0;
1414  while (pos < length) {
1415  stripe_pfns = (PFN_NUMBER*)(stripes[stripe].mdl + 1);
1416  writelen = (uint32_t)min(length - pos, min(stripes[stripe].end - stripes[stripe].start, c->chunk_item->stripe_length));
1417 
1418  if (writelen == 0)
1419  break;
1420 
1421  if (writelen > maxwritelen)
1422  maxwritelen = writelen;
1423 
1424  RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], writelen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
1425 
1426  RtlCopyMemory(log_stripes[i].pfns, &pfns[pos >> PAGE_SHIFT], writelen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
1427  log_stripes[i].pfns += writelen >> PAGE_SHIFT;
1428 
1429  stripeoff[stripe] += writelen;
1430  pos += writelen;
1431 
1432  stripe = (stripe + 1) % c->chunk_item->num_stripes;
1433  i++;
1434  }
1435 
1436  stripe_pfns = (PFN_NUMBER*)(stripes[parity].mdl + 1);
1437 
1438  RtlCopyMemory(&stripe_pfns[stripeoff[parity] >> PAGE_SHIFT], &parity_pfns[parity_pos >> PAGE_SHIFT], maxwritelen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
1439  }
1440  }
1441 
1442  for (i = 0; i < num_data_stripes; i++) {
1444 
1445  if (i == 0)
1446  RtlCopyMemory(wtc->parity1, ss, (uint32_t)(parity_end - parity_start));
1447  else
1448  do_xor(wtc->parity1, ss, (uint32_t)(parity_end - parity_start));
1449  }
1450 
1452 
1453 exit:
1454  if (log_stripes) {
1455  for (i = 0; i < num_data_stripes; i++) {
1456  if (log_stripes[i].mdl)
1457  IoFreeMdl(log_stripes[i].mdl);
1458  }
1459 
1460  ExFreePool(log_stripes);
1461  }
1462 
1463  if (stripeoff)
1464  ExFreePool(stripeoff);
1465 
1466  return Status;
1467 }
1468 
/* Lays out a write to a RAID6 chunk (num_stripes - 2 data stripes plus two parity stripes).
 *
 * Steps, in order:
 *   1. Any part of the write that does not cover a whole row of data stripes (tail first, then head)
 *      is handed to add_partial_stripe, and address / length / data / irp_offset are trimmed to match.
 *   2. stripes[].start / stripes[].end are computed for every stripe in the chunk.
 *   3. A "master" MDL describing the source data is chosen or built, one MDL is allocated per stripe
 *      with a non-empty range, and the stripe MDLs' PFN arrays are filled by copying PFNs from the
 *      master MDL (data stripes) or from the parity buffers' MDLs (parity stripes).
 *   4. wtc->parity1 and wtc->parity2 are computed from per-data-stripe "log_stripes" views.
 *
 * Vcb, c          - passed through to add_partial_stripe; c also supplies offset and chunk_item geometry
 * address, length - range to write; both are adjusted locally in step 1
 * data            - source buffer
 * stripes         - per-stripe output array, indexed 0 .. num_stripes - 1
 * Irp, irp_offset - if Irp has an MDL with ByteOffset == 0, that MDL is used as the master MDL and
 *                   irp_offset selects the starting page within it
 * priority        - passed to MmGetSystemAddressForMdlSafe
 * wtc             - receives parity1, parity2, parity1_mdl, parity2_mdl, mdl and scratch
 *
 * Returns Status. The exit label frees only log_stripes and stripeoff; nothing stored in wtc or
 * stripes[] is released by this function.
 *
 * NOTE(review): the embedded line numbers in this listing skip in several places (e.g. 1511, 1612,
 * 1625, 1678, 1705, 1708-1710, 1890), so some statements are not visible here - presumably the Status
 * assignments before each "goto exit", the scratch allocation and the MmProbeAndLockPages call.
 * Confirm against the full file.
 */
1469  __attribute__((nonnull(1,2,4,6,10)))
1470  static NTSTATUS prepare_raid6_write(device_extension* Vcb, chunk* c, uint64_t address, void* data, uint32_t length, write_stripe* stripes, PIRP Irp,
1471  uint64_t irp_offset, ULONG priority, write_data_context* wtc) {
1472  uint64_t startoff, endoff, parity_start, parity_end;
1473  uint16_t startoffstripe, endoffstripe, parity1, num_data_stripes = c->chunk_item->num_stripes - 2;
1474  uint64_t pos, parity_pos, *stripeoff = NULL;
1475  uint32_t i;
// file_write: an IRP was supplied whose MDL starts on a page boundary, so its PFN array can be used directly
1476  bool file_write = Irp && Irp->MdlAddress && (Irp->MdlAddress->ByteOffset == 0);
1477  PMDL master_mdl;
1478  NTSTATUS Status;
1479  PFN_NUMBER *pfns, *parity1_pfns, *parity2_pfns;
1480  log_stripe* log_stripes = NULL;
1481 
// The end of the write is not on a full-row boundary: hand the trailing remainder to add_partial_stripe
1482  if ((address + length - c->offset) % (num_data_stripes * c->chunk_item->stripe_length) > 0) {
1483  uint64_t delta = (address + length - c->offset) % (num_data_stripes * c->chunk_item->stripe_length);
1484 
// the whole write may be shorter than the remainder
1485  delta = min(length, delta);
1486  Status = add_partial_stripe(Vcb, c, address + length - delta, (uint32_t)delta, (uint8_t*)data + length - delta);
1487  if (!NT_SUCCESS(Status)) {
1488  ERR("add_partial_stripe returned %08lx\n", Status);
1489  goto exit;
1490  }
1491 
1492  length -= (uint32_t)delta;
1493  }
1494 
// The start of what is left is not on a full-row boundary: hand the bytes up to the next boundary
// to add_partial_stripe and advance past them
1495  if (length > 0 && (address - c->offset) % (num_data_stripes * c->chunk_item->stripe_length) > 0) {
1496  uint64_t delta = (num_data_stripes * c->chunk_item->stripe_length) - ((address - c->offset) % (num_data_stripes * c->chunk_item->stripe_length));
1497 
1498  Status = add_partial_stripe(Vcb, c, address, (uint32_t)delta, data);
1499  if (!NT_SUCCESS(Status)) {
1500  ERR("add_partial_stripe returned %08lx\n", Status);
1501  goto exit;
1502  }
1503 
1504  address += delta;
1505  length -= (uint32_t)delta;
1506  irp_offset += delta;
1507  data = (uint8_t*)data + delta;
1508  }
1509 
// Everything was handled as partial stripes - nothing left to lay out
1510  if (length == 0) {
1512  goto exit;
1513  }
1514 
// Offset-within-stripe and data-stripe index of the first and of the last byte of the write
1515  get_raid0_offset(address - c->offset, c->chunk_item->stripe_length, num_data_stripes, &startoff, &startoffstripe);
1516  get_raid0_offset(address + length - c->offset - 1, c->chunk_item->stripe_length, num_data_stripes, &endoff, &endoffstripe);
1517 
// Pass 1: compute stripes[].start / .end only; no memory is touched yet
1518  pos = 0;
1519  while (pos < length) {
// index of the first parity stripe for the row containing pos; it rotates by one per row
1520  parity1 = (((address - c->offset + pos) / (num_data_stripes * c->chunk_item->stripe_length)) + num_data_stripes) % c->chunk_item->num_stripes;
1521 
1522  if (pos == 0) {
// first row: data stripes follow the two parity stripes, hence the "+ 2"
1523  uint16_t stripe = (parity1 + startoffstripe + 2) % c->chunk_item->num_stripes;
1524  uint16_t parity2 = (parity1 + 1) % c->chunk_item->num_stripes;
1525  ULONG skip, writelen;
1526 
1527  i = startoffstripe;
1528  while (stripe != parity1) {
1529  if (i == startoffstripe) {
// the first stripe written may start part-way into a stripe_length unit
1530  writelen = (ULONG)min(length, c->chunk_item->stripe_length - (startoff % c->chunk_item->stripe_length));
1531 
1532  stripes[stripe].start = startoff;
1533  stripes[stripe].end = startoff + writelen;
1534 
1535  pos += writelen;
1536 
1537  if (pos == length)
1538  break;
1539  } else {
1540  writelen = (ULONG)min(length - pos, c->chunk_item->stripe_length);
1541 
1542  stripes[stripe].start = startoff - (startoff % c->chunk_item->stripe_length);
1543  stripes[stripe].end = stripes[stripe].start + writelen;
1544 
1545  pos += writelen;
1546 
1547  if (pos == length)
1548  break;
1549  }
1550 
1551  i++;
1552  stripe = (stripe + 1) % c->chunk_item->num_stripes;
1553  }
1554 
1555  if (pos == length)
1556  break;
1557 
// data stripes before the starting one get nothing in this row: give them an empty range
// positioned at the start of the next row
1558  for (i = 0; i < startoffstripe; i++) {
1559  stripe = (parity1 + i + 2) % c->chunk_item->num_stripes;
1560 
1561  stripes[stripe].start = stripes[stripe].end = startoff - (startoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length;
1562  }
1563 
// same placeholder for both parity stripes; their real start/end are set after this loop
1564  stripes[parity1].start = stripes[parity1].end = stripes[parity2].start = stripes[parity2].end =
1565  startoff - (startoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length;
1566 
// fast-forward over whole groups of num_stripes rows in one step when the write is large
1567  if (length - pos > c->chunk_item->num_stripes * num_data_stripes * c->chunk_item->stripe_length) {
1568  skip = (ULONG)(((length - pos) / (c->chunk_item->num_stripes * num_data_stripes * c->chunk_item->stripe_length)) - 1);
1569 
1570  for (i = 0; i < c->chunk_item->num_stripes; i++) {
1571  stripes[i].end += skip * c->chunk_item->num_stripes * c->chunk_item->stripe_length;
1572  }
1573 
1574  pos += skip * num_data_stripes * c->chunk_item->num_stripes * c->chunk_item->stripe_length;
1575  }
1576  } else if (length - pos >= c->chunk_item->stripe_length * num_data_stripes) {
// a complete row: every stripe (data and parity) grows by one stripe_length
1577  for (i = 0; i < c->chunk_item->num_stripes; i++) {
1578  stripes[i].end += c->chunk_item->stripe_length;
1579  }
1580 
1581  pos += c->chunk_item->stripe_length * num_data_stripes;
1582  } else {
// final, incomplete row: extend data stripes up to and including the one holding the last byte
1583  uint16_t stripe = (parity1 + 2) % c->chunk_item->num_stripes;
1584 
1585  i = 0;
1586  while (stripe != parity1) {
1587  if (endoffstripe == i) {
1588  stripes[stripe].end = endoff + 1;
1589  break;
1590  } else if (endoffstripe > i)
1591  stripes[stripe].end = endoff - (endoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length;
1592 
1593  i++;
1594  stripe = (stripe + 1) % c->chunk_item->num_stripes;
1595  }
1596 
1597  break;
1598  }
1599  }
1600 
// The parity range is the union of all stripe ranges that were set (start/end both 0 means untouched)
1601  parity_start = 0xffffffffffffffff;
1602  parity_end = 0;
1603 
1604  for (i = 0; i < c->chunk_item->num_stripes; i++) {
1605  if (stripes[i].start != 0 || stripes[i].end != 0) {
1606  parity_start = min(stripes[i].start, parity_start);
1607  parity_end = max(stripes[i].end, parity_end);
1608  }
1609  }
1610 
1611  if (parity_end == parity_start) {
1613  goto exit;
1614  }
1615 
// the parity pair of the first row carries parity_start ...
1616  parity1 = (((address - c->offset) / (num_data_stripes * c->chunk_item->stripe_length)) + num_data_stripes) % c->chunk_item->num_stripes;
1617  stripes[parity1].start = stripes[(parity1 + 1) % c->chunk_item->num_stripes].start = parity_start;
1618 
// ... and the parity pair of the last row carries parity_end
1619  parity1 = (((address - c->offset + length - 1) / (num_data_stripes * c->chunk_item->stripe_length)) + num_data_stripes) % c->chunk_item->num_stripes;
1620  stripes[parity1].end = stripes[(parity1 + 1) % c->chunk_item->num_stripes].end = parity_end;
1621 
// log_stripes: one entry per data-stripe position, used further down to read each position's data
// as one contiguous run when computing parity
1622  log_stripes = ExAllocatePoolWithTag(NonPagedPool, sizeof(log_stripe) * num_data_stripes, ALLOC_TAG);
1623  if (!log_stripes) {
1624  ERR("out of memory\n");
1626  goto exit;
1627  }
1628 
// zeroed so that the exit path can tell which MDLs were actually allocated
1629  RtlZeroMemory(log_stripes, sizeof(log_stripe) * num_data_stripes);
1630 
1631  for (i = 0; i < num_data_stripes; i++) {
1632  log_stripes[i].mdl = IoAllocateMdl(NULL, (ULONG)(parity_end - parity_start), false, false, NULL);
1633  if (!log_stripes[i].mdl) {
1634  ERR("out of memory\n");
1636  goto exit;
1637  }
1638 
1639  log_stripes[i].mdl->MdlFlags |= MDL_PARTIAL;
// the PFN array sits immediately after the MDL header; .pfns is a write cursor into it
1640  log_stripes[i].pfns = (PFN_NUMBER*)(log_stripes[i].mdl + 1);
1641  }
1642 
// Two parity buffers, each covering the whole parity range, and an MDL over each
1643  wtc->parity1 = ExAllocatePoolWithTag(NonPagedPool, (ULONG)(parity_end - parity_start), ALLOC_TAG);
1644  if (!wtc->parity1) {
1645  ERR("out of memory\n");
1647  goto exit;
1648  }
1649 
1650  wtc->parity2 = ExAllocatePoolWithTag(NonPagedPool, (ULONG)(parity_end - parity_start), ALLOC_TAG);
1651  if (!wtc->parity2) {
1652  ERR("out of memory\n");
1654  goto exit;
1655  }
1656 
1657  wtc->parity1_mdl = IoAllocateMdl(wtc->parity1, (ULONG)(parity_end - parity_start), false, false, NULL);
1658  if (!wtc->parity1_mdl) {
1659  ERR("out of memory\n");
1661  goto exit;
1662  }
1663 
1664  MmBuildMdlForNonPagedPool(wtc->parity1_mdl);
1665 
1666  wtc->parity2_mdl = IoAllocateMdl(wtc->parity2, (ULONG)(parity_end - parity_start), false, false, NULL);
1667  if (!wtc->parity2_mdl) {
1668  ERR("out of memory\n");
1670  goto exit;
1671  }
1672 
1673  MmBuildMdlForNonPagedPool(wtc->parity2_mdl);
1674 
// Choose the master MDL describing the source data:
//  - file write: reuse the IRP's MDL
//  - data not page-aligned: copy it into wtc->scratch and describe the copy
//    (the scratch allocation itself is on a line not shown in this listing)
//  - otherwise: describe data in place (the page-locking call inside the SEH block is also not shown)
1675  if (file_write)
1676  master_mdl = Irp->MdlAddress;
1677  else if (((ULONG_PTR)data % PAGE_SIZE) != 0) {
1679  if (!wtc->scratch) {
1680  ERR("out of memory\n");
1682  goto exit;
1683  }
1684 
1685  RtlCopyMemory(wtc->scratch, data, length);
1686 
1687  master_mdl = IoAllocateMdl(wtc->scratch, length, false, false, NULL);
1688  if (!master_mdl) {
1689  ERR("out of memory\n");
1691  goto exit;
1692  }
1693 
1694  MmBuildMdlForNonPagedPool(master_mdl);
1695 
1696  wtc->mdl = master_mdl;
1697  } else {
1698  master_mdl = IoAllocateMdl(data, length, false, false, NULL);
1699  if (!master_mdl) {
1700  ERR("out of memory\n");
1702  goto exit;
1703  }
1704 
1706 
1707  _SEH2_TRY {
1711  } _SEH2_END;
1712 
1713  if (!NT_SUCCESS(Status)) {
1714  ERR("MmProbeAndLockPages threw exception %08lx\n", Status);
// master_mdl has not been stored in wtc->mdl yet, so it has to be freed here
1715  IoFreeMdl(master_mdl);
1716  goto exit;
1717  }
1718 
1719  wtc->mdl = master_mdl;
1720  }
1721 
// PFN arrays of the master and parity MDLs (each follows its MDL header)
1722  pfns = (PFN_NUMBER*)(master_mdl + 1);
1723  parity1_pfns = (PFN_NUMBER*)(wtc->parity1_mdl + 1);
1724  parity2_pfns = (PFN_NUMBER*)(wtc->parity2_mdl + 1);
1725 
// for a file write the data starts irp_offset bytes into the IRP's MDL
1726  if (file_write)
1727  pfns = &pfns[irp_offset >> PAGE_SHIFT];
1728 
// One MDL per stripe that has anything to write; the PFN arrays are filled in by the loop below
1729  for (i = 0; i < c->chunk_item->num_stripes; i++) {
1730  if (stripes[i].start != stripes[i].end) {
1731  stripes[i].mdl = IoAllocateMdl((uint8_t*)MmGetMdlVirtualAddress(master_mdl) + irp_offset, (ULONG)(stripes[i].end - stripes[i].start), false, false, NULL);
1732  if (!stripes[i].mdl) {
1733  ERR("IoAllocateMdl failed\n");
1735  goto exit;
1736  }
1737  }
1738  }
1739 
// stripeoff[n]: number of bytes already assigned to stripe n's MDL
1740  stripeoff = ExAllocatePoolWithTag(PagedPool, sizeof(uint64_t) * c->chunk_item->num_stripes, ALLOC_TAG);
1741  if (!stripeoff) {
1742  ERR("out of memory\n");
1744  goto exit;
1745  }
1746 
1747  RtlZeroMemory(stripeoff, sizeof(uint64_t) * c->chunk_item->num_stripes);
1748 
// Pass 2: walk the write again in the same row order, this time copying PFNs.
// Byte counts are converted to PFN-array byte counts via "* sizeof(PFN_NUMBER) >> PAGE_SHIFT".
// NOTE(review): this presumes every length handled here is a multiple of the page size - confirm.
1749  pos = 0;
1750  parity_pos = 0;
1751 
1752  while (pos < length) {
1753  PFN_NUMBER* stripe_pfns;
1754 
1755  parity1 = (((address - c->offset + pos) / (num_data_stripes * c->chunk_item->stripe_length)) + num_data_stripes) % c->chunk_item->num_stripes;
1756 
1757  if (pos == 0) {
// first row, starting at data stripe startoffstripe
1758  uint16_t stripe = (parity1 + startoffstripe + 2) % c->chunk_item->num_stripes, parity2;
1759  uint32_t writelen = (uint32_t)min(length - pos, min(stripes[stripe].end - stripes[stripe].start,
1760  c->chunk_item->stripe_length - (stripes[stripe].start % c->chunk_item->stripe_length)));
// maxwritelen: longest run written to any data stripe in this row; the parity stripes get that much
1761  uint32_t maxwritelen = writelen;
1762 
1763  stripe_pfns = (PFN_NUMBER*)(stripes[stripe].mdl + 1);
1764 
1765  RtlCopyMemory(stripe_pfns, pfns, writelen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
1766 
// mirror the same pages into the log_stripes view used for the parity calculation
1767  RtlCopyMemory(log_stripes[startoffstripe].pfns, pfns, writelen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
1768  log_stripes[startoffstripe].pfns += writelen >> PAGE_SHIFT;
1769 
1770  stripeoff[stripe] = writelen;
1771  pos += writelen;
1772 
1773  stripe = (stripe + 1) % c->chunk_item->num_stripes;
1774  i = startoffstripe + 1;
1775 
1776  while (stripe != parity1) {
1777  stripe_pfns = (PFN_NUMBER*)(stripes[stripe].mdl + 1);
1778  writelen = (uint32_t)min(length - pos, min(stripes[stripe].end - stripes[stripe].start, c->chunk_item->stripe_length));
1779 
1780  if (writelen == 0)
1781  break;
1782 
1783  if (writelen > maxwritelen)
1784  maxwritelen = writelen;
1785 
1786  RtlCopyMemory(stripe_pfns, &pfns[pos >> PAGE_SHIFT], writelen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
1787 
1788  RtlCopyMemory(log_stripes[i].pfns, &pfns[pos >> PAGE_SHIFT], writelen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
1789  log_stripes[i].pfns += writelen >> PAGE_SHIFT;
1790 
1791  stripeoff[stripe] = writelen;
1792  pos += writelen;
1793 
1794  stripe = (stripe + 1) % c->chunk_item->num_stripes;
1795  i++;
1796  }
1797 
// both parity stripes of this row take the first maxwritelen bytes of their parity buffer
1798  stripe_pfns = (PFN_NUMBER*)(stripes[parity1].mdl + 1);
1799  RtlCopyMemory(stripe_pfns, parity1_pfns, maxwritelen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
1800  stripeoff[parity1] = maxwritelen;
1801 
1802  parity2 = (parity1 + 1) % c->chunk_item->num_stripes;
1803 
1804  stripe_pfns = (PFN_NUMBER*)(stripes[parity2].mdl + 1);
1805  RtlCopyMemory(stripe_pfns, parity2_pfns, maxwritelen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
1806  stripeoff[parity2] = maxwritelen;
1807 
1808  parity_pos = maxwritelen;
1809  } else if (length - pos >= c->chunk_item->stripe_length * num_data_stripes) {
// a complete row: one stripe_length of source pages per data stripe, appended at stripeoff[stripe]
1810  uint16_t stripe = (parity1 + 2) % c->chunk_item->num_stripes, parity2;
1811 
1812  i = 0;
1813  while (stripe != parity1) {
1814  stripe_pfns = (PFN_NUMBER*)(stripes[stripe].mdl + 1);
1815 
1816  RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], (ULONG)(c->chunk_item->stripe_length * sizeof(PFN_NUMBER) >> PAGE_SHIFT));
1817 
1818  RtlCopyMemory(log_stripes[i].pfns, &pfns[pos >> PAGE_SHIFT], (ULONG)(c->chunk_item->stripe_length * sizeof(PFN_NUMBER) >> PAGE_SHIFT));
1819  log_stripes[i].pfns += c->chunk_item->stripe_length >> PAGE_SHIFT;
1820 
1821  stripeoff[stripe] += c->chunk_item->stripe_length;
1822  pos += c->chunk_item->stripe_length;
1823 
1824  stripe = (stripe + 1) % c->chunk_item->num_stripes;
1825  i++;
1826  }
1827 
// the next stripe_length of each parity buffer goes to this row's two parity stripes
1828  stripe_pfns = (PFN_NUMBER*)(stripes[parity1].mdl + 1);
1829  RtlCopyMemory(&stripe_pfns[stripeoff[parity1] >> PAGE_SHIFT], &parity1_pfns[parity_pos >> PAGE_SHIFT], (ULONG)(c->chunk_item->stripe_length * sizeof(PFN_NUMBER) >> PAGE_SHIFT));
1830  stripeoff[parity1] += c->chunk_item->stripe_length;
1831 
1832  parity2 = (parity1 + 1) % c->chunk_item->num_stripes;
1833 
1834  stripe_pfns = (PFN_NUMBER*)(stripes[parity2].mdl + 1);
1835  RtlCopyMemory(&stripe_pfns[stripeoff[parity2] >> PAGE_SHIFT], &parity2_pfns[parity_pos >> PAGE_SHIFT], (ULONG)(c->chunk_item->stripe_length * sizeof(PFN_NUMBER) >> PAGE_SHIFT));
1836  stripeoff[parity2] += c->chunk_item->stripe_length;
1837 
1838  parity_pos += c->chunk_item->stripe_length;
1839  } else {
// final, incomplete row: distribute whatever is left, then give the parity stripes maxwritelen bytes
1840  uint16_t stripe = (parity1 + 2) % c->chunk_item->num_stripes, parity2;
1841  uint32_t writelen, maxwritelen = 0;
1842 
1843  i = 0;
1844  while (pos < length) {
1845  stripe_pfns = (PFN_NUMBER*)(stripes[stripe].mdl + 1);
1846  writelen = (uint32_t)min(length - pos, min(stripes[stripe].end - stripes[stripe].start, c->chunk_item->stripe_length));
1847 
1848  if (writelen == 0)
1849  break;
1850 
1851  if (writelen > maxwritelen)
1852  maxwritelen = writelen;
1853 
1854  RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], writelen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
1855 
1856  RtlCopyMemory(log_stripes[i].pfns, &pfns[pos >> PAGE_SHIFT], writelen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
1857  log_stripes[i].pfns += writelen >> PAGE_SHIFT;
1858 
1859  stripeoff[stripe] += writelen;
1860  pos += writelen;
1861 
1862  stripe = (stripe + 1) % c->chunk_item->num_stripes;
1863  i++;
1864  }
1865 
1866  stripe_pfns = (PFN_NUMBER*)(stripes[parity1].mdl + 1);
1867  RtlCopyMemory(&stripe_pfns[stripeoff[parity1] >> PAGE_SHIFT], &parity1_pfns[parity_pos >> PAGE_SHIFT], maxwritelen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
1868 
1869  parity2 = (parity1 + 1) % c->chunk_item->num_stripes;
1870 
1871  stripe_pfns = (PFN_NUMBER*)(stripes[parity2].mdl + 1);
1872  RtlCopyMemory(&stripe_pfns[stripeoff[parity2] >> PAGE_SHIFT], &parity2_pfns[parity_pos >> PAGE_SHIFT], maxwritelen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
1873  }
1874  }
1875 
// Compute both parity buffers, visiting the data-stripe positions from last to first.
// parity1 accumulates via do_xor; parity2 is passed through galois_double before each do_xor.
// (do_xor / galois_double are defined elsewhere; from their names and this usage it looks like the
// usual RAID6 P = XOR and Q = Horner-style GF multiply-accumulate - confirm.)
// NOTE(review): ss is used without a NULL check, although MmGetSystemAddressForMdlSafe can fail.
1876  for (i = 0; i < num_data_stripes; i++) {
1877  uint8_t* ss = MmGetSystemAddressForMdlSafe(log_stripes[c->chunk_item->num_stripes - 3 - i].mdl, priority);
1878 
1879  if (i == 0) {
1880  RtlCopyMemory(wtc->parity1, ss, (ULONG)(parity_end - parity_start));
1881  RtlCopyMemory(wtc->parity2, ss, (ULONG)(parity_end - parity_start));
1882  } else {
1883  do_xor(wtc->parity1, ss, (uint32_t)(parity_end - parity_start));
1884 
1885  galois_double(wtc->parity2, (uint32_t)(parity_end - parity_start));
1886  do_xor(wtc->parity2, ss, (uint32_t)(parity_end - parity_start));
1887  }
1888  }
1889 
1891 
// Common exit: release only the temporaries owned by this function
1892 exit:
1893  if (log_stripes) {
1894  for (i = 0; i < num_data_stripes; i++) {
1895  if (log_stripes[i].mdl)
1896  IoFreeMdl(log_stripes[i].mdl);
1897  }
1898 
1899  ExFreePool(log_stripes);
1900  }
1901 
1902  if (stripeoff)
1903  ExFreePool(stripeoff);
1904 
1905  return Status;
1906 }
1907 
/* Prepares the per-device write requests for a write of `length` bytes of `data` at `address`.
 *
 * NOTE(review): the function's signature (listing lines 1909-1910) is not visible in this view, nor
 * are several other statements (the listing's embedded line numbers skip, e.g. 1921, 1931, 2034-2037,
 * 2077-2081, 2114, 2121, 2129, 2174). The names Vcb, address, data, length, c, wtc, Irp, irp_offset,
 * file_write and priority used below are presumably its parameters - confirm against the full file.
 *
 * Visible behaviour:
 *   1. A temporary write_stripe array with one entry per stripe of the chunk is allocated and zeroed.
 *   2. Depending on the chunk type, a prepare_raid*_write routine fills it in; for all other types
 *      every stripe is given the same range and data, plus an MDL if its device is present.
 *   3. The write is refused if more devices are missing than the profile tolerates.
 *   4. For each stripe a record is built; stripes with data and a present device get an IRP whose
 *      completion routine is write_data_completion. Every record is appended to wtc->stripes.
 *
 * Returns Status. On the prepare_failed path, the stripe MDLs and the parity / scratch / MDL members
 * of wtc are released and reset to NULL.
 */
1908  __attribute__((nonnull(1,3,5)))
1911  NTSTATUS Status;
1912  uint32_t i;
1913  CHUNK_ITEM_STRIPE* cis;
1914  write_stripe* stripes = NULL;
1915  uint64_t total_writing = 0;
1916  ULONG allowed_missing, missing;
1917 
1918  TRACE("(%p, %I64x, %p, %x)\n", Vcb, address, data, length);
1919 
// No chunk supplied: it is presumably looked up from address on the line missing from this listing;
// fail if there still is none
1920  if (!c) {
1922  if (!c) {
1923  ERR("could not get chunk for address %I64x\n", address);
1924  return STATUS_INTERNAL_ERROR;
1925  }
1926  }
1927 
1928  stripes = ExAllocatePoolWithTag(PagedPool, sizeof(write_stripe) * c->chunk_item->num_stripes, ALLOC_TAG);
1929  if (!stripes) {
1930  ERR("out of memory\n");
1932  }
1933 
1934  RtlZeroMemory(stripes, sizeof(write_stripe) * c->chunk_item->num_stripes);
1935 
// the CHUNK_ITEM_STRIPE array starts directly after the CHUNK_ITEM header
1936  cis = (CHUNK_ITEM_STRIPE*)&c->chunk_item[1];
1937 
// Dispatch on the chunk's RAID profile. allowed_missing is the number of absent devices the write
// may proceed with: 0 for RAID0, 1 for RAID10 and RAID5, 2 for RAID6, num_stripes - 1 otherwise.
1938  if (c->chunk_item->type & BLOCK_FLAG_RAID0) {
1939  Status = prepare_raid0_write(c, address, data, length, stripes, file_write ? Irp : NULL, irp_offset, wtc);
1940  if (!NT_SUCCESS(Status)) {
1941  ERR("prepare_raid0_write returned %08lx\n", Status);
1942  goto prepare_failed;
1943  }
1944 
1945  allowed_missing = 0;
1946  } else if (c->chunk_item->type & BLOCK_FLAG_RAID10) {
1947  Status = prepare_raid10_write(c, address, data, length, stripes, file_write ? Irp : NULL, irp_offset, wtc);
1948  if (!NT_SUCCESS(Status)) {
1949  ERR("prepare_raid10_write returned %08lx\n", Status);
1950  goto prepare_failed;
1951  }
1952 
1953  allowed_missing = 1;
1954  } else if (c->chunk_item->type & BLOCK_FLAG_RAID5) {
1955  Status = prepare_raid5_write(Vcb, c, address, data, length, stripes, file_write ? Irp : NULL, irp_offset, priority, wtc);
1956  if (!NT_SUCCESS(Status)) {
1957  ERR("prepare_raid5_write returned %08lx\n", Status);
1958  goto prepare_failed;
1959  }
1960 
1961  allowed_missing = 1;
1962  } else if (c->chunk_item->type & BLOCK_FLAG_RAID6) {
1963  Status = prepare_raid6_write(Vcb, c, address, data, length, stripes, file_write ? Irp : NULL, irp_offset, priority, wtc);
1964  if (!NT_SUCCESS(Status)) {
1965  ERR("prepare_raid6_write returned %08lx\n", Status);
1966  goto prepare_failed;
1967  }
1968 
1969  allowed_missing = 2;
1970  } else { // write same data to every location - SINGLE, DUP, RAID1, RAID1C3, RAID1C4
1971  for (i = 0; i < c->chunk_item->num_stripes; i++) {
1972  stripes[i].start = address - c->offset;
1973  stripes[i].end = stripes[i].start + length;
1974  stripes[i].data = data;
1975  stripes[i].irp_offset = irp_offset;
1976 
// an MDL is only built for devices that are actually present
1977  if (c->devices[i]->devobj) {
1978  if (file_write) {
// file write: describe the relevant part of the IRP's MDL with a partial MDL
1979  uint8_t* va;
1980  ULONG writelen = (ULONG)(stripes[i].end - stripes[i].start);
1981 
1982  va = (uint8_t*)MmGetMdlVirtualAddress(Irp->MdlAddress) + stripes[i].irp_offset;
1983 
1984  stripes[i].mdl = IoAllocateMdl(va, writelen, false, false, NULL);
1985  if (!stripes[i].mdl) {
1986  ERR("IoAllocateMdl failed\n");
1988  goto prepare_failed;
1989  }
1990 
1991  IoBuildPartialMdl(Irp->MdlAddress, stripes[i].mdl, va, writelen);
1992  } else {
// otherwise describe the data buffer directly (the page-locking call inside the SEH block is on
// lines not shown in this listing)
1993  stripes[i].mdl = IoAllocateMdl(stripes[i].data, (ULONG)(stripes[i].end - stripes[i].start), false, false, NULL);
1994  if (!stripes[i].mdl) {
1995  ERR("IoAllocateMdl failed\n");
1997  goto prepare_failed;
1998  }
1999 
2001 
2002  _SEH2_TRY {
2006  } _SEH2_END;
2007 
2008  if (!NT_SUCCESS(Status)) {
2009  ERR("MmProbeAndLockPages threw exception %08lx\n", Status);
// free and clear here so that prepare_failed does not touch this MDL again
2010  IoFreeMdl(stripes[i].mdl);
2011  stripes[i].mdl = NULL;
2012  goto prepare_failed;
2013  }
2014  }
2015  }
2016  }
2017 
2018  allowed_missing = c->chunk_item->num_stripes - 1;
2019  }
2020 
// Count the devices of this chunk that have no device object and refuse the write if there are too many
2021  missing = 0;
2022  for (i = 0; i < c->chunk_item->num_stripes; i++) {
2023  if (!c->devices[i]->devobj)
2024  missing++;
2025  }
2026 
2027  if (missing > allowed_missing) {
2028  ERR("cannot write as %lu missing devices (maximum %lu)\n", missing, allowed_missing);
2030  goto prepare_failed;
2031  }
2032 
// Build one record per stripe (the declaration and allocation of `stripe` and `IrpSp` are on lines
// not shown in this listing)
2033  for (i = 0; i < c->chunk_item->num_stripes; i++) {
2036 
2038  if (!stripe) {
2039  ERR("out of memory\n");
2041  goto end;
2042  }
2043 
// Nothing to write to this stripe, or its device is absent: record it as ignored, with no IRP.
// NOTE(review): if the device is absent but stripes[i].mdl was allocated by a RAID prepare routine,
// that MDL pointer is dropped here and `stripes` is freed at `end` - check whether the MDL leaks.
2044  if (stripes[i].start == stripes[i].end || !c->devices[i]->devobj) {
2045  stripe->status = WriteDataStatus_Ignore;
2046  stripe->Irp = NULL;
2047  stripe->buf = stripes[i].data;
2048  stripe->mdl = NULL;
2049  } else {
2050  stripe->context = (struct _write_data_context*)wtc;
2051  stripe->buf = stripes[i].data;
2052  stripe->device = c->devices[i];
2053  RtlZeroMemory(&stripe->iosb, sizeof(IO_STATUS_BLOCK));
2054  stripe->status = WriteDataStatus_Pending;
// ownership of the MDL passes from the temporary array to the stripe record
2055  stripe->mdl = stripes[i].mdl;
2056 
// without a parent IRP a fresh IRP is allocated; with one, the (elided) call is reported below as
// IoMakeAssociatedIrp
2057  if (!Irp) {
2058  stripe->Irp = IoAllocateIrp(stripe->device->devobj->StackSize, false);
2059 
2060  if (!stripe->Irp) {
2061  ERR("IoAllocateIrp failed\n");
2062  ExFreePool(stripe);
2064  goto end;
2065  }
2066  } else {
2068 
2069  if (!stripe->Irp) {
2070  ERR("IoMakeAssociatedIrp failed\n");
2071  ExFreePool(stripe);
2073  goto end;
2074  }
2075  }
2076 
// Hand the data to the device in the form its I/O method expects: system buffer (first branch, whose
// condition is on a line not shown), MDL for DO_DIRECT_IO, otherwise a mapped address in UserBuffer.
// NOTE(review): the MmGetSystemAddressForMdlSafe results are stored without a NULL check.
2080 
2082  stripe->Irp->AssociatedIrp.SystemBuffer = MmGetSystemAddressForMdlSafe(stripes[i].mdl, priority);
2083 
2084  stripe->Irp->Flags = IRP_BUFFERED_IO;
2085  } else if (stripe->device->devobj->Flags & DO_DIRECT_IO)
2086  stripe->Irp->MdlAddress = stripe->mdl;
2087  else
2088  stripe->Irp->UserBuffer = MmGetSystemAddressForMdlSafe(stripes[i].mdl, priority);
2089 
2090 #ifdef DEBUG_PARANOID
2091  if (stripes[i].end < stripes[i].start) {
2092  ERR("trying to write stripe with negative length (%I64x < %I64x)\n", stripes[i].end, stripes[i].start);
2093  int3;
2094  }
2095 #endif
2096 
// device byte offset = offset within the stripe + the stripe's offset from its CHUNK_ITEM_STRIPE entry
2097  IrpSp->Parameters.Write.Length = (ULONG)(stripes[i].end - stripes[i].start);
2098  IrpSp->Parameters.Write.ByteOffset.QuadPart = stripes[i].start + cis[i].offset;
2099 
2100  total_writing += IrpSp->Parameters.Write.Length;
2101 
2102  stripe->Irp->UserIosb = &stripe->iosb;
// one more completion outstanding on this write context
2103  wtc->stripes_left++;
2104 
2105  IoSetCompletionRoutine(stripe->Irp, write_data_completion, stripe, true, true, true);
2106  }
2107 
2108  InsertTailList(&wtc->stripes, &stripe->list_entry);
2109  }
2110 
// report the total number of bytes queued when disk accounting is enabled
2111  if (diskacc)
2112  fFsRtlUpdateDiskCounters(0, total_writing);
2113 
2115 
// Normal exit and late-failure exit: the temporary array is always freed; the statement guarded by
// the !NT_SUCCESS check is on a line not shown in this listing
2116 end:
2117 
2118  if (stripes) ExFreePool(stripes);
2119 
2120  if (!NT_SUCCESS(Status))
2122 
2123  return Status;
2124 
// Failure before any stripe record exists: undo everything the prepare step may have set up
2125 prepare_failed:
2126  for (i = 0; i < c->chunk_item->num_stripes; i++) {
// skip an MDL identical to the previous stripe's, so a shared MDL is not freed twice
2127  if (stripes[i].mdl && (i == 0 || stripes[i].mdl != stripes[i-1].mdl)) {
2128  if (stripes[i].mdl->MdlFlags & MDL_PAGES_LOCKED)
2130 
2131  IoFreeMdl(stripes[i].mdl);
2132  }
2133  }
2134 
// each wtc resource is unlocked if needed, freed, and reset to NULL
2135  if (wtc->parity1_mdl) {
2136  if (wtc->parity1_mdl->MdlFlags & MDL_PAGES_LOCKED)
2137  MmUnlockPages(wtc->parity1_mdl);
2138 
2139  IoFreeMdl(wtc->parity1_mdl);
2140  wtc->parity1_mdl = NULL;
2141  }
2142 
2143  if (wtc->parity2_mdl) {
2144  if (wtc->parity2_mdl->MdlFlags & MDL_PAGES_LOCKED)
2145  MmUnlockPages(wtc->parity2_mdl);
2146 
2147  IoFreeMdl(wtc->parity2_mdl);
2148  wtc->parity2_mdl = NULL;
2149  }
2150 
2151  if (wtc->mdl) {
2152  if (wtc->mdl->MdlFlags & MDL_PAGES_LOCKED)
2153  MmUnlockPages(wtc->mdl);
2154 
2155  IoFreeMdl(wtc->mdl);
2156  wtc->mdl = NULL;
2157  }
2158 
2159  if (wtc->parity1) {
2160  ExFreePool(wtc->parity1);
2161  wtc->parity1 = NULL;
2162  }
2163 
2164  if (wtc->parity2) {
2165  ExFreePool(wtc->parity2);
2166  wtc->parity2 = NULL;
2167  }
2168 
2169  if (wtc->scratch) {
2170  ExFreePool(wtc->scratch);
2171  wtc->scratch = NULL;
2172  }
2173 
2175  return Status;
2176 }
2177 
/* Works out which part of a parity-RAID chunk has to be locked for an access to
 * [address, address + length). The start offset is rounded down and the end offset
 * rounded up to a multiple of the stripe length, and the resulting span is scaled
 * by the number of data stripes. The results are stored through lockaddr and locklen.
 *
 * NOTE(review): original line 2179 (the function signature) is missing from this
 * listing. The call site further down passes (c, address, length, &lockaddr, &locklen),
 * so the function is presumably get_raid56_lock_range - confirm against the full file. */
2178 __attribute__((nonnull(1,4,5)))
2180  uint64_t startoff, endoff;
2181  uint16_t startoffstripe, endoffstripe, datastripes;
2182 
      // One stripe is subtracted when the RAID5 flag is set, two otherwise.
2183  datastripes = c->chunk_item->num_stripes - (c->chunk_item->type & BLOCK_FLAG_RAID5 ? 1 : 2);
2184 
      // Offsets are chunk-relative; the second call is made for the last byte of the range.
      // The stripe indices returned here are not used afterwards.
2185  get_raid0_offset(address - c->offset, c->chunk_item->stripe_length, datastripes, &startoff, &startoffstripe);
2186  get_raid0_offset(address + length - c->offset - 1, c->chunk_item->stripe_length, datastripes, &endoff, &endoffstripe);
2187 
      // Widen to whole stripe-length units: round the start down, the end up.
2188  startoff -= startoff % c->chunk_item->stripe_length;
2189  endoff = sector_align(endoff, c->chunk_item->stripe_length);
2190 
2191  *lockaddr = c->offset + (startoff * datastripes);
2192  *locklen = (endoff - startoff) * datastripes;
2193 }
2194 
/* Synchronous write wrapper: builds a local write_data_context, has write_data()
 * populate it, then inspects the per-stripe results and returns the first failing
 * stripe status (or the status from write_data itself). For RAID5/RAID6 chunks the
 * affected range is locked around the whole operation and unlocked on every visible
 * exit path after the lock was taken.
 *
 * NOTE(review): original line 2196 (the signature) is missing from this listing; the
 * name is presumably write_data_complete (see the log message in insert_extent_chunk)
 * - confirm. Several statements inside the body are missing as well and are flagged
 * individually below. */
2195 __attribute__((nonnull(1,3)))
2197  write_data_context wtc;
2198  NTSTATUS Status;
2199  uint64_t lockaddr, locklen;
2200 
      // NOTE(review): original lines 2201-2202 are missing. wtc.stripes is read below without
      // any visible initialisation, so the list (and presumably the completion event) is
      // probably set up there - confirm.
2203  wtc.stripes_left = 0;
2204  wtc.parity1 = wtc.parity2 = wtc.scratch = NULL;
2205  wtc.mdl = wtc.parity1_mdl = wtc.parity2_mdl = NULL;
2206 
2207  if (!c) {
      // NOTE(review): original line 2208 is missing; presumably it looks the chunk up from
      // address, since c is re-tested immediately afterwards.
2209  if (!c) {
2210  ERR("could not get chunk for address %I64x\n", address);
2211  return STATUS_INTERNAL_ERROR;
2212  }
2213  }
2214 
      // lockaddr/locklen are only assigned (and only used) for RAID5/RAID6 chunks.
2215  if (c->chunk_item->type & BLOCK_FLAG_RAID5 || c->chunk_item->type & BLOCK_FLAG_RAID6) {
2216  get_raid56_lock_range(c, address, length, &lockaddr, &locklen);
2217  chunk_lock_range(Vcb, c, lockaddr, locklen);
2218  }
2219 
2220  _SEH2_TRY {
2221  Status = write_data(Vcb, address, data, length, &wtc, Irp, c, file_write, irp_offset, priority);
      // NOTE(review): original lines 2222-2223 (the exception handler clause) are missing.
2224  } _SEH2_END;
2225 
2226  if (!NT_SUCCESS(Status)) {
2227  ERR("write_data returned %08lx\n", Status);
2228 
2229  if (c->chunk_item->type & BLOCK_FLAG_RAID5 || c->chunk_item->type & BLOCK_FLAG_RAID6)
2230  chunk_unlock_range(Vcb, c, lockaddr, locklen);
2231 
      // NOTE(review): original line 2232 is missing; presumably it releases what wtc holds.
2233  return Status;
2234  }
2235 
      // Only do anything further if at least one stripe was queued.
2236  if (wtc.stripes.Flink != &wtc.stripes) {
2237  // launch writes and wait
2238  LIST_ENTRY* le = wtc.stripes.Flink;
2239  bool no_wait = true;
2240 
2241  while (le != &wtc.stripes) {
      // NOTE(review): original line 2242 (declaration of stripe) is missing.
2243 
2244  if (stripe->status != WriteDataStatus_Ignore) {
      // NOTE(review): original line 2245 is missing; presumably the IRP is dispatched here.
2246  no_wait = false;
2247  }
2248 
2249  le = le->Flink;
2250  }
2251 
2252  if (!no_wait)
      // NOTE(review): original line 2253 (the body of this if) is missing; presumably a wait
      // on the context's event.
2254 
      // Second pass: pick up the first failure among the stripes that were not ignored.
2255  le = wtc.stripes.Flink;
2256  while (le != &wtc.stripes) {
      // NOTE(review): original line 2257 (declaration of stripe) is missing.
2258 
2259  if (stripe->status != WriteDataStatus_Ignore && !NT_SUCCESS(stripe->iosb.Status)) {
2260  Status = stripe->iosb.Status;
2261 
      // NOTE(review): original line 2262 is missing.
2263  break;
2264  }
2265 
2266  le = le->Flink;
2267  }
2268 
      // NOTE(review): original line 2269 is missing; presumably the stripes are freed here.
2270  }
2271 
2272  if (c->chunk_item->type & BLOCK_FLAG_RAID5 || c->chunk_item->type & BLOCK_FLAG_RAID6)
2273  chunk_unlock_range(Vcb, c, lockaddr, locklen);
2274 
2275  return Status;
2276 }
2277 
/* I/O completion routine installed on each per-stripe write IRP; conptr is the
 * write_data_stripe the IRP belongs to.
 *
 * It records the IRP's final status in the stripe. On failure it marks the stripe as
 * WriteDataStatus_Error and asks for every sibling stripe that is still pending to be
 * cancelled. In all cases it decrements the context's stripes_left counter and signals
 * the context's event once that counter reaches zero.
 *
 * NOTE(review): original lines 2282, 2285, 2290 and 2319 are missing from this listing
 * (among them the declaration of context and the return statement). */
2278 __attribute__((nonnull(2,3)))
2279 _Function_class_(IO_COMPLETION_ROUTINE)
2280 static NTSTATUS __stdcall write_data_completion(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID conptr) {
2281  write_data_stripe* stripe = conptr;
2283  LIST_ENTRY* le;
2284 
2286 
2287  // FIXME - we need a lock here
2288 
      // A stripe already flagged for cancellation keeps that status: skip the status update
      // below and go straight to the bookkeeping at end.
2289  if (stripe->status == WriteDataStatus_Cancelling) {
2291  goto end;
2292  }
2293 
2294  stripe->iosb = Irp->IoStatus;
2295 
2296  if (NT_SUCCESS(Irp->IoStatus.Status)) {
2297  stripe->status = WriteDataStatus_Success;
2298  } else {
2299  le = context->stripes.Flink;
2300 
2301  stripe->status = WriteDataStatus_Error;
2302 
      // Walk all stripes of this write and cancel the ones still pending.
2303  while (le != &context->stripes) {
      // NOTE(review): original line 2304 (declaration of s2) is missing.
2305 
2306  if (s2->status == WriteDataStatus_Pending) {
2307  s2->status = WriteDataStatus_Cancelling;
2308  IoCancelIrp(s2->Irp);
2309  }
2310 
2311  le = le->Flink;
2312  }
2313  }
2314 
2315 end:
      // Signal the event once the last outstanding stripe has completed.
2316  if (InterlockedDecrement(&context->stripes_left) == 0)
2317  KeSetEvent(&context->Event, 0, false);
2318 
2320 }
2321 
/* Releases everything a write_data_context (wtc) holds: the parity and main MDLs
 * (unlocking their pages first when MDL_PAGES_LOCKED is set), the parity1, parity2
 * and scratch buffers, each stripe's MDL and IRP, and finally the stripe records
 * themselves.
 *
 * NOTE(review): original line 2323 (the signature) is missing from this listing, so the
 * name used for this block is a guess - confirm. Lines 2359 and 2377 are missing too. */
2322 __attribute__((nonnull(1)))
2324  LIST_ENTRY* le;
2325  PMDL last_mdl = NULL;
2326 
2327  if (wtc->parity1_mdl) {
2328  if (wtc->parity1_mdl->MdlFlags & MDL_PAGES_LOCKED)
2329  MmUnlockPages(wtc->parity1_mdl);
2330 
2331  IoFreeMdl(wtc->parity1_mdl);
2332  }
2333 
2334  if (wtc->parity2_mdl) {
2335  if (wtc->parity2_mdl->MdlFlags & MDL_PAGES_LOCKED)
2336  MmUnlockPages(wtc->parity2_mdl);
2337 
2338  IoFreeMdl(wtc->parity2_mdl);
2339  }
2340 
2341  if (wtc->mdl) {
2342  if (wtc->mdl->MdlFlags & MDL_PAGES_LOCKED)
2343  MmUnlockPages(wtc->mdl);
2344 
2345  IoFreeMdl(wtc->mdl);
2346  }
2347 
2348  if (wtc->parity1)
2349  ExFreePool(wtc->parity1);
2350 
2351  if (wtc->parity2)
2352  ExFreePool(wtc->parity2);
2353 
2354  if (wtc->scratch)
2355  ExFreePool(wtc->scratch);
2356 
      // First pass: free per-stripe MDLs and IRPs without unlinking anything.
2357  le = wtc->stripes.Flink;
2358  while (le != &wtc->stripes) {
      // NOTE(review): original line 2359 (declaration of stripe) is missing.
2360 
      // An MDL equal to the previous stripe's MDL is skipped so that an MDL shared by
      // consecutive stripes is not freed twice.
2361  if (stripe->mdl && stripe->mdl != last_mdl) {
2362  if (stripe->mdl->MdlFlags & MDL_PAGES_LOCKED)
2363  MmUnlockPages(stripe->mdl);
2364 
2365  IoFreeMdl(stripe->mdl);
2366  }
2367 
2368  last_mdl = stripe->mdl;
2369 
2370  if (stripe->Irp)
2371  IoFreeIrp(stripe->Irp);
2372 
2373  le = le->Flink;
2374  }
2375 
      // Second pass: drain the list and free each stripe record.
2376  while (!IsListEmpty(&wtc->stripes)) {
      // NOTE(review): original line 2377 is missing; presumably it removes the head entry and
      // derives stripe from it - the loop only terminates if the list shrinks there.
2378 
2379  ExFreePool(stripe);
2380  }
2381 }
2382 
/* Links newext into fcb->extents, keeping the list ordered by offset. The search
 * starts at the entry after prevextle; newext is placed immediately before the first
 * extent whose offset is >= newext->offset, or at the tail if there is none.
 *
 * NOTE(review): original line 2384 (the signature) is missing from this listing; the
 * callers invoke it as add_extent(fcb, prevextle, newext). Line 2388 is missing too. */
2383 __attribute__((nonnull(1,2,3)))
2385  LIST_ENTRY* le = prevextle->Flink;
2386 
2387  while (le != &fcb->extents) {
      // NOTE(review): original line 2388 (declaration of ext, presumably derived from le) is missing.
2389 
2390  if (ext->offset >= newext->offset) {
      // InsertHeadList on the predecessor puts newext directly in front of ext.
2391  InsertHeadList(ext->list_entry.Blink, &newext->list_entry);
2392  return;
2393  }
2394 
2395  le = le->Flink;
2396  }
2397 
2398  InsertTailList(&fcb->extents, &newext->list_entry);
2399 }
2400 
/* Removes the byte range [start_data, end_data) from the extent list of fcb.
 *
 * Every non-ignored extent overlapping the range is handled in one of four ways:
 *   - remove all:       the extent lies wholly inside the range and is retired;
 *   - remove beginning: a new extent covering the surviving tail replaces it;
 *   - remove end:       a new extent covering the surviving head replaces it;
 *   - remove middle:    two new extents (head and tail) replace it.
 * The old extent is never unlinked here; it is handed to remove_fcb_extent(), which
 * marks it ignored and records a rollback entry. Inline extents can only be removed
 * whole - an attempt to split one returns STATUS_INTERNAL_ERROR.
 *
 * Checksums, when present, are copied to the replacement extents: for uncompressed
 * data only the slice matching the surviving bytes, for compressed data the whole
 * array sized from ed2->size.
 *
 * NOTE(review): this listing omits original line 2402 (the signature; callers pass
 * (Vcb, fcb, start_data, end_data, Irp, rollback)) and a number of statements inside
 * the body. The gaps are flagged where they occur. */
2401 __attribute__((nonnull(1,2,6)))
2403  NTSTATUS Status;
2404  LIST_ENTRY* le;
2405 
2406  le = fcb->extents.Flink;
2407 
2408  while (le != &fcb->extents) {
      // The successor is saved up front because new entries may be linked in around le below.
2409  LIST_ENTRY* le2 = le->Flink;
      // NOTE(review): original line 2410 (declaration of ext) is missing.
2411 
2412  if (!ext->ignore) {
2413  EXTENT_DATA* ed = &ext->extent_data;
2414  uint64_t len;
2415 
2416  if (ed->type == EXTENT_TYPE_INLINE)
2417  len = ed->decoded_size;
2418  else
2419  len = ((EXTENT_DATA2*)ed->data)->num_bytes;
2420 
      // Overlap test between [ext->offset, ext->offset + len) and [start_data, end_data).
2421  if (ext->offset < end_data && ext->offset + len > start_data) {
2422  if (ed->type == EXTENT_TYPE_INLINE) {
2423  if (start_data <= ext->offset && end_data >= ext->offset + len) { // remove all
2424  remove_fcb_extent(fcb, ext, rollback);
2425 
      // NOTE(review): original line 2426 is missing.
2427  fcb->inode_item_changed = true;
2428  } else {
2429  ERR("trying to split inline extent\n");
2430 #ifdef DEBUG_PARANOID
2431  int3;
2432 #endif
2433  return STATUS_INTERNAL_ERROR;
2434  }
2435  } else {
2436  EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ed->data;
2437 
2438  if (start_data <= ext->offset && end_data >= ext->offset + len) { // remove all
      // ed2->size == 0 skips the reference bookkeeping entirely.
2439  if (ed2->size != 0) {
2440  chunk* c;
2441 
      // NOTE(review): original line 2442 is missing.
2443  fcb->inode_item_changed = true;
2444 
      // NOTE(review): original line 2445 is missing; presumably it assigns c, which is
      // tested on the next visible line.
2446 
      // A missing chunk is only logged; the extent is still removed below.
2447  if (!c) {
2448  ERR("get_chunk_from_address(%I64x) failed\n", ed2->address);
2449  } else {
      // Reference count change of -1 for the extent that is going away.
2450  Status = update_changed_extent_ref(Vcb, c, ed2->address, ed2->size, fcb->subvol->id, fcb->inode, ext->offset - ed2->offset, -1,
      // NOTE(review): original line 2451 (the rest of the argument list) is missing.
2452  if (!NT_SUCCESS(Status)) {
2453  ERR("update_changed_extent_ref returned %08lx\n", Status);
2454  goto end;
2455  }
2456  }
2457  }
2458 
2459  remove_fcb_extent(fcb, ext, rollback);
2460  } else if (start_data <= ext->offset && end_data < ext->offset + len) { // remove beginning
2461  EXTENT_DATA2* ned2;
2462  extent* newext;
2463 
2464  if (ed2->size != 0) {
2465  fcb->inode_item.st_blocks -= end_data - ext->offset;
2466  fcb->inode_item_changed = true;
2467  }
2468 
2469  newext = ExAllocatePoolWithTag(PagedPool, offsetof(extent, extent_data) + sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2), ALLOC_TAG);
2470  if (!newext) {
2471  ERR("out of memory\n");
      // NOTE(review): original line 2472 is missing; presumably it sets Status before the jump.
2473  goto end;
2474  }
2475 
2476  ned2 = (EXTENT_DATA2*)newext->extent_data.data;
2477 
2478  newext->extent_data.generation = Vcb->superblock.generation;
2479  newext->extent_data.decoded_size = ed->decoded_size;
2480  newext->extent_data.compression = ed->compression;
2481  newext->extent_data.encryption = ed->encryption;
2482  newext->extent_data.encoding = ed->encoding;
2483  newext->extent_data.type = ed->type;
      // Same on-disk extent; only the window into it moves forward by the removed amount.
2484  ned2->address = ed2->address;
2485  ned2->size = ed2->size;
2486  ned2->offset = ed2->offset + (end_data - ext->offset);
2487  ned2->num_bytes = ed2->num_bytes - (end_data - ext->offset);
2488 
2489  newext->offset = end_data;
2490  newext->datalen = sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2);
2491  newext->unique = ext->unique;
2492  newext->ignore = false;
2493  newext->inserted = true;
2494 
2495  if (ext->csum) {
2496  if (ed->compression == BTRFS_COMPRESSION_NONE) {
2497  newext->csum = ExAllocatePoolWithTag(PagedPool, (ULONG)((ned2->num_bytes * Vcb->csum_size) >> Vcb->sector_shift), ALLOC_TAG);
2498  if (!newext->csum) {
2499  ERR("out of memory\n");
      // NOTE(review): original line 2500 is missing.
2501  ExFreePool(newext);
2502  goto end;
2503  }
2504 
      // Skip the checksums belonging to the removed leading bytes.
2505  RtlCopyMemory(newext->csum, (uint8_t*)ext->csum + (((end_data - ext->offset) * Vcb->csum_size) >> Vcb->sector_shift),
2506  (ULONG)((ned2->num_bytes * Vcb->csum_size) >> Vcb->sector_shift));
2507  } else {
2508  newext->csum = ExAllocatePoolWithTag(PagedPool, (ULONG)((ed2->size * Vcb->csum_size) >> Vcb->sector_shift), ALLOC_TAG);
2509  if (!newext->csum) {
2510  ERR("out of memory\n");
      // NOTE(review): original line 2511 is missing.
2512  ExFreePool(newext);
2513  goto end;
2514  }
2515 
2516  RtlCopyMemory(newext->csum, ext->csum, (ULONG)((ed2->size * Vcb->csum_size) >> Vcb->sector_shift));
2517  }
2518  } else
2519  newext->csum = NULL;
2520 
      // The new offset is larger than ext's, so an ordered insert after ext is needed.
2521  add_extent(fcb, &ext->list_entry, newext);
2522 
2523  remove_fcb_extent(fcb, ext, rollback);
2524  } else if (start_data > ext->offset && end_data >= ext->offset + len) { // remove end
2525  EXTENT_DATA2* ned2;
2526  extent* newext;
2527 
2528  if (ed2->size != 0) {
2529  fcb->inode_item.st_blocks -= ext->offset + len - start_data;
2530  fcb->inode_item_changed = true;
2531  }
2532 
2533  newext = ExAllocatePoolWithTag(PagedPool, offsetof(extent, extent_data) + sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2), ALLOC_TAG);
2534  if (!newext) {
2535  ERR("out of memory\n");
      // NOTE(review): original line 2536 is missing.
2537  goto end;
2538  }
2539 
2540  ned2 = (EXTENT_DATA2*)newext->extent_data.data;
2541 
2542  newext->extent_data.generation = Vcb->superblock.generation;
2543  newext->extent_data.decoded_size = ed->decoded_size;
2544  newext->extent_data.compression = ed->compression;
2545  newext->extent_data.encryption = ed->encryption;
2546  newext->extent_data.encoding = ed->encoding;
2547  newext->extent_data.type = ed->type;
      // Same start; only the length shrinks to the bytes before start_data.
2548  ned2->address = ed2->address;
2549  ned2->size = ed2->size;
2550  ned2->offset = ed2->offset;
2551  ned2->num_bytes = start_data - ext->offset;
2552 
2553  newext->offset = ext->offset;
2554  newext->datalen = sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2);
2555  newext->unique = ext->unique;
2556  newext->ignore = false;
2557  newext->inserted = true;
2558 
2559  if (ext->csum) {
2560  if (ed->compression == BTRFS_COMPRESSION_NONE) {
2561  newext->csum = ExAllocatePoolWithTag(PagedPool, (ULONG)((ned2->num_bytes * Vcb->csum_size) >> Vcb->sector_shift), ALLOC_TAG);
2562  if (!newext->csum) {
2563  ERR("out of memory\n");
      // NOTE(review): original line 2564 is missing.
2565  ExFreePool(newext);
2566  goto end;
2567  }
2568 
2569  RtlCopyMemory(newext->csum, ext->csum, (ULONG)((ned2->num_bytes * Vcb->csum_size) >> Vcb->sector_shift));
2570  } else {
2571  newext->csum = ExAllocatePoolWithTag(PagedPool, (ULONG)((ed2->size * Vcb->csum_size) >> Vcb->sector_shift), ALLOC_TAG);
2572  if (!newext->csum) {
2573  ERR("out of memory\n");
      // NOTE(review): original line 2574 is missing.
2575  ExFreePool(newext);
2576  goto end;
2577  }
2578 
2579  RtlCopyMemory(newext->csum, ext->csum, (ULONG)((ed2->size * Vcb->csum_size) >> Vcb->sector_shift));
2580  }
2581  } else
2582  newext->csum = NULL;
2583 
      // Same offset as ext, so the replacement is linked directly after it.
2584  InsertHeadList(&ext->list_entry, &newext->list_entry);
2585 
2586  remove_fcb_extent(fcb, ext, rollback);
2587  } else if (start_data > ext->offset && end_data < ext->offset + len) { // remove middle
2588  EXTENT_DATA2 *neda2, *nedb2;
2589  extent *newext1, *newext2;
2590 
2591  if (ed2->size != 0) {
2592  chunk* c;
2593 
2594  fcb->inode_item.st_blocks -= end_data - start_data;
2595  fcb->inode_item_changed = true;
2596 
      // NOTE(review): original line 2597 is missing; presumably it assigns c.
2598 
2599  if (!c) {
2600  ERR("get_chunk_from_address(%I64x) failed\n", ed2->address);
2601  } else {
      // Reference count change of +1: one file extent becomes two that point at the same data.
2602  Status = update_changed_extent_ref(Vcb, c, ed2->address, ed2->size, fcb->subvol->id, fcb->inode, ext->offset - ed2->offset, 1,
      // NOTE(review): original line 2603 (the rest of the argument list) is missing.
2604  if (!NT_SUCCESS(Status)) {
2605  ERR("update_changed_extent_ref returned %08lx\n", Status);
2606  goto end;
2607  }
2608  }
2609  }
2610 
2611  newext1 = ExAllocatePoolWithTag(PagedPool, offsetof(extent, extent_data) + sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2), ALLOC_TAG);
2612  if (!newext1) {
2613  ERR("out of memory\n");
      // NOTE(review): original line 2614 is missing.
2615  goto end;
2616  }
2617 
2618  newext2 = ExAllocatePoolWithTag(PagedPool, offsetof(extent, extent_data) + sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2), ALLOC_TAG);
2619  if (!newext2) {
2620  ERR("out of memory\n");
      // NOTE(review): original line 2621 is missing.
2622  ExFreePool(newext1);
2623  goto end;
2624  }
2625 
      // newext1 describes the surviving head of the extent.
2626  neda2 = (EXTENT_DATA2*)newext1->extent_data.data;
2627 
2628  newext1->extent_data.generation = Vcb->superblock.generation;
2629  newext1->extent_data.decoded_size = ed->decoded_size;
2630  newext1->extent_data.compression = ed->compression;
2631  newext1->extent_data.encryption = ed->encryption;
2632  newext1->extent_data.encoding = ed->encoding;
2633  newext1->extent_data.type = ed->type;
2634  neda2->address = ed2->address;
2635  neda2->size = ed2->size;
2636  neda2->offset = ed2->offset;
2637  neda2->num_bytes = start_data - ext->offset;
2638 
      // newext2 describes the surviving tail of the extent.
2639  nedb2 = (EXTENT_DATA2*)newext2->extent_data.data;
2640 
2641  newext2->extent_data.generation = Vcb->superblock.generation;
2642  newext2->extent_data.decoded_size = ed->decoded_size;
2643  newext2->extent_data.compression = ed->compression;
2644  newext2->extent_data.encryption = ed->encryption;
2645  newext2->extent_data.encoding = ed->encoding;
2646  newext2->extent_data.type = ed->type;
2647  nedb2->address = ed2->address;
2648  nedb2->size = ed2->size;
2649  nedb2->offset = ed2->offset + (end_data - ext->offset);
2650  nedb2->num_bytes = ext->offset + len - end_data;
2651 
2652  newext1->offset = ext->offset;
2653  newext1->datalen = sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2);
2654  newext1->unique = ext->unique;
2655  newext1->ignore = false;
2656  newext1->inserted = true;
2657 
2658  newext2->offset = end_data;
2659  newext2->datalen = sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2);
2660  newext2->unique = ext->unique;
2661  newext2->ignore = false;
2662  newext2->inserted = true;
2663 
2664  if (ext->csum) {
2665  if (ed->compression == BTRFS_COMPRESSION_NONE) {
2666  newext1->csum = ExAllocatePoolWithTag(PagedPool, (ULONG)((neda2->num_bytes * Vcb->csum_size) >> Vcb->sector_shift), ALLOC_TAG);
2667  if (!newext1->csum) {
2668  ERR("out of memory\n");
      // NOTE(review): original line 2669 is missing.
2670  ExFreePool(newext1);
2671  ExFreePool(newext2);
2672  goto end;
2673  }
2674 
2675  newext2->csum = ExAllocatePoolWithTag(PagedPool, (ULONG)((nedb2->num_bytes * Vcb->csum_size) >> Vcb->sector_shift), ALLOC_TAG);
2676  if (!newext2->csum) {
2677  ERR("out of memory\n");
      // NOTE(review): original line 2678 is missing.
2679  ExFreePool(newext1->csum);
2680  ExFreePool(newext1);
2681  ExFreePool(newext2);
2682  goto end;
2683  }
2684 
      // Head checksums come from the start of the array, tail checksums from the slice at end_data.
2685  RtlCopyMemory(newext1->csum, ext->csum, (ULONG)((neda2->num_bytes * Vcb->csum_size) >> Vcb->sector_shift));
2686  RtlCopyMemory(newext2->csum, (uint8_t*)ext->csum + (((end_data - ext->offset) * Vcb->csum_size) >> Vcb->sector_shift),
2687  (ULONG)((nedb2->num_bytes * Vcb->csum_size) >> Vcb->sector_shift));
2688  } else {
2689  newext1->csum = ExAllocatePoolWithTag(PagedPool, (ULONG)((ed2->size * Vcb->csum_size) >> Vcb->sector_shift), ALLOC_TAG);
2690  if (!newext1->csum) {
2691  ERR("out of memory\n");
      // NOTE(review): original line 2692 is missing.
2693  ExFreePool(newext1);
2694  ExFreePool(newext2);
2695  goto end;
2696  }
2697 
2698  newext2->csum = ExAllocatePoolWithTag(PagedPool, (ULONG)((ed2->size * Vcb->csum_size) >> Vcb->sector_shift), ALLOC_TAG);
2699  if (!newext2->csum) {
2700  ERR("out of memory\n");
      // NOTE(review): original line 2701 is missing.
2702  ExFreePool(newext1->csum);
2703  ExFreePool(newext1);
2704  ExFreePool(newext2);
2705  goto end;
2706  }
2707 
      // Compressed data: both halves receive a full copy of the checksum array.
2708  RtlCopyMemory(newext1->csum, ext->csum, (ULONG)((ed2->size * Vcb->csum_size) >> Vcb->sector_shift));
2709  RtlCopyMemory(newext2->csum, ext->csum, (ULONG)((ed2->size * Vcb->csum_size) >> Vcb->sector_shift));
2710  }
2711  } else {
2712  newext1->csum = NULL;
2713  newext2->csum = NULL;
2714  }
2715 
2716  InsertHeadList(&ext->list_entry, &newext1->list_entry);
2717  add_extent(fcb, &newext1->list_entry, newext2);
2718 
2719  remove_fcb_extent(fcb, ext, rollback);
2720  }
2721  }
2722  }
2723  }
2724 
2725  le = le2;
2726  }
2727 
      // NOTE(review): original line 2728 is missing; presumably it sets the success status
      // before falling through to end.
2729 
2730 end:
      // Reached by every goto above as well as by fall-through, so the flag is set on those
      // error paths too.
2731  fcb->extents_changed = true;
      // NOTE(review): original line 2732 is missing.
2733 
2734  return Status;
2735 }
2736 
/* Builds a rollback_extent record that ties ext to fcb for the rollback list.
 * An allocation failure is only logged: the function returns without recording
 * anything and without telling the caller.
 *
 * NOTE(review): original lines 2741 and 2750 are missing from this listing -
 * presumably the allocation of re and the call that appends it to rollback. */
2737 __attribute__((nonnull(1,2,3)))
2738 static void add_insert_extent_rollback(LIST_ENTRY* rollback, fcb* fcb, extent* ext) {
2739  rollback_extent* re;
2740 
2742  if (!re) {
2743  ERR("out of memory\n");
2744  return;
2745  }
2746 
2747  re->fcb = fcb;
2748  re->ext = ext;
2749 
2751 }
2752 
/* Allocates a new extent record for fcb, copies edsize bytes of extent data from ed
 * into it, and links it into fcb->extents in offset order (before the first existing
 * entry whose offset is >= offset, else at the tail). The csum pointer is stored in
 * the new record as-is, not copied. A rollback entry for the insertion is requested
 * afterwards. Returns STATUS_SUCCESS on the visible success path.
 *
 * NOTE(review): original lines 2758-2759 (the signature) and 2766 are missing from
 * this listing. Callers invoke it as
 * add_extent_to_fcb(fcb, offset, ed, edsize, unique, csum, rollback). Line 2766 sits
 * inside the allocation-failure branch and presumably returns an error status;
 * otherwise ext would be dereferenced while NULL - confirm. */
2753 #ifdef _MSC_VER
2754 #pragma warning(push)
2755 #pragma warning(suppress: 28194)
2756 #endif
2757 __attribute__((nonnull(1,3,7)))
2760  extent* ext;
2761  LIST_ENTRY* le;
2762 
      // The record is sized for the header fields plus the variable-length extent data.
2763  ext = ExAllocatePoolWithTag(PagedPool, offsetof(extent, extent_data) + edsize, ALLOC_TAG);
2764  if (!ext) {
2765  ERR("out of memory\n");
2767  }
2768 
2769  ext->offset = offset;
2770  ext->datalen = edsize;
2771  ext->unique = unique;
2772  ext->ignore = false;
2773  ext->inserted = true;
2774  ext->csum = csum;
2775 
2776  RtlCopyMemory(&ext->extent_data, ed, edsize);
2777 
      // Ordered insert. Entries marked ignore are not skipped by this scan.
2778  le = fcb->extents.Flink;
2779  while (le != &fcb->extents) {
2780  extent* oldext = CONTAINING_RECORD(le, extent, list_entry);
2781 
2782  if (oldext->offset >= offset) {
2783  InsertHeadList(le->Blink, &ext->list_entry);
2784  goto end;
2785  }
2786 
2787  le = le->Flink;
2788  }
2789 
2790  InsertTailList(&fcb->extents, &ext->list_entry);
2791 
2792 end:
2793  add_insert_extent_rollback(rollback, fcb, ext);
2794 
2795  return STATUS_SUCCESS;
2796 }
2797 #ifdef _MSC_VER
2798 #pragma warning(pop)
2799 #endif
2800 
/* Logically removes ext from fcb: the extent stays linked in the list but is flagged
 * ignore, and a rollback_extent record referring to it is prepared. Calling this on an
 * extent that is already ignored does nothing. If the rollback record cannot be
 * allocated, the failure is only logged and the extent remains flagged.
 *
 * NOTE(review): original lines 2808 and 2817 are missing from this listing -
 * presumably the allocation of re and the call that appends it to rollback. */
2801 __attribute__((nonnull(1, 2, 3)))
2802 static void remove_fcb_extent(fcb* fcb, extent* ext, LIST_ENTRY* rollback) {
2803  if (!ext->ignore) {
2804  rollback_extent* re;
2805 
2806  ext->ignore = true;
2807 
2809  if (!re) {
2810  ERR("out of memory\n");
2811  return;
2812  }
2813 
2814  re->fcb = fcb;
2815  re->ext = ext;
2816 
2818  }
2819 }
2820 
/* Creates a new data extent of `length` bytes in chunk c for the file range starting
 * at start_data, registers it with fcb, updates the accounting visible below
 * (c->used and the fcb change flags) and, when data is non-NULL, has the data written.
 * Returns true on success and false on any failure before the write is issued; a
 * failed write is only logged and the function still returns true.
 *
 * NOTE(review): original lines 2822 and 2824-2825 (including the signature) are
 * missing from this listing. Callers invoke it as insert_extent_chunk(Vcb, fcb, c,
 * start_data, length, prealloc, data, Irp, rollback, compression, decoded_size,
 * file_write, irp_offset). Further gaps inside the body are flagged below. */
2821 _Requires_lock_held_(c->lock)
2823 __attribute__((nonnull(1,2,3,9)))
2826  uint64_t address;
2827  NTSTATUS Status;
2828  EXTENT_DATA* ed;
2829  EXTENT_DATA2* ed2;
2830  uint16_t edsize = (uint16_t)(offsetof(EXTENT_DATA, data[0]) + sizeof(EXTENT_DATA2));
2831  void* csum = NULL;
2832 
2833  TRACE("(%p, (%I64x, %I64x), %I64x, %I64x, %I64x, %u, %p, %p)\n", Vcb, fcb->subvol->id, fcb->inode, c->offset, start_data, length, prealloc, data, rollback);
2834 
      // NOTE(review): original line 2835 (the condition guarding this return) is missing;
      // presumably it is where address gets assigned, since address is used below without any
      // other visible assignment.
2836  return false;
2837 
2838  // add extent data to inode
2839  ed = ExAllocatePoolWithTag(PagedPool, edsize, ALLOC_TAG);
2840  if (!ed) {
2841  ERR("out of memory\n");
2842  return false;
2843  }
2844 
2845  ed->generation = Vcb->superblock.generation;
2846  ed->decoded_size = decoded_size;
2847  ed->compression = compression;
      // NOTE(review): original lines 2848-2850 are missing; the remaining EXTENT_DATA fields
      // are presumably filled in there.
2851 
2852  ed2 = (EXTENT_DATA2*)ed->data;
2853  ed2->address = address;
2854  ed2->size = length;
2855  ed2->offset = 0;
2856  ed2->num_bytes = decoded_size;
2857 
      // NOTE(review): original line 2858 (the condition opening this block) is missing, so
      // when checksums get computed cannot be stated from this listing.
2859  ULONG sl = (ULONG)(length >> Vcb->sector_shift);
2860 
      // One checksum of csum_size bytes per sector of the extent.
2861  csum = ExAllocatePoolWithTag(PagedPool, sl * Vcb->csum_size, ALLOC_TAG);
2862  if (!csum) {
2863  ERR("out of memory\n");
2864  ExFreePool(ed);
2865  return false;
2866  }
2867 
2868  do_calc_job(Vcb, data, sl, csum);
2869  }
2870 
2871  Status = add_extent_to_fcb(fcb, start_data, ed, edsize, true, csum, rollback);
2872  if (!NT_SUCCESS(Status)) {
2873  ERR("add_extent_to_fcb returned %08lx\n", Status);
2874  if (csum) ExFreePool(csum);
2875  ExFreePool(ed);
2876  return false;
2877  }
2878 
      // add_extent_to_fcb copied the extent data into its own record, so the temporary can go.
      // csum is not freed here: the new extent record keeps the pointer.
2879  ExFreePool(ed);
2880 
2881  c->used += length;
      // NOTE(review): original lines 2882 and 2884 are missing.
2883 
2885 
2886  fcb->extents_changed = true;
2887  fcb->inode_item_changed = true;
      // NOTE(review): original line 2888 is missing.
2889 
2890  ExAcquireResourceExclusiveLite(&c->changed_extents_lock, true);
2891 
      // NOTE(review): original line 2892 (the work done under changed_extents_lock) is missing.
2893 
2894  ExReleaseResourceLite(&c->changed_extents_lock);
2895 
      // NOTE(review): original line 2896 is missing. Callers re-acquire or release the chunk
      // lock depending on this function's result, so the lock is presumably dropped here - confirm.
2897 
2898  if (data) {
      // NOTE(review): original lines 2899-2900 (the write call that assigns Status) are missing.
2901  if (!NT_SUCCESS(Status))
2902  ERR("write_data_complete returned %08lx\n", Status);
2903  }
2904 
2905  return true;
2906 }
2907 
/* Tries to place new data directly after the file's existing extent, so that the write
 * starting at start_data continues physically where the previous extent ends.
 *
 * The last non-ignored extent at or before start_data is located first. The attempt is
 * abandoned (returning false) unless that extent is regular or prealloc, ends exactly at
 * start_data, and lives in a writable chunk of the volume's data type with enough free
 * space. The chunk's free-space list is then searched for an entry that begins exactly
 * at the end of the extent's on-disk data. If one exists, up to
 * min(its size, length, MAX_EXTENT_SIZE) bytes are inserted there and, on success,
 * added to *written.
 *
 * NOTE(review): original lines 2953, 2958, 2961, 2966, 2970, 2977, 2987 and 2996 are
 * missing from this listing; judging by their positions they are the chunk lookup,
 * the chunk lock acquire/release calls, the cache load and a declaration. */
2908 __attribute__((nonnull(1,2,5,7,10)))
2909 static bool try_extend_data(device_extension* Vcb, fcb* fcb, uint64_t start_data, uint64_t length, void* data,
2910  PIRP Irp, uint64_t* written, bool file_write, uint64_t irp_offset, LIST_ENTRY* rollback) {
2911  bool success = false;
2912  EXTENT_DATA* ed;
2913  EXTENT_DATA2* ed2;
2914  chunk* c;
2915  LIST_ENTRY* le;
2916  extent* ext = NULL;
2917 
2918  le = fcb->extents.Flink;
2919 
      // Find the last live extent whose offset is <= start_data.
2920  while (le != &fcb->extents) {
2921  extent* nextext = CONTAINING_RECORD(le, extent, list_entry);
2922 
2923  if (!nextext->ignore) {
2924  if (nextext->offset == start_data) {
2925  ext = nextext;
2926  break;
2927  } else if (nextext->offset > start_data)
2928  break;
2929 
2930  ext = nextext;
2931  }
2932 
2933  le = le->Flink;
2934  }
2935 
2936  if (!ext)
2937  return false;
2938 
2939  ed = &ext->extent_data;
2940 
2941  if (ed->type != EXTENT_TYPE_REGULAR && ed->type != EXTENT_TYPE_PREALLOC) {
2942  TRACE("not extending extent which is not regular or prealloc\n");
2943  return false;
2944  }
2945 
2946  ed2 = (EXTENT_DATA2*)ed->data;
2947 
      // The extent must end exactly where the new data begins.
2948  if (ext->offset + ed2->num_bytes != start_data) {
2949  TRACE("last EXTENT_DATA does not run up to start_data (%I64x + %I64x != %I64x)\n", ext->offset, ed2->num_bytes, start_data);
2950  return false;
2951  }
2952 
      // NOTE(review): original line 2953 is missing; c is used below without a visible
      // assignment, so it is presumably looked up from ed2->address here.
2954 
2955  if (c->reloc || c->readonly || c->chunk_item->type != Vcb->data_flags)
2956  return false;
2957 
      // NOTE(review): original line 2958 is missing (presumably takes the chunk lock).
2959 
2960  if (length > c->chunk_item->size - c->used) {
      // NOTE(review): original line 2961 is missing.
2962  return false;
2963  }
2964 
2965  if (!c->cache_loaded) {
      // NOTE(review): original line 2966 (the call that produces Status) is missing.
2967 
2968  if (!NT_SUCCESS(Status)) {
2969  ERR("load_cache_chunk returned %08lx\n", Status);
      // NOTE(review): original line 2970 is missing.
2971  return false;
2972  }
2973  }
2974 
      // The scan stops at the first entry past the target address, which relies on c->space
      // being ordered by address - presumably it is; confirm.
2975  le = c->space.Flink;
2976  while (le != &c->space) {
      // NOTE(review): original line 2977 (declaration of s) is missing.
2978 
2979  if (s->address == ed2->address + ed2->size) {
2980  uint64_t newlen = min(min(s->size, length), MAX_EXTENT_SIZE);
2981 
2982  success = insert_extent_chunk(Vcb, fcb, c, start_data, newlen, false, data, Irp, rollback, BTRFS_COMPRESSION_NONE, newlen, file_write, irp_offset);
2983 
2984  if (success)
2985  *written += newlen;
2986  else
      // NOTE(review): original line 2987 (the else body) is missing.
2988 
2989  return success;
2990  } else if (s->address > ed2->address + ed2->size)
2991  break;
2992 
2993  le = le->Flink;
2994  }
2995 
      // NOTE(review): original line 2996 is missing.
2997 
2998  return false;
2999 }
3000 
/* Last-resort allocator: spreads `length` bytes starting at file offset `start` over
 * whatever free space is left, in as many pieces as it takes.
 *
 * It first calls alloc_chunk() repeatedly until that fails; any failure other than
 * STATUS_DISK_FULL is returned to the caller. It then walks every chunk that is
 * neither readonly nor being relocated and whose type equals the volume's data flags,
 * repeatedly inserting an extent sized min(remaining length, size of the first entry of
 * the chunk's space_size list). Returns STATUS_SUCCESS when everything was placed and
 * STATUS_DISK_FULL otherwise.
 *
 * data may be NULL (it is only advanced when non-NULL). When the file is a paging file,
 * extents are never inserted as prealloc.
 *
 * NOTE(review): original lines 3024, 3027, 3039 and 3044 are missing from this listing;
 * presumably the assignment of c from the list entry and chunk lock handling. */
3001 __attribute__((nonnull(1)))
3002 static NTSTATUS insert_chunk_fragmented(fcb* fcb, uint64_t start, uint64_t length, uint8_t* data, bool prealloc, LIST_ENTRY* rollback) {
3003  LIST_ENTRY* le;
3004  uint64_t flags = fcb->Vcb->data_flags;
3005  bool page_file = fcb->Header.Flags2 & FSRTL_FLAG2_IS_PAGING_FILE;
3006  NTSTATUS Status;
3007  chunk* c;
3008 
      // NOTE(review): chunk_lock is taken shared here although alloc_chunk is called under it,
      // while the other callers in this file take it exclusively around alloc_chunk - confirm
      // this is intended.
3009  ExAcquireResourceSharedLite(&fcb->Vcb->chunk_lock, true);
3010 
3011  // first create as many chunks as we can
3012  do {
3013  Status = alloc_chunk(fcb->Vcb, flags, &c, false);
3014  } while (NT_SUCCESS(Status));
3015 
3016  if (Status != STATUS_DISK_FULL) {
3017  ERR("alloc_chunk returned %08lx\n", Status);
3018  ExReleaseResourceLite(&fcb->Vcb->chunk_lock);
3019  return Status;
3020  }
3021 
3022  le = fcb->Vcb->chunks.Flink;
3023  while (le != &fcb->Vcb->chunks) {
3025 
3026  if (!c->readonly && !c->reloc) {
3028 
3029  if (c->chunk_item->type == flags) {
      // NOTE(review): if insert_extent_chunk returns false, nothing visible in this loop changes
      // length or the space_size list, so the loop condition stays true. Check that a
      // persistent failure cannot make this spin forever.
3030  while (!IsListEmpty(&c->space_size) && length > 0) {
3031  space* s = CONTAINING_RECORD(c->space_size.Flink, space, list_entry_size);
3032  uint64_t extlen = min(length, s->size);
3033 
3034  if (insert_extent_chunk(fcb->Vcb, fcb, c, start, extlen, prealloc && !page_file, data, NULL, rollback, BTRFS_COMPRESSION_NONE, extlen, false, 0)) {
3035  start += extlen;
3036  length -= extlen;
3037  if (data) data += extlen;
3038 
3040  }
3041  }
3042  }
3043 
3045 
3046  if (length == 0)
3047  break;
3048  }
3049 
3050  le = le->Flink;
3051  }
3052 
3053  ExReleaseResourceLite(&fcb->Vcb->chunk_lock);
3054 
3055  return length == 0 ? STATUS_SUCCESS : STATUS_DISK_FULL;
3056 }
3057 
/* Reserves `length` bytes of extents for the file range starting at `start`, without
 * writing any data (data and Irp are passed as NULL throughout).
 *
 * The range is processed in pieces of at most MAX_EXTENT_SIZE. For each piece the
 * function tries, in order:
 *   1. every existing usable chunk of the data type with enough free space;
 *   2. a freshly allocated chunk;
 *   3. insert_chunk_fragmented() for the whole remaining range, after which the
 *      function returns that call's status.
 * Extents are inserted with the prealloc argument set, except for paging files.
 *
 * NOTE(review): original lines 3075, 3078, 3087, 3106, 3113 and 3126 are missing from
 * this listing; presumably the assignment of c, chunk lock handling and the final
 * success status. */
3058 __attribute__((nonnull(1,4)))
3059 static NTSTATUS insert_prealloc_extent(fcb* fcb, uint64_t start, uint64_t length, LIST_ENTRY* rollback) {
3060  LIST_ENTRY* le;
3061  chunk* c;
3062  uint64_t flags;
3063  NTSTATUS Status;
3064  bool page_file = fcb->Header.Flags2 & FSRTL_FLAG2_IS_PAGING_FILE;
3065 
3066  flags = fcb->Vcb->data_flags;
3067 
3068  do {
3069  uint64_t extlen = min(MAX_EXTENT_SIZE, length);
3070 
      // Step 1: look for room in an existing chunk while holding the chunk list shared.
3071  ExAcquireResourceSharedLite(&fcb->Vcb->chunk_lock, true);
3072 
3073  le = fcb->Vcb->chunks.Flink;
3074  while (le != &fcb->Vcb->chunks) {
3076 
3077  if (!c->readonly && !c->reloc) {
3079 
3080  if (c->chunk_item->type == flags && (c->chunk_item->size - c->used) >= extlen) {
3081  if (insert_extent_chunk(fcb->Vcb, fcb, c, start, extlen, !page_file, NULL, NULL, rollback, BTRFS_COMPRESSION_NONE, extlen, false, 0)) {
3082  ExReleaseResourceLite(&fcb->Vcb->chunk_lock);
3083  goto cont;
3084  }
3085  }
3086 
3088  }
3089 
3090  le = le->Flink;
3091  }
3092 
3093  ExReleaseResourceLite(&fcb->Vcb->chunk_lock);
3094 
      // Step 2: allocate a new chunk; this takes the chunk list exclusively.
3095  ExAcquireResourceExclusiveLite(&fcb->Vcb->chunk_lock, true);
3096 
3097  Status = alloc_chunk(fcb->Vcb, flags, &c, false);
3098 
3099  ExReleaseResourceLite(&fcb->Vcb->chunk_lock);
3100 
3101  if (!NT_SUCCESS(Status)) {
3102  ERR("alloc_chunk returned %08lx\n", Status);
3103  goto end;
3104  }
3105 
3107 
3108  if (c->chunk_item->type == flags && (c->chunk_item->size - c->used) >= extlen) {
3109  if (insert_extent_chunk(fcb->Vcb, fcb, c, start, extlen, !page_file, NULL, NULL, rollback, BTRFS_COMPRESSION_NONE, extlen, false, 0))
3110  goto cont;
3111  }
3112 
3114 
      // Step 3: give up on contiguous placement and hand over the whole remaining range.
3115  Status = insert_chunk_fragmented(fcb, start, length, NULL, true, rollback);
3116  if (!NT_SUCCESS(Status))
3117  ERR("insert_chunk_fragmented returned %08lx\n", Status);
3118 
3119  goto end;
3120 
3121 cont:
3122  length -= extlen;
3123  start += extlen;
3124  } while (length > 0);
3125 
3127 
3128 end:
3129  return Status;
3130 }
3131 
/* Allocates extents for, and writes, `length` bytes of `data` at file offset
 * start_data.
 *
 * When start_data is non-zero, try_extend_data() is given the first chance to continue
 * the preceding extent. Whatever remains is handled in pieces of at most
 * MAX_EXTENT_SIZE. Each piece goes to the first existing usable chunk of the data type
 * with enough free space, else to a newly allocated chunk, and if that fails too the
 * entire remainder is passed to insert_chunk_fragmented() and its status is returned.
 * start_data, irp_offset, length and the data pointer advance together after every
 * piece that is placed.
 *
 * NOTE(review): original lines 3168, 3171, 3189, 3213 and 3229 are missing from this
 * listing; presumably the assignment of c from the list entry and chunk lock
 * acquire/release calls (two of them are the bodies of the dangling else branches). */
3132 __attribute__((nonnull(1,2,5,9)))
3133 static NTSTATUS insert_extent(device_extension* Vcb, fcb* fcb, uint64_t start_data, uint64_t length, void* data,
3134  PIRP Irp, bool file_write, uint64_t irp_offset, LIST_ENTRY* rollback) {
3135  NTSTATUS Status;
3136  LIST_ENTRY* le;
3137  chunk* c;
3138  uint64_t flags, orig_length = length, written = 0;
3139 
3140  TRACE("(%p, (%I64x, %I64x), %I64x, %I64x, %p)\n", Vcb, fcb->subvol->id, fcb->inode, start_data, length, data);
3141 
3142  if (start_data > 0) {
      // The boolean result is not needed: progress is reported through written.
3143  try_extend_data(Vcb, fcb, start_data, length, data, Irp, &written, file_write, irp_offset, rollback);
3144 
3145  if (written == length)
3146  return STATUS_SUCCESS;
3147  else if (written > 0) {
3148  start_data += written;
3149  irp_offset += written;
3150  length -= written;
3151  data = &((uint8_t*)data)[written];
3152  }
3153  }
3154 
3155  flags = Vcb->data_flags;
3156 
      // written counts against orig_length; length is the amount still outstanding.
3157  while (written < orig_length) {
3158  uint64_t newlen = min(length, MAX_EXTENT_SIZE);
3159  bool done = false;
3160 
3161  // Rather than necessarily writing the whole extent at once, we deal with it in blocks of 128 MB.
3162  // First, see if we can write the extent part to an existing chunk.
3163 
3164  ExAcquireResourceSharedLite(&Vcb->chunk_lock, true);
3165 
3166  le = Vcb->chunks.Flink;
3167  while (le != &Vcb->chunks) {
3169 
3170  if (!c->readonly && !c->reloc) {
3172 
3173  if (c->chunk_item->type == flags && (c->chunk_item->size - c->used) >= newlen &&
3174  insert_extent_chunk(Vcb, fcb, c, start_data, newlen, false, data, Irp, rollback, BTRFS_COMPRESSION_NONE, newlen, file_write, irp_offset)) {
3175  written += newlen;
3176 
3177  if (written == orig_length) {
3178  ExReleaseResourceLite(&Vcb->chunk_lock);
3179  return STATUS_SUCCESS;
3180  } else {
      // More to do: leave the chunk scan and start the next piece from the top of the outer loop.
3181  done = true;
3182  start_data += newlen;
3183  irp_offset += newlen;
3184  length -= newlen;
3185  data = &((uint8_t*)data)[newlen];
3186  break;
3187  }
3188  } else
3190  }
3191 
3192  le = le->Flink;
3193  }
3194 
3195  ExReleaseResourceLite(&Vcb->chunk_lock);
3196 
3197  if (done) continue;
3198 
3199  // Otherwise, see if we can put it in a new chunk.
3200 
3201  ExAcquireResourceExclusiveLite(&Vcb->chunk_lock, true);
3202 
3203  Status = alloc_chunk(Vcb, flags, &c, false);
3204 
3205  ExReleaseResourceLite(&Vcb->chunk_lock);
3206 
3207  if (!NT_SUCCESS(Status)) {
3208  ERR("alloc_chunk returned %08lx\n", Status);
3209  return Status;
3210  }
3211 
3212  if (c) {
3214 
3215  if (c->chunk_item->type == flags && (c->chunk_item->size - c->used) >= newlen &&
3216  insert_extent_chunk(Vcb, fcb, c, start_data, newlen, false, data, Irp, rollback, BTRFS_COMPRESSION_NONE, newlen, file_write, irp_offset)) {
3217  written += newlen;
3218 
3219  if (written == orig_length)
3220  return STATUS_SUCCESS;
3221  else {
3222  done = true;
3223  start_data += newlen;
3224  irp_offset += newlen;
3225  length -= newlen;
3226  data = &((uint8_t*)data)[newlen];
3227  }
3228  } else
3230  }
3231 
      // Neither an existing nor a new chunk could take the piece: fall back to fragmented placement.
3232  if (!done) {
3233  Status = insert_chunk_fragmented(fcb, start_data, length, data, false, rollback);
3234  if (!NT_SUCCESS(Status))
3235  ERR("insert_chunk_fragmented returned %08lx\n", Status);
3236 
3237  return Status;
3238  }
3239  }
3240 
3241  return STATUS_DISK_FULL;
3242 }
3243 
/* Shrinks the file described by fcb to `end` bytes.
 *
 * Two paths are visible:
 *   - The file currently holds inline data and end > 0: the first `end` bytes are read
 *     into a temporary buffer, the existing extents are excised, and the data is put
 *     back either as a new inline extent (when end does not exceed the volume's
 *     max_inline option) or through do_write_file() as a sector-aligned, zero-padded
 *     write.
 *   - Otherwise: everything from the sector-aligned new end up to the sector-aligned
 *     old size is excised and the size fields in fcb->Header are refreshed from
 *     st_size.
 *
 * NOTE(review): original line 3245 (the signature) is missing from this listing, so the
 * name used for this block is a guess; the body uses fcb, end, Irp and rollback.
 * Lines 3257, 3267, 3288-3290, 3300, 3302 and 3322 are missing as well and are flagged
 * below. */
3244 __attribute__((nonnull(1,4)))
3246  NTSTATUS Status;
3247 
3248  // FIXME - convert into inline extent if short enough
3249 
3250  if (end > 0 && fcb_is_inline(fcb)) {
3251  uint8_t* buf;
3252  bool make_inline = end <= fcb->Vcb->options.max_inline;
3253 
      // Inline result: room for the EXTENT_DATA header followed by the data itself.
      // Otherwise: the data rounded up to a whole number of sectors.
3254  buf = ExAllocatePoolWithTag(PagedPool, (ULONG)(make_inline ? (offsetof(EXTENT_DATA, data[0]) + end) : sector_align(end, fcb->Vcb->superblock.sector_size)), ALLOC_TAG);
3255  if (!buf) {
3256  ERR("out of memory\n");
      // NOTE(review): original line 3257 is missing; presumably it returns an error status,
      // since buf is used unconditionally below.
3258  }
3259 
      // For the inline case the data is read straight into its final position after the header.
3260  Status = read_file(fcb, make_inline ? (buf + offsetof(EXTENT_DATA, data[0])) : buf, 0, end, NULL, Irp);
3261  if (!NT_SUCCESS(Status)) {
3262  ERR("read_file returned %08lx\n", Status);
3263  ExFreePool(buf);
3264  return Status;
3265  }
3266 
      // NOTE(review): original line 3267 (the excise_extents call for this path) is missing.
3268  if (!NT_SUCCESS(Status)) {
3269  ERR("excise_extents returned %08lx\n", Status);
3270  ExFreePool(buf);
3271  return Status;
3272  }
3273 
3274  if (!make_inline) {
      // Zero the padding between end and the next sector boundary before writing it out.
3275  RtlZeroMemory(buf + end, (ULONG)(sector_align(end, fcb->Vcb->superblock.sector_size) - end));
3276 
3277  Status = do_write_file(fcb, 0, sector_align(end, fcb->Vcb->superblock.sector_size), buf, Irp, false, 0, rollback);
3278  if (!NT_SUCCESS(Status)) {
3279  ERR("do_write_file returned %08lx\n", Status);
3280  ExFreePool(buf);
3281  return Status;
3282  }
3283  } else {
3284  EXTENT_DATA* ed = (EXTENT_DATA*)buf;
3285 
3286  ed->generation = fcb->Vcb->superblock.generation;
3287  ed->decoded_size = end;
      // NOTE(review): original lines 3288-3290 are missing; presumably the remaining
      // EXTENT_DATA fields are set there.
3291  ed->type = EXTENT_TYPE_INLINE;
3292 
3293  Status = add_extent_to_fcb(fcb, 0, ed, (uint16_t)(offsetof(EXTENT_DATA, data[0]) + end), false, NULL, rollback);
3294  if (!NT_SUCCESS(Status)) {
3295  ERR("add_extent_to_fcb returned %08lx\n", Status);
3296  ExFreePool(buf);
3297  return Status;
3298  }
3299 
      // NOTE(review): original lines 3300 and 3302 are missing; one of them presumably
      // updates st_size, which the trace and the header fields below rely on.
3301 
3303  fcb->inode_item_changed = true;
3304  TRACE("setting st_size to %I64x\n", end);
3305 
3306  fcb->Header.AllocationSize.QuadPart = sector_align(fcb->inode_item.st_size, fcb->Vcb->superblock.sector_size);
3307  fcb->Header.FileSize.QuadPart = fcb->inode_item.st_size;
3308  fcb->Header.ValidDataLength.QuadPart = fcb->inode_item.st_size;
3309  }
3310 
3311  ExFreePool(buf);
3312  return STATUS_SUCCESS;
3313  }
3314 
      // Non-inline path: drop whole sectors beyond the new end.
3315  Status = excise_extents(fcb->Vcb, fcb, sector_align(end, fcb->Vcb->superblock.sector_size),
3316  sector_align(fcb->inode_item.st_size, fcb->Vcb->superblock.sector_size), Irp, rollback);
3317  if (!NT_SUCCESS(Status)) {
3318  ERR("excise_extents returned %08lx\n", Status);
3319  return Status;
3320  }
3321 
      // NOTE(review): original line 3322 is missing; presumably it updates st_size.
3323  fcb->inode_item_changed = true;
3324  TRACE("setting st_size to %I64x\n", end);
3325 
3326  fcb->Header.AllocationSize.QuadPart = sector_align(fcb->inode_item.st_size, fcb->Vcb->superblock.sector_size);
3327  fcb->Header.FileSize.QuadPart = fcb->inode_item.st_size;
3328  fcb->Header.ValidDataLength.QuadPart = fcb->inode_item.st_size;
3329  // FIXME - inform cache manager of this
3330 
3331  TRACE("fcb %p FileSize = %I64x\n", fcb, fcb->Header.FileSize.QuadPart);
3332 
3333  return STATUS_SUCCESS;
3334 }
3335 
// Grows a file (or an alternate data stream) so that it ends at byte offset
// `end`, optionally preallocating space, and updates the cached sizes in
// fcb->Header.
//
// NOTE(review): this text comes from a rendered source listing. The declaration
// line that should follow the attribute below is not present, and the embedded
// numbering skips in several places, so some statements are missing from view.
// Judging by the TRACE call and the identifiers used, the parameters are
// presumably (fcb, fileref, end, prealloc, Irp, rollback) - confirm.
//
// Visible behaviour:
//  - fcb->ads set: sizes above 0xffff yield STATUS_DISK_FULL, otherwise the
//    request is handed to stream_set_end_of_file_information.
//  - last non-ignored extent is inline: it is converted to regular data when
//    `end` exceeds options.max_inline, else re-created as a larger inline extent.
//  - last non-ignored extent is not inline: optionally preallocates the range
//    from the old allocation end up to sector_align(end).
//  - no usable extent: either optionally preallocates from offset 0, or creates
//    a zero-filled inline extent of `end` bytes.
// Returns STATUS_SUCCESS, or the failing NTSTATUS of the helper that failed.
3336 __attribute__((nonnull(1,6)))
3338  uint64_t oldalloc, newalloc;
3339  bool cur_inline;
3340  NTSTATUS Status;
3341 
3342  TRACE("(%p, %p, %I64x, %u)\n", fcb, fileref, end, prealloc);
3343 
3344  if (fcb->ads) {
// The value is passed on as a uint16_t, hence the 0xffff ceiling.
3345  if (end > 0xffff)
3346  return STATUS_DISK_FULL;
3347 
3348  return stream_set_end_of_file_information(fcb->Vcb, (uint16_t)end, fcb, fileref, false);
3349  } else {
3350  extent* ext = NULL;
3351  LIST_ENTRY* le;
3352 
// Walk the extent list backwards to find the last entry not flagged `ignore`.
3353  le = fcb->extents.Blink;
3354  while (le != &fcb->extents) {
// NOTE(review): the declaration of ext2 is not visible (listing gap); presumably
// it is the extent record containing `le` - confirm.
3356 
3357  if (!ext2->ignore) {
3358  ext = ext2;
3359  break;
3360  }
3361 
3362  le = le->Blink;
3363  }
3364 
3365  oldalloc = 0;
3366  if (ext) {
3367  EXTENT_DATA* ed = &ext->extent_data;
3368  EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ed->data;
3369 
// End of the currently allocated range: inline extents use decoded_size,
// other extents use num_bytes from the EXTENT_DATA2 payload.
3370  oldalloc = ext->offset + (ed->type == EXTENT_TYPE_INLINE ? ed->decoded_size : ed2->num_bytes);
3371  cur_inline = ed->type == EXTENT_TYPE_INLINE;
3372 
3373  if (cur_inline && end > fcb->Vcb->options.max_inline) {
3374  uint64_t origlength, length;
3375  uint8_t* data;
3376 
3377  TRACE("giving inline file proper extents\n");
3378 
3379  origlength = ed->decoded_size;
3380 
3381  cur_inline = false;
3382 
3383  length = sector_align(origlength, fcb->Vcb->superblock.sector_size);
3384 
// NOTE(review): the allocation of `data` is not visible (listing gap); per the
// error message it is sized by `length` - confirm.
3386  if (!data) {
3387  ERR("could not allocate %I64x bytes for data\n", length);
// NOTE(review): no exit statement is visible here; presumably a return is
// missing from the listing - confirm.
3389  }
3390 
3391  Status = read_file(fcb, data, 0, origlength, NULL, Irp);
3392  if (!NT_SUCCESS(Status)) {
3393  ERR("read_file returned %08lx\n", Status);
3394  ExFreePool(data);
3395  return Status;
3396  }
3397 
// Pad the copied inline data with zeroes up to the sector boundary.
3398  RtlZeroMemory(data + origlength, (ULONG)(length - origlength));
3399 
// NOTE(review): the call whose result is tested next is not visible (listing gap);
// per the error message it is an excise_extents call - arguments unknown here.
3401  if (!NT_SUCCESS(Status)) {
3402  ERR("excise_extents returned %08lx\n", Status);
3403  ExFreePool(data);
3404  return Status;
3405  }
3406 
3407  Status = do_write_file(fcb, 0, length, data, Irp, false, 0, rollback);
3408  if (!NT_SUCCESS(Status)) {
3409  ERR("do_write_file returned %08lx\n", Status);
3410  ExFreePool(data);
3411  return Status;
3412  }
3413 
3414  oldalloc = ext->offset + length;
3415 
3416  ExFreePool(data);
3417  }
3418 
3419  if (cur_inline) {
3420  uint16_t edsize;
3421 
3422  if (end > oldalloc) {
// Build a replacement inline extent large enough to reach `end`.
3423  edsize = (uint16_t)(offsetof(EXTENT_DATA, data[0]) + end - ext->offset);
3424  ed = ExAllocatePoolWithTag(PagedPool, edsize, ALLOC_TAG);
3425 
3426  if (!ed) {
3427  ERR("out of memory\n");
// NOTE(review): no exit statement is visible here; presumably a return is
// missing from the listing - confirm.
3429  }
3430 
3431  ed->generation = fcb->Vcb->superblock.generation;
3432  ed->decoded_size = end - ext->offset;
// NOTE(review): listing gap here - further header fields are presumably set in
// the missing lines; confirm.
3436  ed->type = EXTENT_TYPE_INLINE;
3437 
// NOTE(review): the length passed is oldalloc, not oldalloc - ext->offset; the two
// are equal only when ext->offset is 0 - confirm that holds for inline extents.
3438  Status = read_file(fcb, ed->data, ext->offset, oldalloc, NULL, Irp);
3439  if (!NT_SUCCESS(Status)) {
3440  ERR("read_file returned %08lx\n", Status);
3441  ExFreePool(ed);
3442  return Status;
3443  }
3444 
// Zero the newly exposed bytes between the old end and the new end.
3445  RtlZeroMemory(ed->data + oldalloc - ext->offset, (ULONG)(end - oldalloc));
3446 
3447  remove_fcb_extent(fcb, ext, rollback);
3448 
3449  Status = add_extent_to_fcb(fcb, ext->offset, ed, edsize, ext->unique, NULL, rollback);
3450  if (!NT_SUCCESS(Status)) {
3451  ERR("add_extent_to_fcb returned %08lx\n", Status);
3452  ExFreePool(ed);
3453  return Status;
3454  }
3455 
3456  ExFreePool(ed);
3457 
3458  fcb->extents_changed = true;
3460  }
3461 
3462  TRACE("extending inline file (oldalloc = %I64x, end = %I64x)\n", oldalloc, end);
3463 
// NOTE(review): listing gaps around the next TRACE; the inode_item updates it
// refers to are not visible - confirm.
3465  TRACE("setting st_size to %I64x\n", end);
3466 
3468 
// For an inline file all three cached sizes are set to `end`.
3469  fcb->Header.AllocationSize.QuadPart = fcb->Header.FileSize.QuadPart = fcb->Header.ValidDataLength.QuadPart = end;
3470  } else {
3471  newalloc = sector_align(end, fcb->Vcb->superblock.sector_size);
3472 
3473  if (newalloc > oldalloc) {
3474  if (prealloc) {
3475  // FIXME - try and extend previous extent first
3476 
3477  Status = insert_prealloc_extent(fcb, oldalloc, newalloc - oldalloc, rollback);
3478 
// STATUS_DISK_FULL from preallocation is tolerated; other failures abort.
3479  if (!NT_SUCCESS(Status) && Status != STATUS_DISK_FULL) {
3480  ERR("insert_prealloc_extent returned %08lx\n", Status);
3481  return Status;
3482  }
3483  }
3484 
3485  fcb->extents_changed = true;
3486  }
3487 
// NOTE(review): listing gaps around here; the st_size assignment implied by the
// TRACE below is not visible - confirm.
3489  fcb->inode_item_changed = true;
3491 
3492  TRACE("setting st_size to %I64x\n", end);
3493 
3494  TRACE("newalloc = %I64x\n", newalloc);
3495 
3496  fcb->Header.AllocationSize.QuadPart = newalloc;
3497  fcb->Header.FileSize.QuadPart = fcb->Header.ValidDataLength.QuadPart = end;
3498  }
3499  } else {
// No usable extent was found in the list.
3500  if (end > fcb->Vcb->options.max_inline) {
3501  newalloc = sector_align(end, fcb->Vcb->superblock.sector_size);
3502 
3503  if (prealloc) {
3504  Status = insert_prealloc_extent(fcb, 0, newalloc, rollback);
3505 
// STATUS_DISK_FULL from preallocation is tolerated; other failures abort.
3506  if (!NT_SUCCESS(Status) && Status != STATUS_DISK_FULL) {
3507  ERR("insert_prealloc_extent returned %08lx\n", Status);
3508  return Status;
3509  }
3510  }
3511 
3512  fcb->extents_changed = true;
3513  fcb->inode_item_changed = true;
3515 
// NOTE(review): listing gaps around here; the st_size assignment implied by the
// TRACE below is not visible - confirm.
3517  TRACE("setting st_size to %I64x\n", end);
3518 
3519  TRACE("newalloc = %I64x\n", newalloc);
3520 
3521  fcb->Header.AllocationSize.QuadPart = newalloc;
3522  fcb->Header.FileSize.QuadPart = fcb->Header.ValidDataLength.QuadPart = end;
3523  } else {
// Small enough to stay inline: create a zero-filled inline extent of `end` bytes.
3524  EXTENT_DATA* ed;
3525  uint16_t edsize;
3526 
3527  edsize = (uint16_t)(offsetof(EXTENT_DATA, data[0]) + end);
3528  ed = ExAllocatePoolWithTag(PagedPool, edsize, ALLOC_TAG);
3529 
3530  if (!ed) {
3531  ERR("out of memory\n");
// NOTE(review): no exit statement is visible here; presumably a return is
// missing from the listing - confirm.
3533  }
3534 
3535  ed->generation = fcb->Vcb->superblock.generation;
3536  ed->decoded_size = end;
// NOTE(review): listing gap here - further header fields are presumably set in
// the missing lines; confirm.
3540  ed->type = EXTENT_TYPE_INLINE;
3541 
3542  RtlZeroMemory(ed->data, (ULONG)end);
3543 
3544  Status = add_extent_to_fcb(fcb, 0, ed, edsize, false, NULL, rollback);
3545  if (!NT_SUCCESS(Status)) {
3546  ERR("add_extent_to_fcb returned %08lx\n", Status);
3547  ExFreePool(ed);
3548  return Status;
3549  }
3550 
3551  ExFreePool(ed);
3552 
3553  fcb->extents_changed = true;
3554  fcb->inode_item_changed = true;
3556 
// NOTE(review): listing gaps around here; the st_size assignment implied by the
// TRACE below is not visible - confirm.
3558  TRACE("setting st_size to %I64x\n", end);
3559 
3561 
3562  fcb->Header.AllocationSize.QuadPart = fcb->Header.FileSize.QuadPart = fcb->Header.ValidDataLength.QuadPart = end;
3563  }
3564  }
3565  }
3566 
3567  return STATUS_SUCCESS;
3568 }
3569 
3570 __attribute__((nonnull(1,2,5,6,11)))
3571 static NTSTATUS do_write_file_prealloc(fcb* fcb, extent* ext, uint64_t start_data, uint64_t end_data, void* data, uint64_t* written,
3572  PIRP Irp, bool file_write, uint64_t irp_offset, ULONG priority, LIST_ENTRY* rollback) {
3573  EXTENT_DATA* ed = &ext->extent_data;
3574  EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ed->data;
3575  NTSTATUS Status;
3576  chunk* c = NULL;
3577 
3578  if (start_data <= ext->offset && end_data >= ext->offset + ed2->num_bytes) { // replace all
3579  extent* newext;
3580 
3581  newext = ExAllocatePoolWithTag(PagedPool, offsetof(extent, extent_data) + ext->datalen, ALLOC_TAG);
3582  if (!newext) {
3583  ERR("out of memory\n");
3585  }
3586 
3587  RtlCopyMemory(&newext->extent_data, &ext->extent_data, ext->datalen);
3588 
3590 
3591  Status = write_data_complete(fcb->Vcb, ed2->address + ed2->offset, (uint8_t*)data + ext->offset - start_data, (uint32_t)ed2->num_bytes, Irp,
3592  NULL, file_write, irp_offset + ext->offset - start_data, priority);
3593  if (!NT_SUCCESS(Status)) {
3594  ERR("write_data_complete returned %08lx\n", Status);
3595  return Status;
3596  }
3597 
3599  ULONG sl = (ULONG)(ed2->num_bytes >> fcb->Vcb->sector_shift);
3600  void* csum = ExAllocatePoolWithTag(PagedPool, sl * fcb->Vcb->csum_size, ALLOC_TAG);
3601 
3602  if (!csum) {
3603  ERR("out of memory\n");
3604  ExFreePool(newext);
3606  }
3607 
3608  do_calc_job(fcb->Vcb, (uint8_t*)data + ext->offset - start_data, sl, csum);
3609 
3610  newext->csum = csum;
3611  } else
3612  newext->csum = NULL;
3613 
3614  *written = ed2->num_bytes;
3615 
3616  newext->offset = ext->offset;
3617  newext->datalen = ext->datalen;
3618  newext->unique = ext->unique;
3619  newext->ignore = false;
3620  newext->inserted = true;
3621  InsertHeadList(&ext->list_entry, &newext->list_entry);
3622 
3623  add_insert_extent_rollback(rollback, fcb, newext);
3624 
3625  remove_fcb_extent(fcb, ext, rollback);
3626 
3628  } else if (start_data <= ext->offset && end_data < ext->offset + ed2->num_bytes) { // replace beginning
3629  EXTENT_DATA2* ned2;
3630  extent *newext1, *newext2;
3631 
3632  newext1 = ExAllocatePoolWithTag(PagedPool, offsetof(extent, extent_data) + ext->datalen, ALLOC_TAG);
3633  if (!newext1) {
3634  ERR("out of memory\n");
3636  }
3637 
3638  newext2 = ExAllocatePoolWithTag(PagedPool, offsetof(extent, extent_data) + ext->datalen, ALLOC_TAG);
3639  if (!newext2) {
3640  ERR("out of memory\n");
3641  ExFreePool(newext1);
3643  }
3644 
3645  RtlCopyMemory(&newext1->extent_data, &ext->extent_data, ext->datalen);
3647  ned2 = (EXTENT_DATA2*)newext1->extent_data.data;
3648  ned2->num_bytes = end_data - ext->offset;
3649 
3650  RtlCopyMemory(&newext2->extent_data, &ext->extent_data, ext->datalen);
3651  ned2 = (EXTENT_DATA2*)newext2->extent_data.data;
3652  ned2->offset += end_data - ext->offset;
3653  ned2->num_bytes -= end_data - ext->offset;
3654 
3655  Status = write_data_complete(fcb->Vcb, ed2->address + ed2->offset, (uint8_t*)data + ext->offset - start_data, (uint32_t)(end_data - ext->offset),
3656  Irp, NULL, file_write, irp_offset + ext->offset - start_data, priority);
3657  if (!NT_SUCCESS(Status)) {
3658  ERR("write_data_complete returned %08lx\n", Status);
3659  ExFreePool(newext1);
3660  ExFreePool(newext2);
3661  return Status;
3662  }
3663 
3665  ULONG sl = (ULONG)((end_data - ext->offset) >> fcb->Vcb->sector_shift);
3666  void* csum = ExAllocatePoolWithTag(PagedPool, sl * fcb->Vcb->csum_size, ALLOC_TAG);
3667 
3668  if (!csum) {
3669  ERR("out of memory\n");
3670  ExFreePool(newext1);
3671  ExFreePool(newext2);
3673  }
3674 
3675  do_calc_job(fcb->Vcb, (uint8_t*)data + ext->offset - start_data, sl, csum);
3676 
3677  newext1->csum = csum;
3678  } else
3679  newext1->csum = NULL;
3680 
3681  *written = end_data - ext->offset;
3682 
3683  newext1->offset = ext->offset;
3684  newext1->datalen = ext->datalen;
3685  newext1->unique = ext->unique;
3686  newext1->ignore = false;
3687  newext1->inserted = true;
3688  InsertHeadList(&ext->list_entry, &newext1->list_entry);
3689 
3690  add_insert_extent_rollback(rollback, fcb, newext1);
3691 
3692  newext2->offset = end_data;
3693  newext2->datalen = ext->datalen;
3694  newext2->unique = ext->unique;
3695  newext2->ignore = false;
3696  newext2->inserted = true;
3697  newext2->csum = NULL;
3698  add_extent(fcb, &newext1->list_entry, newext2);
3699 
3700  add_insert_extent_rollback(rollback, fcb, newext2);
3701 
3703 
3704  if (!c)
3705  ERR("get_chunk_from_address(%I64x) failed\n", ed2->address);
3706  else {
3707  Status = update_changed_extent_ref(fcb->Vcb, c, ed2->address, ed2->size, fcb->subvol->id, fcb->inode, ext->offset - ed2->offset, 1,
3709 
3710  if (!NT_SUCCESS(Status)) {
3711  ERR("update_changed_extent_ref returned %08lx\n", Status);
3712  return Status;
3713  }
3714  }
3715 
3716  remove_fcb_extent(fcb, ext, rollback);
3717  } else if (start_data > ext->offset && end_data >= ext->offset + ed2->num_bytes) { // replace end
3718  EXTENT_DATA2* ned2;
3719  extent *newext1, *newext2;
3720 
3721  newext1 = ExAllocatePoolWithTag(PagedPool, offsetof(extent, extent_data) + ext->datalen, ALLOC_TAG);
3722  if (!newext1) {
3723  ERR("out of memory\n");
3725  }
3726 
3727  newext2 = ExAllocatePoolWithTag(PagedPool, offsetof(extent, extent_data) + ext->datalen, ALLOC_TAG);
3728  if (!newext2) {
3729  ERR("out of memory\n");
3730  ExFreePool(newext1);
3732  }
3733 
3734  RtlCopyMemory(&newext1->extent_data, &ext->extent_data, ext->datalen);
3735 
3736  ned2 = (EXTENT_DATA2*)newext1->extent_data.data;
3737  ned2->num_bytes = start_data - ext->offset;
3738 
3739  RtlCopyMemory(&newext2->extent_data, &ext->extent_data, ext->datalen);
3740 
3742  ned2 = (EXTENT_DATA2*)newext2->extent_data.data;
3743  ned2->offset += start_data - ext->offset;
3744  ned2->num_bytes = ext->offset + ed2->num_bytes - start_data;
3745 
3746  Status = write_data_complete(fcb->Vcb, ed2->address + ned2->offset, data, (uint32_t)ned2->num_bytes, Irp, NULL, file_write, irp_offset, priority);
3747  if (!NT_SUCCESS(Status)) {
3748  ERR("write_data_complete returned %08lx\n", Status);
3749  ExFreePool(newext1);
3750  ExFreePool(newext2);
3751  return Status;
3752  }
3753 
3755  ULONG sl = (ULONG)(ned2->num_bytes >> fcb->Vcb->sector_shift);
3756  void* csum = ExAllocatePoolWithTag(PagedPool, sl * fcb->Vcb->csum_size, ALLOC_TAG);
3757 
3758  if (!csum) {
3759  ERR("out of memory\n");
3760  ExFreePool(newext1);
3761  ExFreePool(newext2);
3763  }
3764 
3765  do_calc_job(fcb->Vcb, data, sl, csum);
3766 
3767  newext2->csum = csum;
3768  } else
3769  newext2->csum = NULL;
3770 
3771  *written = ned2->num_bytes;
3772 
3773  newext1->offset = ext->offset;
3774  newext1->datalen = ext->datalen;
3775  newext1->unique = ext->unique;
3776  newext1->ignore = false;
3777  newext1->inserted = true;
3778  newext1->csum = NULL;
3779  InsertHeadList(&ext->list_entry, &newext1->list_entry);
3780 
3781  add_insert_extent_rollback(rollback, fcb, newext1);
3782 
3783  newext2->offset = start_data;
3784  newext2->datalen = ext->datalen;
3785  newext2->unique = ext->unique;
3786  newext2->ignore = false;
3787  newext2->inserted = true;
3788  add_extent(fcb, &newext1->list_entry, newext2);
3789 
3790  add_insert_extent_rollback(rollback, fcb, newext2);
3791 
3793 
3794  if (!c)
3795  ERR("get_chunk_from_address(%I64x) failed\n", ed2->address);
3796  else {
3797  Status = update_changed_extent_ref(fcb->Vcb, c, ed2->address, ed2->size, fcb->subvol->id, fcb->inode, ext->offset - ed2->offset, 1,
3799 
3800  if (!NT_SUCCESS(Status)) {
3801  ERR("update_changed_extent_ref returned %08lx\n", Status);
3802  return Status;
3803  }
3804  }
3805 
3806  remove_fcb_extent(fcb, ext, rollback);
3807  } else if (start_data > ext->offset && end_data < ext->offset + ed2->num_bytes) { // replace middle
3808  EXTENT_DATA2* ned2;
3809  extent *newext1, *newext2, *newext3;
3810 
3811  newext1 = ExAllocatePoolWithTag(PagedPool, offsetof(extent, extent_data) + ext->datalen, ALLOC_TAG);
3812  if (!newext1) {
3813  ERR("out of memory\n");
3815  }
3816 
3817  newext2 = ExAllocatePoolWithTag(PagedPool, offsetof(extent, extent_data) + ext->datalen, ALLOC_TAG);
3818  if (!newext2) {
3819  ERR("out of memory\n");
3820  ExFreePool(newext1);
3822  }
3823 
3824  newext3 = ExAllocatePoolWithTag(PagedPool, offsetof(extent, extent_data) + ext->datalen, ALLOC_TAG);
3825  if (!newext3) {
3826  ERR("out of memory\n");
3827  ExFreePool(newext1);
3828  ExFreePool(newext2);
3830  }
3831 
3832  RtlCopyMemory(&newext1->extent_data, &ext->extent_data, ext->datalen);
3833  RtlCopyMemory(&newext2->extent_data, &ext->extent_data, ext->datalen);
3834  RtlCopyMemory(&newext3->extent_data, &ext->extent_data, ext->datalen);
3835 
3836  ned2 = (EXTENT_DATA2*)newext1->extent_data.data;
3837  ned2->num_bytes = start_data - ext->offset;
3838 
3840  ned2 = (EXTENT_DATA2*)newext2->extent_data.data;
3841  ned2->offset += start_data - ext->offset;
3842  ned2->num_bytes = end_data - start_data;
3843 
3844  ned2 = (EXTENT_DATA2*)newext3->extent_data.data;
3845  ned2->offset += end_data - ext->offset;
3846  ned2->num_bytes -= end_data - ext->offset;
3847 
3848  ned2 = (EXTENT_DATA2*)newext2->extent_data.data;
3849  Status = write_data_complete(fcb->Vcb, ed2->address + ned2->offset, data, (uint32_t)(end_data - start_data), Irp, NULL, file_write, irp_offset, priority);
3850  if (!NT_SUCCESS(Status)) {
3851  ERR("write_data_complete returned %08lx\n", Status);
3852  ExFreePool(newext1);
3853  ExFreePool(newext2);
3854  ExFreePool(newext3);
3855  return Status;
3856  }
3857 
3858  i