ReactOS 0.4.15-dev-5666-gc548b97
flushthread.c
Go to the documentation of this file.
1/* Copyright (c) Mark Harmstone 2016-17
2 *
3 * This file is part of WinBtrfs.
4 *
5 * WinBtrfs is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU Lesser General Public Licence as published by
7 * the Free Software Foundation, either version 3 of the Licence, or
8 * (at your option) any later version.
9 *
10 * WinBtrfs is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU Lesser General Public Licence for more details.
14 *
15 * You should have received a copy of the GNU Lesser General Public Licence
16 * along with WinBtrfs. If not, see <http://www.gnu.org/licenses/>. */
17
18#include "btrfs_drv.h"
19#include "xxhash.h"
20#include "crc32c.h"
21#include <ata.h>
22#include <ntddscsi.h>
23#include <ntddstor.h>
24
25/* cf. __MAX_CSUM_ITEMS in Linux - it needs sizeof(leaf_node) bytes free
26 * so it can do a split. Linux tries to get it so a run will fit in a
27 * sector, but the MAX_CSUM_ITEMS logic is wrong... */
28#define MAX_CSUM_SIZE (4096 - sizeof(tree_header) - (2 * sizeof(leaf_node)))
29
30// #define DEBUG_WRITE_LOOPS
31
32#define BATCH_ITEM_LIMIT 1000
33
34typedef struct {
38
39typedef struct {
44
45typedef struct {
50
53
55 uint8_t objtype, uint64_t offset, _In_opt_ _When_(return >= 0, __drv_aliasesMem) void* data,
57
58_Function_class_(IO_COMPLETION_ROUTINE)
59static NTSTATUS __stdcall write_completion(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID conptr) {
60 write_context* context = conptr;
61
63
64 context->iosb = Irp->IoStatus;
65 KeSetEvent(&context->Event, 0, false);
66
68}
69
74 PIRP Irp;
77
78 TRACE("(%p, %I64x, %p, %x)\n", device, address, data, length);
79
81
83
84 offset.QuadPart = address;
85
86 Irp = IoAllocateIrp(device->StackSize, false);
87
88 if (!Irp) {
89 ERR("IoAllocateIrp failed\n");
91 }
92
95 IrpSp->FileObject = fileobj;
96
97 if (device->Flags & DO_BUFFERED_IO) {
98 Irp->AssociatedIrp.SystemBuffer = data;
99
100 Irp->Flags = IRP_BUFFERED_IO;
101 } else if (device->Flags & DO_DIRECT_IO) {
102 Irp->MdlAddress = IoAllocateMdl(data, length, false, false, NULL);
103 if (!Irp->MdlAddress) {
104 DbgPrint("IoAllocateMdl failed\n");
106 goto exit;
107 }
108
110
111 _SEH2_TRY {
115 } _SEH2_END;
116
117 if (!NT_SUCCESS(Status)) {
118 ERR("MmProbeAndLockPages threw exception %08lx\n", Status);
119 IoFreeMdl(Irp->MdlAddress);
120 goto exit;
121 }
122 } else {
123 Irp->UserBuffer = data;
124 }
125
126 IrpSp->Parameters.Write.Length = length;
127 IrpSp->Parameters.Write.ByteOffset = offset;
128
129 Irp->UserIosb = &context.iosb;
130
131 Irp->UserEvent = &context.Event;
132
133 IoSetCompletionRoutine(Irp, write_completion, &context, true, true, true);
134
136
137 if (Status == STATUS_PENDING) {
139 Status = context.iosb.Status;
140 }
141
142 if (!NT_SUCCESS(Status)) {
143 ERR("IoCallDriver returned %08lx\n", Status);
144 }
145
146 if (device->Flags & DO_DIRECT_IO) {
147 MmUnlockPages(Irp->MdlAddress);
148 IoFreeMdl(Irp->MdlAddress);
149 }
150
151exit:
152 IoFreeIrp(Irp);
153
154 return Status;
155}
156
159 if (!s) {
160 ERR("out of memory\n");
161 return;
162 }
163
164 s->address = address;
165 s->size = size;
166 dev->num_trim_entries++;
167
168 InsertTailList(&dev->trim_list, &s->list_entry);
169}
170
172 LIST_ENTRY* le;
173 ULONG type;
174
175 if (c->chunk_item->type & BLOCK_FLAG_DUPLICATE)
177 else if (c->chunk_item->type & BLOCK_FLAG_RAID0)
179 else if (c->chunk_item->type & BLOCK_FLAG_RAID1)
181 else if (c->chunk_item->type & BLOCK_FLAG_RAID10)
183 else if (c->chunk_item->type & BLOCK_FLAG_RAID5)
185 else if (c->chunk_item->type & BLOCK_FLAG_RAID6)
187 else if (c->chunk_item->type & BLOCK_FLAG_RAID1C3)
189 else if (c->chunk_item->type & BLOCK_FLAG_RAID1C4)
191 else // SINGLE
193
194 le = c->deleting.Flink;
195 while (le != &c->deleting) {
197
198 if (!Vcb->options.no_barrier || !(c->chunk_item->type & BLOCK_FLAG_METADATA)) {
199 CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&c->chunk_item[1];
200
202 uint16_t i;
203
204 for (i = 0; i < c->chunk_item->num_stripes; i++) {
205 if (c->devices[i] && c->devices[i]->devobj && !c->devices[i]->readonly && c->devices[i]->trim)
206 add_trim_entry(c->devices[i], s->address - c->offset + cis[i].offset, s->size);
207 }
208 } else if (type == BLOCK_FLAG_RAID0) {
209 uint64_t startoff, endoff;
210 uint16_t startoffstripe, endoffstripe, i;
211
212 get_raid0_offset(s->address - c->offset, c->chunk_item->stripe_length, c->chunk_item->num_stripes, &startoff, &startoffstripe);
213 get_raid0_offset(s->address - c->offset + s->size - 1, c->chunk_item->stripe_length, c->chunk_item->num_stripes, &endoff, &endoffstripe);
214
215 for (i = 0; i < c->chunk_item->num_stripes; i++) {
216 if (c->devices[i] && c->devices[i]->devobj && !c->devices[i]->readonly && c->devices[i]->trim) {
217 uint64_t stripestart, stripeend;
218
219 if (startoffstripe > i)
220 stripestart = startoff - (startoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length;
221 else if (startoffstripe == i)
222 stripestart = startoff;
223 else
224 stripestart = startoff - (startoff % c->chunk_item->stripe_length);
225
226 if (endoffstripe > i)
227 stripeend = endoff - (endoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length;
228 else if (endoffstripe == i)
229 stripeend = endoff + 1;
230 else
231 stripeend = endoff - (endoff % c->chunk_item->stripe_length);
232
233 if (stripestart != stripeend)
234 add_trim_entry(c->devices[i], stripestart + cis[i].offset, stripeend - stripestart);
235 }
236 }
237 } else if (type == BLOCK_FLAG_RAID10) {
238 uint64_t startoff, endoff;
239 uint16_t sub_stripes, startoffstripe, endoffstripe, i;
240
241 sub_stripes = max(1, c->chunk_item->sub_stripes);
242
243 get_raid0_offset(s->address - c->offset, c->chunk_item->stripe_length, c->chunk_item->num_stripes / sub_stripes, &startoff, &startoffstripe);
244 get_raid0_offset(s->address - c->offset + s->size - 1, c->chunk_item->stripe_length, c->chunk_item->num_stripes / sub_stripes, &endoff, &endoffstripe);
245
246 startoffstripe *= sub_stripes;
247 endoffstripe *= sub_stripes;
248
249 for (i = 0; i < c->chunk_item->num_stripes; i += sub_stripes) {
250 ULONG j;
251 uint64_t stripestart, stripeend;
252
253 if (startoffstripe > i)
254 stripestart = startoff - (startoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length;
255 else if (startoffstripe == i)
256 stripestart = startoff;
257 else
258 stripestart = startoff - (startoff % c->chunk_item->stripe_length);
259
260 if (endoffstripe > i)
261 stripeend = endoff - (endoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length;
262 else if (endoffstripe == i)
263 stripeend = endoff + 1;
264 else
265 stripeend = endoff - (endoff % c->chunk_item->stripe_length);
266
267 if (stripestart != stripeend) {
268 for (j = 0; j < sub_stripes; j++) {
269 if (c->devices[i+j] && c->devices[i+j]->devobj && !c->devices[i+j]->readonly && c->devices[i+j]->trim)
270 add_trim_entry(c->devices[i+j], stripestart + cis[i+j].offset, stripeend - stripestart);
271 }
272 }
273 }
274 }
275 // FIXME - RAID5(?), RAID6(?)
276 }
277
278 le = le->Flink;
279 }
280}
281
282typedef struct {
287#ifdef DEBUG_TRIM_EMULATION
288 PMDL mdl;
289 void* buf;
290#endif
292
293typedef struct {
298
299_Function_class_(IO_COMPLETION_ROUTINE)
300static NTSTATUS __stdcall ioctl_completion(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID conptr) {
302 LONG left2 = InterlockedDecrement(&context->left);
303
305 UNUSED(Irp);
306
307 if (left2 == 0)
308 KeSetEvent(&context->Event, 0, false);
309
311}
312
313#ifdef DEBUG_TRIM_EMULATION
314static void trim_emulation(device* dev) {
315 LIST_ENTRY* le;
317 unsigned int i = 0, count = 0;
318
319 le = dev->trim_list.Flink;
320 while (le != &dev->trim_list) {
321 count++;
322 le = le->Flink;
323 }
324
325 context.left = count;
326
328
330 if (!context.stripes) {
331 ERR("out of memory\n");
332 return;
333 }
334
335 RtlZeroMemory(context.stripes, sizeof(ioctl_context_stripe) * context.left);
336
337 i = 0;
338 le = dev->trim_list.Flink;
339 while (le != &dev->trim_list) {
342
343 WARN("(%I64x, %I64x)\n", s->address, s->size);
344
345 stripe->Irp = IoAllocateIrp(dev->devobj->StackSize, false);
346
347 if (!stripe->Irp) {
348 ERR("IoAllocateIrp failed\n");
349 } else {
352 IrpSp->FileObject = dev->fileobj;
353
355
356 if (!stripe->buf) {
357 ERR("out of memory\n");
358 } else {
359 RtlZeroMemory(stripe->buf, (uint32_t)s->size); // FIXME - randomize instead?
360
361 stripe->mdl = IoAllocateMdl(stripe->buf, (uint32_t)s->size, false, false, NULL);
362
363 if (!stripe->mdl) {
364 ERR("IoAllocateMdl failed\n");
365 } else {
367
368 stripe->Irp->MdlAddress = stripe->mdl;
369
370 IrpSp->Parameters.Write.ByteOffset.QuadPart = s->address;
371 IrpSp->Parameters.Write.Length = s->size;
372
373 stripe->Irp->UserIosb = &stripe->iosb;
374
375 IoSetCompletionRoutine(stripe->Irp, ioctl_completion, &context, true, true, true);
376
377 IoCallDriver(dev->devobj, stripe->Irp);
378 }
379 }
380 }
381
382 i++;
383
384 le = le->Flink;
385 }
386
388
389 for (i = 0; i < count; i++) {
391
392 if (stripe->mdl)
393 IoFreeMdl(stripe->mdl);
394
395 if (stripe->buf)
396 ExFreePool(stripe->buf);
397 }
398
399 ExFreePool(context.stripes);
400}
401#endif
402
404 LIST_ENTRY* le;
405 chunk* c;
406#ifndef DEBUG_TRIM_EMULATION
407 ULONG num;
408#endif
409
410 TRACE("(%p)\n", Vcb);
411
412 ExAcquireResourceSharedLite(&Vcb->chunk_lock, true);
413
414 le = Vcb->chunks.Flink;
415 while (le != &Vcb->chunks) {
417
418 if (c->space_changed) {
420
421 if (c->space_changed) {
422 if (Vcb->trim && !Vcb->options.no_trim)
424
425 space_list_merge(&c->space, &c->space_size, &c->deleting);
426
427 while (!IsListEmpty(&c->deleting)) {
429
430 ExFreePool(s);
431 }
432 }
433
434 c->space_changed = false;
435
437 }
438
439 le = le->Flink;
440 }
441
442 ExReleaseResourceLite(&Vcb->chunk_lock);
443
444 if (Vcb->trim && !Vcb->options.no_trim) {
445#ifndef DEBUG_TRIM_EMULATION
447 ULONG total_num;
448
449 context.left = 0;
450
451 le = Vcb->devices.Flink;
452 while (le != &Vcb->devices) {
454
455 if (dev->devobj && !dev->readonly && dev->trim && dev->num_trim_entries > 0)
456 context.left++;
457
458 le = le->Flink;
459 }
460
461 if (context.left == 0)
462 return;
463
464 total_num = context.left;
465 num = 0;
466
468
470 if (!context.stripes) {
471 ERR("out of memory\n");
472 return;
473 }
474
475 RtlZeroMemory(context.stripes, sizeof(ioctl_context_stripe) * context.left);
476#endif
477
478 le = Vcb->devices.Flink;
479 while (le != &Vcb->devices) {
481
482 if (dev->devobj && !dev->readonly && dev->trim && dev->num_trim_entries > 0) {
483#ifdef DEBUG_TRIM_EMULATION
484 trim_emulation(dev);
485#else
486 LIST_ENTRY* le2;
488 DEVICE_DATA_SET_RANGE* ranges;
489 ULONG datalen = (ULONG)sector_align(sizeof(DEVICE_MANAGE_DATA_SET_ATTRIBUTES), sizeof(uint64_t)) + (dev->num_trim_entries * sizeof(DEVICE_DATA_SET_RANGE)), i;
491
493 if (!stripe->dmdsa) {
494 ERR("out of memory\n");
495 goto nextdev;
496 }
497
498 stripe->dmdsa->Size = sizeof(DEVICE_MANAGE_DATA_SET_ATTRIBUTES);
499 stripe->dmdsa->Action = DeviceDsmAction_Trim;
501 stripe->dmdsa->ParameterBlockOffset = 0;
502 stripe->dmdsa->ParameterBlockLength = 0;
503 stripe->dmdsa->DataSetRangesOffset = (ULONG)sector_align(sizeof(DEVICE_MANAGE_DATA_SET_ATTRIBUTES), sizeof(uint64_t));
504 stripe->dmdsa->DataSetRangesLength = dev->num_trim_entries * sizeof(DEVICE_DATA_SET_RANGE);
505
506 ranges = (DEVICE_DATA_SET_RANGE*)((uint8_t*)stripe->dmdsa + stripe->dmdsa->DataSetRangesOffset);
507
508 i = 0;
509
510 le2 = dev->trim_list.Flink;
511 while (le2 != &dev->trim_list) {
513
514 ranges[i].StartingOffset = s->address;
515 ranges[i].LengthInBytes = s->size;
516 i++;
517
518 le2 = le2->Flink;
519 }
520
521 stripe->Irp = IoAllocateIrp(dev->devobj->StackSize, false);
522
523 if (!stripe->Irp) {
524 ERR("IoAllocateIrp failed\n");
525 goto nextdev;
526 }
527
530 IrpSp->FileObject = dev->fileobj;
531
532 IrpSp->Parameters.DeviceIoControl.IoControlCode = IOCTL_STORAGE_MANAGE_DATA_SET_ATTRIBUTES;
533 IrpSp->Parameters.DeviceIoControl.InputBufferLength = datalen;
534 IrpSp->Parameters.DeviceIoControl.OutputBufferLength = 0;
535
536 stripe->Irp->AssociatedIrp.SystemBuffer = stripe->dmdsa;
537 stripe->Irp->Flags |= IRP_BUFFERED_IO;
538 stripe->Irp->UserBuffer = NULL;
539 stripe->Irp->UserIosb = &stripe->iosb;
540
541 IoSetCompletionRoutine(stripe->Irp, ioctl_completion, &context, true, true, true);
542
543 IoCallDriver(dev->devobj, stripe->Irp);
544
545nextdev:
546#endif
547 while (!IsListEmpty(&dev->trim_list)) {
549 ExFreePool(s);
550 }
551
552 dev->num_trim_entries = 0;
553
554#ifndef DEBUG_TRIM_EMULATION
555 num++;
556#endif
557 }
558
559 le = le->Flink;
560 }
561
562#ifndef DEBUG_TRIM_EMULATION
564
565 for (num = 0; num < total_num; num++) {
566 if (context.stripes[num].dmdsa)
567 ExFreePool(context.stripes[num].dmdsa);
568
569 if (context.stripes[num].Irp)
570 IoFreeIrp(context.stripes[num].Irp);
571 }
572
573 ExFreePool(context.stripes);
574#endif
575 }
576}
577
579 ULONG maxsize = Vcb->superblock.node_size - sizeof(tree_header);
580 LIST_ENTRY* le;
581
582 le = Vcb->trees.Flink;
583 while (le != &Vcb->trees) {
585
586 if (t->write) {
587 if (t->header.num_items == 0 && t->parent) {
588#ifdef DEBUG_WRITE_LOOPS
589 ERR("empty tree found, looping again\n");
590#endif
591 return false;
592 }
593
594 if (t->size > maxsize) {
595#ifdef DEBUG_WRITE_LOOPS
596 ERR("overlarge tree found (%u > %u), looping again\n", t->size, maxsize);
597#endif
598 return false;
599 }
600
601 if (!t->has_new_address) {
602#ifdef DEBUG_WRITE_LOOPS
603 ERR("tree found without new address, looping again\n");
604#endif
605 return false;
606 }
607 }
608
609 le = le->Flink;
610 }
611
612 return true;
613}
614
616 ULONG level;
617 LIST_ENTRY* le;
618
619 for (level = 0; level <= 255; level++) {
620 bool nothing_found = true;
621
622 TRACE("level = %lu\n", level);
623
624 le = Vcb->trees.Flink;
625 while (le != &Vcb->trees) {
627
628 if (t->write && t->header.level == level) {
629 TRACE("tree %p: root = %I64x, level = %x, parent = %p\n", t, t->header.tree_id, t->header.level, t->parent);
630
631 nothing_found = false;
632
633 if (t->parent) {
634 if (!t->parent->write)
635 TRACE("adding tree %p (level %x)\n", t->parent, t->header.level);
636
637 t->parent->write = true;
638 } else if (t->root != Vcb->root_root && t->root != Vcb->chunk_root) {
639 KEY searchkey;
642
643 searchkey.obj_id = t->root->id;
644 searchkey.obj_type = TYPE_ROOT_ITEM;
645 searchkey.offset = 0xffffffffffffffff;
646
647 Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, false, Irp);
648 if (!NT_SUCCESS(Status)) {
649 ERR("error - find_item returned %08lx\n", Status);
650 return Status;
651 }
652
653 if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type) {
654 ERR("could not find ROOT_ITEM for tree %I64x\n", searchkey.obj_id);
656 }
657
658 if (tp.item->size < sizeof(ROOT_ITEM)) { // if not full length, delete and create new entry
660
661 if (!ri) {
662 ERR("out of memory\n");
664 }
665
666 RtlCopyMemory(ri, &t->root->root_item, sizeof(ROOT_ITEM));
667
669 if (!NT_SUCCESS(Status)) {
670 ERR("delete_tree_item returned %08lx\n", Status);
671 ExFreePool(ri);
672 return Status;
673 }
674
676 if (!NT_SUCCESS(Status)) {
677 ERR("insert_tree_item returned %08lx\n", Status);
678 ExFreePool(ri);
679 return Status;
680 }
681 }
682
683 tree* t2 = tp.tree;
684 while (t2) {
685 t2->write = true;
686
687 t2 = t2->parent;
688 }
689 }
690 }
691
692 le = le->Flink;
693 }
694
695 if (nothing_found)
696 break;
697 }
698
699 return STATUS_SUCCESS;
700}
701
703 while (t->parent) {
704 t = t->parent;
705 t->write = true;
706 }
707}
708
712 traverse_ptr insert_tp;
713
715 if (!eism) {
716 ERR("out of memory\n");
717 return false;
718 }
719
720 eism->ei.refcount = 1;
721 eism->ei.generation = Vcb->superblock.generation;
724 eism->tbr.offset = root_id;
725
726 Status = insert_tree_item(Vcb, Vcb->extent_root, address, TYPE_METADATA_ITEM, level, eism, sizeof(EXTENT_ITEM_SKINNY_METADATA), &insert_tp, Irp);
727 if (!NT_SUCCESS(Status)) {
728 ERR("insert_tree_item returned %08lx\n", Status);
729 ExFreePool(eism);
730 return false;
731 }
732
734
735 space_list_subtract(c, address, Vcb->superblock.node_size, rollback);
736
738
739 add_parents_to_cache(insert_tp.tree);
740
741 return true;
742}
743
745 LIST_ENTRY* le;
746 space* s;
747
748 TRACE("(%p, %I64x, %p)\n", Vcb, c->offset, address);
749
750 if (Vcb->superblock.node_size > c->chunk_item->size - c->used)
751 return false;
752
753 if (!c->cache_loaded) {
755
756 if (!NT_SUCCESS(Status)) {
757 ERR("load_cache_chunk returned %08lx\n", Status);
758 return false;
759 }
760 }
761
762 if (IsListEmpty(&c->space_size))
763 return false;
764
765 if (!c->last_alloc_set) {
766 s = CONTAINING_RECORD(c->space.Blink, space, list_entry);
767
768 c->last_alloc = s->address;
769 c->last_alloc_set = true;
770
771 if (s->size >= Vcb->superblock.node_size) {
772 *address = s->address;
773 c->last_alloc += Vcb->superblock.node_size;
774 return true;
775 }
776 }
777
778 le = c->space.Flink;
779 while (le != &c->space) {
781
782 if (s->address <= c->last_alloc && s->address + s->size >= c->last_alloc + Vcb->superblock.node_size) {
783 *address = c->last_alloc;
784 c->last_alloc += Vcb->superblock.node_size;
785 return true;
786 }
787
788 le = le->Flink;
789 }
790
791 le = c->space_size.Flink;
792 while (le != &c->space_size) {
793 s = CONTAINING_RECORD(le, space, list_entry_size);
794
795 if (s->size == Vcb->superblock.node_size) {
796 *address = s->address;
797 c->last_alloc = s->address + Vcb->superblock.node_size;
798 return true;
799 } else if (s->size < Vcb->superblock.node_size) {
800 if (le == c->space_size.Flink)
801 return false;
802
803 s = CONTAINING_RECORD(le->Blink, space, list_entry_size);
804
805 *address = s->address;
806 c->last_alloc = s->address + Vcb->superblock.node_size;
807
808 return true;
809 }
810
811 le = le->Flink;
812 }
813
814 s = CONTAINING_RECORD(c->space_size.Blink, space, list_entry_size);
815
816 if (s->size > Vcb->superblock.node_size) {
817 *address = s->address;
818 c->last_alloc = s->address + Vcb->superblock.node_size;
819 return true;
820 }
821
822 return false;
823}
824
828 EXTENT_ITEM_TREE2* eit2;
829 traverse_ptr insert_tp;
830
831 TRACE("(%p, %x, %I64x, %p, %p, %p, %p)\n", Vcb, level, root_id, c, new_address, Irp, rollback);
832
834 return false;
835
836 if (Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA) {
838
839 if (b)
840 *new_address = address;
841
842 return b;
843 }
844
846 if (!eit2) {
847 ERR("out of memory\n");
848 return false;
849 }
850
851 eit2->eit.extent_item.refcount = 1;
852 eit2->eit.extent_item.generation = Vcb->superblock.generation;
854 eit2->eit.level = level;
856 eit2->tbr.offset = root_id;
857
858 Status = insert_tree_item(Vcb, Vcb->extent_root, address, TYPE_EXTENT_ITEM, Vcb->superblock.node_size, eit2, sizeof(EXTENT_ITEM_TREE2), &insert_tp, Irp);
859 if (!NT_SUCCESS(Status)) {
860 ERR("insert_tree_item returned %08lx\n", Status);
861 ExFreePool(eit2);
862 return false;
863 }
864
866
867 space_list_subtract(c, address, Vcb->superblock.node_size, rollback);
868
870
871 add_parents_to_cache(insert_tp.tree);
872
873 *new_address = address;
874
875 return true;
876}
877
880 chunk *origchunk = NULL, *c;
881 LIST_ENTRY* le;
883
884 if (t->root->id == BTRFS_ROOT_CHUNK)
885 flags = Vcb->system_flags;
886 else
887 flags = Vcb->metadata_flags;
888
889 if (t->has_address) {
890 origchunk = get_chunk_from_address(Vcb, t->header.address);
891
892 if (origchunk && !origchunk->readonly && !origchunk->reloc && origchunk->chunk_item->type == flags &&
893 insert_tree_extent(Vcb, t->header.level, t->root->id, origchunk, &addr, Irp, rollback)) {
894 t->new_address = addr;
895 t->has_new_address = true;
896 return STATUS_SUCCESS;
897 }
898 }
899
900 ExAcquireResourceExclusiveLite(&Vcb->chunk_lock, true);
901
902 le = Vcb->chunks.Flink;
903 while (le != &Vcb->chunks) {
905
906 if (!c->readonly && !c->reloc) {
908
909 if (c != origchunk && c->chunk_item->type == flags && (c->chunk_item->size - c->used) >= Vcb->superblock.node_size) {
910 if (insert_tree_extent(Vcb, t->header.level, t->root->id, c, &addr, Irp, rollback)) {
912 ExReleaseResourceLite(&Vcb->chunk_lock);
913 t->new_address = addr;
914 t->has_new_address = true;
915 return STATUS_SUCCESS;
916 }
917 }
918
920 }
921
922 le = le->Flink;
923 }
924
925 // allocate new chunk if necessary
926
927 Status = alloc_chunk(Vcb, flags, &c, false);
928
929 if (!NT_SUCCESS(Status)) {
930 ERR("alloc_chunk returned %08lx\n", Status);
931 ExReleaseResourceLite(&Vcb->chunk_lock);
932 return Status;
933 }
934
936
937 if ((c->chunk_item->size - c->used) >= Vcb->superblock.node_size) {
938 if (insert_tree_extent(Vcb, t->header.level, t->root->id, c, &addr, Irp, rollback)) {
940 ExReleaseResourceLite(&Vcb->chunk_lock);
941 t->new_address = addr;
942 t->has_new_address = true;
943 return STATUS_SUCCESS;
944 }
945 }
946
948
949 ExReleaseResourceLite(&Vcb->chunk_lock);
950
951 ERR("couldn't find any metadata chunks with %x bytes free\n", Vcb->superblock.node_size);
952
953 return STATUS_DISK_FULL;
954}
955
958 uint64_t rc, root;
959
960 TRACE("(%p, %I64x, %p)\n", Vcb, address, t);
961
962 rc = get_extent_refcount(Vcb, address, Vcb->superblock.node_size, Irp);
963 if (rc == 0) {
964 ERR("error - refcount for extent %I64x was 0\n", address);
966 }
967
968 if (!t || t->parent)
969 root = parent_root;
970 else
971 root = t->header.tree_id;
972
973 Status = decrease_extent_refcount_tree(Vcb, address, Vcb->superblock.node_size, root, level, Irp);
974 if (!NT_SUCCESS(Status)) {
975 ERR("decrease_extent_refcount_tree returned %08lx\n", Status);
976 return Status;
977 }
978
979 if (rc == 1) {
981
982 if (c) {
984
985 if (!c->cache_loaded) {
987
988 if (!NT_SUCCESS(Status)) {
989 ERR("load_cache_chunk returned %08lx\n", Status);
991 return Status;
992 }
993 }
994
995 c->used -= Vcb->superblock.node_size;
996
997 space_list_add(c, address, Vcb->superblock.node_size, rollback);
998
1000 } else
1001 ERR("could not find chunk for address %I64x\n", address);
1002 }
1003
1004 return STATUS_SUCCESS;
1005}
1006
1008 LIST_ENTRY *le2, *list;
1009 changed_extent_ref* cer;
1010
1011 list = old ? &ce->old_refs : &ce->refs;
1012
1013 le2 = list->Flink;
1014 while (le2 != list) {
1016
1017 if (cer->type == TYPE_EXTENT_DATA_REF && cer->edr.root == edr->root && cer->edr.objid == edr->objid && cer->edr.offset == edr->offset) {
1018 cer->edr.count += edr->count;
1019 goto end;
1020 }
1021
1022 le2 = le2->Flink;
1023 }
1024
1026 if (!cer) {
1027 ERR("out of memory\n");
1029 }
1030
1032 RtlCopyMemory(&cer->edr, edr, sizeof(EXTENT_DATA_REF));
1034
1035end:
1036 if (old)
1037 ce->old_count += edr->count;
1038 else
1039 ce->count += edr->count;
1040
1041 return STATUS_SUCCESS;
1042}
1043
1045 LIST_ENTRY *le2, *list;
1046 changed_extent_ref* cer;
1047
1048 list = old ? &ce->old_refs : &ce->refs;
1049
1050 le2 = list->Flink;
1051 while (le2 != list) {
1053
1054 if (cer->type == TYPE_SHARED_DATA_REF && cer->sdr.offset == sdr->offset) {
1055 cer->sdr.count += sdr->count;
1056 goto end;
1057 }
1058
1059 le2 = le2->Flink;
1060 }
1061
1063 if (!cer) {
1064 ERR("out of memory\n");
1066 }
1067
1069 RtlCopyMemory(&cer->sdr, sdr, sizeof(SHARED_DATA_REF));
1071
1072end:
1073 if (old)
1074 ce->old_count += sdr->count;
1075 else
1076 ce->count += sdr->count;
1077
1078 return STATUS_SUCCESS;
1079}
1080
1082 KEY searchkey;
1085
1086 if (!t->updated_extents && t->has_address) {
1088 if (!NT_SUCCESS(Status)) {
1089 ERR("update_tree_extents returned %08lx\n", Status);
1090 return false;
1091 }
1092 }
1093
1094 searchkey.obj_id = t->header.address;
1095 searchkey.obj_type = Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA ? TYPE_METADATA_ITEM : TYPE_EXTENT_ITEM;
1096 searchkey.offset = 0xffffffffffffffff;
1097
1098 Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, false, Irp);
1099 if (!NT_SUCCESS(Status)) {
1100 ERR("error - find_item returned %08lx\n", Status);
1101 return false;
1102 }
1103
1104 if (tp.item->key.obj_id == t->header.address && (tp.item->key.obj_type == TYPE_METADATA_ITEM || tp.item->key.obj_type == TYPE_EXTENT_ITEM))
1105 return false;
1106 else
1107 return true;
1108}
1109
1112 uint64_t rc = get_extent_refcount(Vcb, t->header.address, Vcb->superblock.node_size, Irp);
1113 uint64_t flags = get_extent_flags(Vcb, t->header.address, Irp);
1114
1115 if (rc == 0) {
1116 ERR("refcount for extent %I64x was 0\n", t->header.address);
1117 return STATUS_INTERNAL_ERROR;
1118 }
1119
1120 if (flags & EXTENT_ITEM_SHARED_BACKREFS || t->header.flags & HEADER_FLAG_SHARED_BACKREF || !(t->header.flags & HEADER_FLAG_MIXED_BACKREF)) {
1121 TREE_BLOCK_REF tbr;
1122 bool unique = rc > 1 ? false : (t->parent ? shared_tree_is_unique(Vcb, t->parent, Irp, rollback) : false);
1123
1124 if (t->header.level == 0) {
1125 LIST_ENTRY* le;
1126
1127 le = t->itemlist.Flink;
1128 while (le != &t->itemlist) {
1130
1131 if (!td->inserted && td->key.obj_type == TYPE_EXTENT_DATA && td->size >= sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2)) {
1132 EXTENT_DATA* ed = (EXTENT_DATA*)td->data;
1133
1135 EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ed->data;
1136
1137 if (ed2->size > 0) {
1138 EXTENT_DATA_REF edr;
1139 changed_extent* ce = NULL;
1141
1142 if (c) {
1143 LIST_ENTRY* le2;
1144
1145 le2 = c->changed_extents.Flink;
1146 while (le2 != &c->changed_extents) {
1148
1149 if (ce2->address == ed2->address) {
1150 ce = ce2;
1151 break;
1152 }
1153
1154 le2 = le2->Flink;
1155 }
1156 }
1157
1158 edr.root = t->root->id;
1159 edr.objid = td->key.obj_id;
1160 edr.offset = td->key.offset - ed2->offset;
1161 edr.count = 1;
1162
1163 if (ce) {
1164 Status = add_changed_extent_ref_edr(ce, &edr, true);
1165 if (!NT_SUCCESS(Status)) {
1166 ERR("add_changed_extent_ref_edr returned %08lx\n", Status);
1167 return Status;
1168 }
1169
1170 Status = add_changed_extent_ref_edr(ce, &edr, false);
1171 if (!NT_SUCCESS(Status)) {
1172 ERR("add_changed_extent_ref_edr returned %08lx\n", Status);
1173 return Status;
1174 }
1175 }
1176
1178 if (!NT_SUCCESS(Status)) {
1179 ERR("increase_extent_refcount returned %08lx\n", Status);
1180 return Status;
1181 }
1182
1183 if ((flags & EXTENT_ITEM_SHARED_BACKREFS && unique) || !(t->header.flags & HEADER_FLAG_MIXED_BACKREF)) {
1184 uint64_t sdrrc = find_extent_shared_data_refcount(Vcb, ed2->address, t->header.address, Irp);
1185
1186 if (sdrrc > 0) {
1187 SHARED_DATA_REF sdr;
1188
1189 sdr.offset = t->header.address;
1190 sdr.count = 1;
1191
1193 t->header.address, ce ? ce->superseded : false, Irp);
1194 if (!NT_SUCCESS(Status)) {
1195 ERR("decrease_extent_refcount returned %08lx\n", Status);
1196 return Status;
1197 }
1198
1199 if (ce) {
1200 LIST_ENTRY* le2;
1201
1202 le2 = ce->refs.Flink;
1203 while (le2 != &ce->refs) {
1205
1206 if (cer->type == TYPE_SHARED_DATA_REF && cer->sdr.offset == sdr.offset) {
1207 ce->count--;
1208 cer->sdr.count--;
1209 break;
1210 }
1211
1212 le2 = le2->Flink;
1213 }
1214
1215 le2 = ce->old_refs.Flink;
1216 while (le2 != &ce->old_refs) {
1218
1219 if (cer->type == TYPE_SHARED_DATA_REF && cer->sdr.offset == sdr.offset) {
1220 ce->old_count--;
1221
1222 if (cer->sdr.count > 1)
1223 cer->sdr.count--;
1224 else {
1226 ExFreePool(cer);
1227 }
1228
1229 break;
1230 }
1231
1232 le2 = le2->Flink;
1233 }
1234 }
1235 }
1236 }
1237
1238 // FIXME - clear shared flag if unique?
1239 }
1240 }
1241 }
1242
1243 le = le->Flink;
1244 }
1245 } else {
1246 LIST_ENTRY* le;
1247
1248 le = t->itemlist.Flink;
1249 while (le != &t->itemlist) {
1251
1252 if (!td->inserted) {
1253 tbr.offset = t->root->id;
1254
1256 &tbr, &td->key, t->header.level - 1, Irp);
1257 if (!NT_SUCCESS(Status)) {
1258 ERR("increase_extent_refcount returned %08lx\n", Status);
1259 return Status;
1260 }
1261
1262 if (unique || !(t->header.flags & HEADER_FLAG_MIXED_BACKREF)) {
1263 uint64_t sbrrc = find_extent_shared_tree_refcount(Vcb, td->treeholder.address, t->header.address, Irp);
1264
1265 if (sbrrc > 0) {
1266 SHARED_BLOCK_REF sbr;
1267
1268 sbr.offset = t->header.address;
1269
1270 Status = decrease_extent_refcount(Vcb, td->treeholder.address, Vcb->superblock.node_size, TYPE_SHARED_BLOCK_REF, &sbr, NULL, 0,
1271 t->header.address, false, Irp);
1272 if (!NT_SUCCESS(Status)) {
1273 ERR("decrease_extent_refcount returned %08lx\n", Status);
1274 return Status;
1275 }
1276 }
1277 }
1278
1279 // FIXME - clear shared flag if unique?
1280 }
1281
1282 le = le->Flink;
1283 }
1284 }
1285
1286 if (unique) {
1287 uint64_t sbrrc = find_extent_shared_tree_refcount(Vcb, t->header.address, t->parent->header.address, Irp);
1288
1289 if (sbrrc == 1) {
1290 SHARED_BLOCK_REF sbr;
1291
1292 sbr.offset = t->parent->header.address;
1293
1294 Status = decrease_extent_refcount(Vcb, t->header.address, Vcb->superblock.node_size, TYPE_SHARED_BLOCK_REF, &sbr, NULL, 0,
1295 t->parent->header.address, false, Irp);
1296 if (!NT_SUCCESS(Status)) {
1297 ERR("decrease_extent_refcount returned %08lx\n", Status);
1298 return Status;
1299 }
1300 }
1301 }
1302
1303 if (t->parent)
1304 tbr.offset = t->parent->header.tree_id;
1305 else
1306 tbr.offset = t->header.tree_id;
1307
1308 Status = increase_extent_refcount(Vcb, t->header.address, Vcb->superblock.node_size, TYPE_TREE_BLOCK_REF, &tbr,
1309 t->parent ? &t->paritem->key : NULL, t->header.level, Irp);
1310 if (!NT_SUCCESS(Status)) {
1311 ERR("increase_extent_refcount returned %08lx\n", Status);
1312 return Status;
1313 }
1314
1315 // FIXME - clear shared flag if unique?
1316
1317 t->header.flags &= ~HEADER_FLAG_SHARED_BACKREF;
1318 }
1319
1320 if (rc > 1 || t->header.tree_id == t->root->id) {
1321 Status = reduce_tree_extent(Vcb, t->header.address, t, t->parent ? t->parent->header.tree_id : t->header.tree_id, t->header.level, Irp, rollback);
1322
1323 if (!NT_SUCCESS(Status)) {
1324 ERR("reduce_tree_extent returned %08lx\n", Status);
1325 return Status;
1326 }
1327 }
1328
1329 t->has_address = false;
1330
1331 if ((rc > 1 || t->header.tree_id != t->root->id) && !(flags & EXTENT_ITEM_SHARED_BACKREFS)) {
1332 if (t->header.tree_id == t->root->id) {
1334 update_extent_flags(Vcb, t->header.address, flags, Irp);
1335 }
1336
1337 if (t->header.level > 0) {
1338 LIST_ENTRY* le;
1339
1340 le = t->itemlist.Flink;
1341 while (le != &t->itemlist) {
1343
1344 if (!td->inserted) {
1345 if (t->header.tree_id == t->root->id) {
1346 SHARED_BLOCK_REF sbr;
1347
1348 sbr.offset = t->header.address;
1349
1350 Status = increase_extent_refcount(Vcb, td->treeholder.address, Vcb->superblock.node_size, TYPE_SHARED_BLOCK_REF, &sbr, &td->key, t->header.level - 1, Irp);
1351 } else {
1352 TREE_BLOCK_REF tbr;
1353
1354 tbr.offset = t->root->id;
1355
1356 Status = increase_extent_refcount(Vcb, td->treeholder.address, Vcb->superblock.node_size, TYPE_TREE_BLOCK_REF, &tbr, &td->key, t->header.level - 1, Irp);
1357 }
1358
1359 if (!NT_SUCCESS(Status)) {
1360 ERR("increase_extent_refcount returned %08lx\n", Status);
1361 return Status;
1362 }
1363 }
1364
1365 le = le->Flink;
1366 }
1367 } else {
1368 LIST_ENTRY* le;
1369
1370 le = t->itemlist.Flink;
1371 while (le != &t->itemlist) {
1373
1374 if (!td->inserted && td->key.obj_type == TYPE_EXTENT_DATA && td->size >= sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2)) {
1375 EXTENT_DATA* ed = (EXTENT_DATA*)td->data;
1376
1378 EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ed->data;
1379
1380 if (ed2->size > 0) {
1381 changed_extent* ce = NULL;
1383
1384 if (c) {
1385 LIST_ENTRY* le2;
1386
1387 le2 = c->changed_extents.Flink;
1388 while (le2 != &c->changed_extents) {
1390
1391 if (ce2->address == ed2->address) {
1392 ce = ce2;
1393 break;
1394 }
1395
1396 le2 = le2->Flink;
1397 }
1398 }
1399
1400 if (t->header.tree_id == t->root->id) {
1401 SHARED_DATA_REF sdr;
1402
1403 sdr.offset = t->header.address;
1404 sdr.count = 1;
1405
1406 if (ce) {
1407 Status = add_changed_extent_ref_sdr(ce, &sdr, true);
1408 if (!NT_SUCCESS(Status)) {
1409 ERR("add_changed_extent_ref_edr returned %08lx\n", Status);
1410 return Status;
1411 }
1412
1413 Status = add_changed_extent_ref_sdr(ce, &sdr, false);
1414 if (!NT_SUCCESS(Status)) {
1415 ERR("add_changed_extent_ref_edr returned %08lx\n", Status);
1416 return Status;
1417 }
1418 }
1419
1421 } else {
1422 EXTENT_DATA_REF edr;
1423
1424 edr.root = t->root->id;
1425 edr.objid = td->key.obj_id;
1426 edr.offset = td->key.offset - ed2->offset;
1427 edr.count = 1;
1428
1429 if (ce) {
1430 Status = add_changed_extent_ref_edr(ce, &edr, true);
1431 if (!NT_SUCCESS(Status)) {
1432 ERR("add_changed_extent_ref_edr returned %08lx\n", Status);
1433 return Status;
1434 }
1435
1436 Status = add_changed_extent_ref_edr(ce, &edr, false);
1437 if (!NT_SUCCESS(Status)) {
1438 ERR("add_changed_extent_ref_edr returned %08lx\n", Status);
1439 return Status;
1440 }
1441 }
1442
1444 }
1445
1446 if (!NT_SUCCESS(Status)) {
1447 ERR("increase_extent_refcount returned %08lx\n", Status);
1448 return Status;
1449 }
1450 }
1451 }
1452 }
1453
1454 le = le->Flink;
1455 }
1456 }
1457 }
1458
1459 t->updated_extents = true;
1460 t->header.tree_id = t->root->id;
1461
1462 return STATUS_SUCCESS;
1463}
1464
1466 LIST_ENTRY* le;
1468 bool changed = false;
1469 uint8_t max_level = 0, level;
1470
1471 TRACE("(%p)\n", Vcb);
1472
1473 le = Vcb->trees.Flink;
1474 while (le != &Vcb->trees) {
1476
1477 if (t->write && !t->has_new_address) {
1478 chunk* c;
1479
1480 if (t->has_address) {
1481 c = get_chunk_from_address(Vcb, t->header.address);
1482
1483 if (c) {
1484 if (!c->cache_loaded) {
1486
1487 if (!c->cache_loaded) {
1489
1490 if (!NT_SUCCESS(Status)) {
1491 ERR("load_cache_chunk returned %08lx\n", Status);
1493 return Status;
1494 }
1495 }
1496
1498 }
1499 }
1500 }
1501
1503 if (!NT_SUCCESS(Status)) {
1504 ERR("get_tree_new_address returned %08lx\n", Status);
1505 return Status;
1506 }
1507
1508 TRACE("allocated extent %I64x\n", t->new_address);
1509
1510 c = get_chunk_from_address(Vcb, t->new_address);
1511
1512 if (c)
1513 c->used += Vcb->superblock.node_size;
1514 else {
1515 ERR("could not find chunk for address %I64x\n", t->new_address);
1516 return STATUS_INTERNAL_ERROR;
1517 }
1518
1519 changed = true;
1520
1521 if (t->header.level > max_level)
1522 max_level = t->header.level;
1523 }
1524
1525 le = le->Flink;
1526 }
1527
1528 if (!changed)
1529 return STATUS_SUCCESS;
1530
1531 level = max_level;
1532 do {
1533 le = Vcb->trees.Flink;
1534 while (le != &Vcb->trees) {
1536
1537 if (t->write && !t->updated_extents && t->has_address && t->header.level == level) {
1539 if (!NT_SUCCESS(Status)) {
1540 ERR("update_tree_extents returned %08lx\n", Status);
1541 return Status;
1542 }
1543 }
1544
1545 le = le->Flink;
1546 }
1547
1548 if (level == 0)
1549 break;
1550
1551 level--;
1552 } while (true);
1553
1554 return STATUS_SUCCESS;
1555}
1556
1558 LIST_ENTRY* le;
1560
1561 TRACE("(%p)\n", Vcb);
1562
1563 le = Vcb->trees.Flink;
1564 while (le != &Vcb->trees) {
1566
1567 if (t->write && !t->parent) {
1568 if (t->root != Vcb->root_root && t->root != Vcb->chunk_root) {
1569 KEY searchkey;
1571
1572 searchkey.obj_id = t->root->id;
1573 searchkey.obj_type = TYPE_ROOT_ITEM;
1574 searchkey.offset = 0xffffffffffffffff;
1575
1576 Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, false, Irp);
1577 if (!NT_SUCCESS(Status)) {
1578 ERR("error - find_item returned %08lx\n", Status);
1579 return Status;
1580 }
1581
1582 if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type) {
1583 ERR("could not find ROOT_ITEM for tree %I64x\n", searchkey.obj_id);
1584 return STATUS_INTERNAL_ERROR;
1585 }
1586
1587 TRACE("updating the address for root %I64x to %I64x\n", searchkey.obj_id, t->new_address);
1588
1589 t->root->root_item.block_number = t->new_address;
1590 t->root->root_item.root_level = t->header.level;
1591 t->root->root_item.generation = Vcb->superblock.generation;
1592 t->root->root_item.generation2 = Vcb->superblock.generation;
1593
1594 // item is guaranteed to be at least sizeof(ROOT_ITEM), due to add_parents
1595
1596 RtlCopyMemory(tp.item->data, &t->root->root_item, sizeof(ROOT_ITEM));
1597 }
1598
1599 t->root->treeholder.address = t->new_address;
1600 t->root->treeholder.generation = Vcb->superblock.generation;
1601 }
1602
1603 le = le->Flink;
1604 }
1605
1606 if (!no_cache && !(Vcb->superblock.compat_ro_flags & BTRFS_COMPAT_RO_FLAGS_FREE_SPACE_CACHE)) {
1607 ExAcquireResourceSharedLite(&Vcb->chunk_lock, true);
1609 ExReleaseResourceLite(&Vcb->chunk_lock);
1610
1611 if (!NT_SUCCESS(Status)) {
1612 ERR("update_chunk_caches returned %08lx\n", Status);
1613 return Status;
1614 }
1615 }
1616
1617 return STATUS_SUCCESS;
1618}
1619
1621 chunk* c;
1622 LIST_ENTRY* le;
1623 tree_write* tw;
1625 ULONG i, num_bits;
1626 write_data_context* wtc;
1627 ULONG bit_num = 0;
1628 bool raid56 = false;
1629
1630 // merge together runs
1631 c = NULL;
1632 le = tree_writes->Flink;
1633 while (le != tree_writes) {
1635
1636 if (!c || tw->address < c->offset || tw->address >= c->offset + c->chunk_item->size)
1638 else {
1640
1641 if (tw->address == tw2->address + tw2->length) {
1643
1644 if (!data) {
1645 ERR("out of memory\n");
1647 }
1648
1649 RtlCopyMemory(data, tw2->data, tw2->length);
1650 RtlCopyMemory(&data[tw2->length], tw->data, tw->length);
1651
1652 if (!no_free || tw2->allocated)
1653 ExFreePool(tw2->data);
1654
1655 tw2->data = data;
1656 tw2->length += tw->length;
1657 tw2->allocated = true;
1658
1659 if (!no_free || tw->allocated)
1660 ExFreePool(tw->data);
1661
1663 ExFreePool(tw);
1664
1665 le = tw2->list_entry.Flink;
1666 continue;
1667 }
1668 }
1669
1670 tw->c = c;
1671
1672 if (c->chunk_item->type & (BLOCK_FLAG_RAID5 | BLOCK_FLAG_RAID6))
1673 raid56 = true;
1674
1675 le = le->Flink;
1676 }
1677
1678 num_bits = 0;
1679
1680 le = tree_writes->Flink;
1681 while (le != tree_writes) {
1683
1684 num_bits++;
1685
1686 le = le->Flink;
1687 }
1688
1690 if (!wtc) {
1691 ERR("out of memory\n");
1693 }
1694
1695 le = tree_writes->Flink;
1696
1697 while (le != tree_writes) {
1699
1700 TRACE("address: %I64x, size: %x\n", tw->address, tw->length);
1701
1702 KeInitializeEvent(&wtc[bit_num].Event, NotificationEvent, false);
1703 InitializeListHead(&wtc[bit_num].stripes);
1704 wtc[bit_num].need_wait = false;
1705 wtc[bit_num].stripes_left = 0;
1706 wtc[bit_num].parity1 = wtc[bit_num].parity2 = wtc[bit_num].scratch = NULL;
1707 wtc[bit_num].mdl = wtc[bit_num].parity1_mdl = wtc[bit_num].parity2_mdl = NULL;
1708
1709 Status = write_data(Vcb, tw->address, tw->data, tw->length, &wtc[bit_num], NULL, NULL, false, 0, HighPagePriority);
1710 if (!NT_SUCCESS(Status)) {
1711 ERR("write_data returned %08lx\n", Status);
1712
1713 for (i = 0; i < num_bits; i++) {
1715 }
1716 ExFreePool(wtc);
1717
1718 return Status;
1719 }
1720
1721 bit_num++;
1722
1723 le = le->Flink;
1724 }
1725
1726 for (i = 0; i < num_bits; i++) {
1727 if (wtc[i].stripes.Flink != &wtc[i].stripes) {
1728 // launch writes and wait
1729 le = wtc[i].stripes.Flink;
1730 while (le != &wtc[i].stripes) {
1732
1733 if (stripe->status != WriteDataStatus_Ignore) {
1734 wtc[i].need_wait = true;
1736 }
1737
1738 le = le->Flink;
1739 }
1740 }
1741 }
1742
1743 for (i = 0; i < num_bits; i++) {
1744 if (wtc[i].need_wait)
1746 }
1747
1748 for (i = 0; i < num_bits; i++) {
1749 le = wtc[i].stripes.Flink;
1750 while (le != &wtc[i].stripes) {
1752
1753 if (stripe->status != WriteDataStatus_Ignore && !NT_SUCCESS(stripe->iosb.Status)) {
1754 Status = stripe->iosb.Status;
1756 break;
1757 }
1758
1759 le = le->Flink;
1760 }
1761
1763 }
1764
1765 ExFreePool(wtc);
1766
1767 if (raid56) {
1768 c = NULL;
1769
1770 le = tree_writes->Flink;
1771 while (le != tree_writes) {
1773
1774 if (tw->c != c) {
1775 c = tw->c;
1776
1777 ExAcquireResourceExclusiveLite(&c->partial_stripes_lock, true);
1778
1779 while (!IsListEmpty(&c->partial_stripes)) {
1781
1783
1784 if (ps->bmparr)
1785 ExFreePool(ps->bmparr);
1786
1787 ExFreePool(ps);
1788
1789 if (!NT_SUCCESS(Status)) {
1790 ERR("flush_partial_stripe returned %08lx\n", Status);
1791 ExReleaseResourceLite(&c->partial_stripes_lock);
1792 return Status;
1793 }
1794 }
1795
1796 ExReleaseResourceLite(&c->partial_stripes_lock);
1797 }
1798
1799 le = le->Flink;
1800 }
1801 }
1802
1803 return STATUS_SUCCESS;
1804}
1805
1807 switch (Vcb->superblock.csum_type) {
1808 case CSUM_TYPE_CRC32C:
1809 *((uint32_t*)th) = ~calc_crc32c(0xffffffff, (uint8_t*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum));
1810 break;
1811
1812 case CSUM_TYPE_XXHASH:
1813 *((uint64_t*)th) = XXH64((uint8_t*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum), 0);
1814 break;
1815
1816 case CSUM_TYPE_SHA256:
1817 calc_sha256((uint8_t*)th, &th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum));
1818 break;
1819
1820 case CSUM_TYPE_BLAKE2:
1821 blake2b((uint8_t*)th, BLAKE2_HASH_SIZE, &th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum));
1822 break;
1823 }
1824}
1825
1827 ULONG level;
1828 uint8_t *data, *body;
1830 LIST_ENTRY* le;
1831 LIST_ENTRY tree_writes;
1832 tree_write* tw;
1833
1834 TRACE("(%p)\n", Vcb);
1835
1836 InitializeListHead(&tree_writes);
1837
1838 for (level = 0; level <= 255; level++) {
1839 bool nothing_found = true;
1840
1841 TRACE("level = %lu\n", level);
1842
1843 le = Vcb->trees.Flink;
1844 while (le != &Vcb->trees) {
1846
1847 if (t->write && t->header.level == level) {
1848 KEY firstitem, searchkey;
1849 LIST_ENTRY* le2;
1851
1852 if (!t->has_new_address) {
1853 ERR("error - tried to write tree with no new address\n");
1854 return STATUS_INTERNAL_ERROR;
1855 }
1856
1857 le2 = t->itemlist.Flink;
1858 while (le2 != &t->itemlist) {
1860 if (!td->ignore) {
1861 firstitem = td->key;
1862 break;
1863 }
1864 le2 = le2->Flink;
1865 }
1866
1867 if (t->parent) {
1868 t->paritem->key = firstitem;
1869 t->paritem->treeholder.address = t->new_address;
1870 t->paritem->treeholder.generation = Vcb->superblock.generation;
1871 }
1872
1873 if (!(Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA)) {
1874 EXTENT_ITEM_TREE* eit;
1875
1876 searchkey.obj_id = t->new_address;
1877 searchkey.obj_type = TYPE_EXTENT_ITEM;
1878 searchkey.offset = Vcb->superblock.node_size;
1879
1880 Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, false, Irp);
1881 if (!NT_SUCCESS(Status)) {
1882 ERR("error - find_item returned %08lx\n", Status);
1883 return Status;
1884 }
1885
1886 if (keycmp(searchkey, tp.item->key)) {
1887 ERR("could not find %I64x,%x,%I64x in extent_root (found %I64x,%x,%I64x instead)\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset, tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset);
1888 return STATUS_INTERNAL_ERROR;
1889 }
1890
1891 if (tp.item->size < sizeof(EXTENT_ITEM_TREE)) {
1892 ERR("(%I64x,%x,%I64x) was %u bytes, expected at least %Iu\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_ITEM_TREE));
1893 return STATUS_INTERNAL_ERROR;
1894 }
1895
1896 eit = (EXTENT_ITEM_TREE*)tp.item->data;
1897 eit->firstitem = firstitem;
1898 }
1899
1900 nothing_found = false;
1901 }
1902
1903 le = le->Flink;
1904 }
1905
1906 if (nothing_found)
1907 break;
1908 }
1909
1910 TRACE("allocated tree extents\n");
1911
1912 le = Vcb->trees.Flink;
1913 while (le != &Vcb->trees) {
1915 LIST_ENTRY* le2;
1916#ifdef DEBUG_PARANOID
1917 uint32_t num_items = 0, size = 0;
1918 bool crash = false;
1919#endif
1920
1921 if (t->write) {
1922#ifdef DEBUG_PARANOID
1923 bool first = true;
1924 KEY lastkey;
1925
1926 le2 = t->itemlist.Flink;
1927 while (le2 != &t->itemlist) {
1929 if (!td->ignore) {
1930 num_items++;
1931
1932 if (!first) {
1933 if (keycmp(td->key, lastkey) == 0) {
1934 ERR("(%I64x,%x,%I64x): duplicate key\n", td->key.obj_id, td->key.obj_type, td->key.offset);
1935 crash = true;
1936 } else if (keycmp(td->key, lastkey) == -1) {
1937 ERR("(%I64x,%x,%I64x): key out of order\n", td->key.obj_id, td->key.obj_type, td->key.offset);
1938 crash = true;
1939 }
1940 } else
1941 first = false;
1942
1943 lastkey = td->key;
1944
1945 if (t->header.level == 0)
1946 size += td->size;
1947 }
1948 le2 = le2->Flink;
1949 }
1950
1951 if (t->header.level == 0)
1952 size += num_items * sizeof(leaf_node);
1953 else
1954 size += num_items * sizeof(internal_node);
1955
1956 if (num_items != t->header.num_items) {
1957 ERR("tree %I64x, level %x: num_items was %x, expected %x\n", t->root->id, t->header.level, num_items, t->header.num_items);
1958 crash = true;
1959 }
1960
1961 if (size != t->size) {
1962 ERR("tree %I64x, level %x: size was %x, expected %x\n", t->root->id, t->header.level, size, t->size);
1963 crash = true;
1964 }
1965
1966 if (t->header.num_items == 0 && t->parent) {
1967 ERR("tree %I64x, level %x: tried to write empty tree with parent\n", t->root->id, t->header.level);
1968 crash = true;
1969 }
1970
1971 if (t->size > Vcb->superblock.node_size - sizeof(tree_header)) {
1972 ERR("tree %I64x, level %x: tried to write overlarge tree (%x > %Ix)\n", t->root->id, t->header.level, t->size, Vcb->superblock.node_size - sizeof(tree_header));
1973 crash = true;
1974 }
1975
1976 if (crash) {
1977 ERR("tree %p\n", t);
1978 le2 = t->itemlist.Flink;
1979 while (le2 != &t->itemlist) {
1981 if (!td->ignore) {
1982 ERR("%I64x,%x,%I64x inserted=%u\n", td->key.obj_id, td->key.obj_type, td->key.offset, td->inserted);
1983 }
1984 le2 = le2->Flink;
1985 }
1986 int3;
1987 }
1988#endif
1989 t->header.address = t->new_address;
1990 t->header.generation = Vcb->superblock.generation;
1991 t->header.tree_id = t->root->id;
1992 t->header.flags |= HEADER_FLAG_MIXED_BACKREF;
1993 t->header.fs_uuid = Vcb->superblock.metadata_uuid;
1994 t->has_address = true;
1995
1996 data = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.node_size, ALLOC_TAG);
1997 if (!data) {
1998 ERR("out of memory\n");
2000 goto end;
2001 }
2002
2003 body = data + sizeof(tree_header);
2004
2005 RtlCopyMemory(data, &t->header, sizeof(tree_header));
2006 RtlZeroMemory(body, Vcb->superblock.node_size - sizeof(tree_header));
2007
2008 if (t->header.level == 0) {
2009 leaf_node* itemptr = (leaf_node*)body;
2010 int i = 0;
2011 uint8_t* dataptr = data + Vcb->superblock.node_size;
2012
2013 le2 = t->itemlist.Flink;
2014 while (le2 != &t->itemlist) {
2016 if (!td->ignore) {
2017 dataptr = dataptr - td->size;
2018
2019 itemptr[i].key = td->key;
2020 itemptr[i].offset = (uint32_t)((uint8_t*)dataptr - (uint8_t*)body);
2021 itemptr[i].size = td->size;
2022 i++;
2023
2024 if (td->size > 0)
2025 RtlCopyMemory(dataptr, td->data, td->size);
2026 }
2027
2028 le2 = le2->Flink;
2029 }
2030 } else {
2031 internal_node* itemptr = (internal_node*)body;
2032 int i = 0;
2033
2034 le2 = t->itemlist.Flink;
2035 while (le2 != &t->itemlist) {
2037 if (!td->ignore) {
2038 itemptr[i].key = td->key;
2039 itemptr[i].address = td->treeholder.address;
2040 itemptr[i].generation = td->treeholder.generation;
2041 i++;
2042 }
2043
2044 le2 = le2->Flink;
2045 }
2046 }
2047
2049
2051 if (!tw) {
2052 ERR("out of memory\n");
2055 goto end;
2056 }
2057
2058 tw->address = t->new_address;
2059 tw->length = Vcb->superblock.node_size;
2060 tw->data = data;
2061 tw->allocated = false;
2062
2063 if (IsListEmpty(&tree_writes))
2064 InsertTailList(&tree_writes, &tw->list_entry);
2065 else {
2066 bool inserted = false;
2067
2068 le2 = tree_writes.Flink;
2069 while (le2 != &tree_writes) {
2071
2072 if (tw2->address > tw->address) {
2073 InsertHeadList(le2->Blink, &tw->list_entry);
2074 inserted = true;
2075 break;
2076 }
2077
2078 le2 = le2->Flink;
2079 }
2080
2081 if (!inserted)
2082 InsertTailList(&tree_writes, &tw->list_entry);
2083 }
2084 }
2085
2086 le = le->Flink;
2087 }
2088
2089 Status = do_tree_writes(Vcb, &tree_writes, false);
2090 if (!NT_SUCCESS(Status)) {
2091 ERR("do_tree_writes returned %08lx\n", Status);
2092 goto end;
2093 }
2094
2096
2097end:
2098 while (!IsListEmpty(&tree_writes)) {
2099 le = RemoveHeadList(&tree_writes);
2101
2102 if (tw->data)
2103 ExFreePool(tw->data);
2104
2105 ExFreePool(tw);
2106 }
2107
2108 return Status;
2109}
2110
2112 KEY searchkey;
2114
2116
2117 sb->root_tree_addr = Vcb->superblock.root_tree_addr;
2118 sb->root_tree_generation = Vcb->superblock.generation;
2119 sb->root_level = Vcb->superblock.root_level;
2120
2121 sb->chunk_tree_addr = Vcb->superblock.chunk_tree_addr;
2122 sb->chunk_tree_generation = Vcb->superblock.chunk_root_generation;
2123 sb->chunk_root_level = Vcb->superblock.chunk_root_level;
2124
2125 searchkey.obj_id = BTRFS_ROOT_EXTENT;
2126 searchkey.obj_type = TYPE_ROOT_ITEM;
2127 searchkey.offset = 0xffffffffffffffff;
2128
2129 if (NT_SUCCESS(find_item(Vcb, Vcb->root_root, &tp, &searchkey, false, Irp))) {
2130 if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == searchkey.obj_type && tp.item->size >= sizeof(ROOT_ITEM)) {
2131 ROOT_ITEM* ri = (ROOT_ITEM*)tp.item->data;
2132
2133 sb->extent_tree_addr = ri->block_number;
2134 sb->extent_tree_generation = ri->generation;
2135 sb->extent_root_level = ri->root_level;
2136 }
2137 }
2138
2139 searchkey.obj_id = BTRFS_ROOT_FSTREE;
2140
2141 if (NT_SUCCESS(find_item(Vcb, Vcb->root_root, &tp, &searchkey, false, Irp))) {
2142 if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == searchkey.obj_type && tp.item->size >= sizeof(ROOT_ITEM)) {
2143 ROOT_ITEM* ri = (ROOT_ITEM*)tp.item->data;
2144
2145 sb->fs_tree_addr = ri->block_number;
2146 sb->fs_tree_generation = ri->generation;
2147 sb->fs_root_level = ri->root_level;
2148 }
2149 }
2150
2151 searchkey.obj_id = BTRFS_ROOT_DEVTREE;
2152
2153 if (NT_SUCCESS(find_item(Vcb, Vcb->root_root, &tp, &searchkey, false, Irp))) {
2154 if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == searchkey.obj_type && tp.item->size >= sizeof(ROOT_ITEM)) {
2155 ROOT_ITEM* ri = (ROOT_ITEM*)tp.item->data;
2156
2157 sb->dev_root_addr = ri->block_number;
2158 sb->dev_root_generation = ri->generation;
2159 sb->dev_root_level = ri->root_level;
2160 }
2161 }
2162
2163 searchkey.obj_id = BTRFS_ROOT_CHECKSUM;
2164
2165 if (NT_SUCCESS(find_item(Vcb, Vcb->root_root, &tp, &searchkey, false, Irp))) {
2166 if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == searchkey.obj_type && tp.item->size >= sizeof(ROOT_ITEM)) {
2167 ROOT_ITEM* ri = (ROOT_ITEM*)tp.item->data;
2168
2169 sb->csum_root_addr = ri->block_number;
2170 sb->csum_root_generation = ri->generation;
2171 sb->csum_root_level = ri->root_level;
2172 }
2173 }
2174
2175 sb->total_bytes = Vcb->superblock.total_bytes;
2176 sb->bytes_used = Vcb->superblock.bytes_used;
2177 sb->num_devices = Vcb->superblock.num_devices;
2178}
2179
2180typedef struct {
2181 void* context;
2189
2195
2196_Function_class_(IO_COMPLETION_ROUTINE)
2197static NTSTATUS __stdcall write_superblock_completion(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID conptr) {
2200
2202
2203 stripe->Status = Irp->IoStatus.Status;
2204
2205 if (InterlockedDecrement(&context->left) == 0)
2206 KeSetEvent(&context->Event, 0, false);
2207
2209}
2210
2212 switch (sb->csum_type) {
2213 case CSUM_TYPE_CRC32C:
2214 *(uint32_t*)sb = ~calc_crc32c(0xffffffff, (uint8_t*)&sb->uuid, (ULONG)sizeof(superblock) - sizeof(sb->checksum));
2215 break;
2216
2217 case CSUM_TYPE_XXHASH:
2218 *(uint64_t*)sb = XXH64(&sb->uuid, sizeof(superblock) - sizeof(sb->checksum), 0);
2219 break;
2220
2221 case CSUM_TYPE_SHA256:
2222 calc_sha256((uint8_t*)sb, &sb->uuid, sizeof(superblock) - sizeof(sb->checksum));
2223 break;
2224
2225 case CSUM_TYPE_BLAKE2:
2226 blake2b((uint8_t*)sb, BLAKE2_HASH_SIZE, &sb->uuid, sizeof(superblock) - sizeof(sb->checksum));
2227 break;
2228 }
2229}
2230
2232 unsigned int i = 0;
2233
2234 // All the documentation says that the Linux driver only writes one superblock
2235 // if it thinks a disk is an SSD, but this doesn't seem to be the case!
2236
2237 while (superblock_addrs[i] > 0 && device->devitem.num_bytes >= superblock_addrs[i] + sizeof(superblock)) {
2238 ULONG sblen = (ULONG)sector_align(sizeof(superblock), Vcb->superblock.sector_size);
2239 superblock* sb;
2242
2244 if (!sb) {
2245 ERR("out of memory\n");
2247 }
2248
2249 RtlCopyMemory(sb, &Vcb->superblock, sizeof(superblock));
2250
2251 if (sblen > sizeof(superblock))
2252 RtlZeroMemory((uint8_t*)sb + sizeof(superblock), sblen - sizeof(superblock));
2253
2256
2258
2260 if (!stripe) {
2261 ERR("out of memory\n");
2262 ExFreePool(sb);
2264 }
2265
2266 stripe->buf = (uint8_t*)sb;
2267
2268 stripe->Irp = IoAllocateIrp(device->devobj->StackSize, false);
2269 if (!stripe->Irp) {
2270 ERR("IoAllocateIrp failed\n");
2272 ExFreePool(sb);
2274 }
2275
2279
2280 if (i == 0)
2282
2284 stripe->Irp->AssociatedIrp.SystemBuffer = sb;
2285 stripe->mdl = NULL;
2286
2287 stripe->Irp->Flags = IRP_BUFFERED_IO;
2288 } else if (device->devobj->Flags & DO_DIRECT_IO) {
2289 stripe->mdl = IoAllocateMdl(sb, sblen, false, false, NULL);
2290 if (!stripe->mdl) {
2291 ERR("IoAllocateMdl failed\n");
2292 IoFreeIrp(stripe->Irp);
2294 ExFreePool(sb);
2296 }
2297
2298 stripe->Irp->MdlAddress = stripe->mdl;
2299
2301 } else {
2302 stripe->Irp->UserBuffer = sb;
2303 stripe->mdl = NULL;
2304 }
2305
2306 IrpSp->Parameters.Write.Length = sblen;
2307 IrpSp->Parameters.Write.ByteOffset.QuadPart = superblock_addrs[i];
2308
2309 IoSetCompletionRoutine(stripe->Irp, write_superblock_completion, stripe, true, true, true);
2310
2311 stripe->context = context;
2312 stripe->device = device;
2313 InsertTailList(&context->stripes, &stripe->list_entry);
2314
2315 context->left++;
2316
2317 i++;
2318 }
2319
2320 if (i == 0)
2321 ERR("no superblocks written!\n");
2322
2323 return STATUS_SUCCESS;
2324}
2325
2327 uint64_t i;
2329 LIST_ENTRY* le;
2331
2332 TRACE("(%p)\n", Vcb);
2333
2334 le = Vcb->trees.Flink;
2335 while (le != &Vcb->trees) {
2337
2338 if (t->write && !t->parent) {
2339 if (t->root == Vcb->root_root) {
2340 Vcb->superblock.root_tree_addr = t->new_address;
2341 Vcb->superblock.root_level = t->header.level;
2342 } else if (t->root == Vcb->chunk_root) {
2343 Vcb->superblock.chunk_tree_addr = t->new_address;
2344 Vcb->superblock.chunk_root_generation = t->header.generation;
2345 Vcb->superblock.chunk_root_level = t->header.level;
2346 }
2347 }
2348
2349 le = le->Flink;
2350 }
2351
2352 for (i = 0; i < BTRFS_NUM_BACKUP_ROOTS - 1; i++) {
2353 RtlCopyMemory(&Vcb->superblock.backup[i], &Vcb->superblock.backup[i+1], sizeof(superblock_backup));
2354 }
2355
2356 update_backup_superblock(Vcb, &Vcb->superblock.backup[BTRFS_NUM_BACKUP_ROOTS - 1], Irp);
2357
2359 InitializeListHead(&context.stripes);
2360 context.left = 0;
2361
2362 le = Vcb->devices.Flink;
2363 while (le != &Vcb->devices) {
2365
2366 if (dev->devobj && !dev->readonly) {
2368 if (!NT_SUCCESS(Status)) {
2369 ERR("write_superblock returned %08lx\n", Status);
2370 goto end;
2371 }
2372 }
2373
2374 le = le->Flink;
2375 }
2376
2377 if (IsListEmpty(&context.stripes)) {
2378 ERR("error - not writing any superblocks\n");
2380 goto end;
2381 }
2382
2383 le = context.stripes.Flink;
2384 while (le != &context.stripes) {
2386
2388
2389 le = le->Flink;
2390 }
2391
2393
2394 le = context.stripes.Flink;
2395 while (le != &context.stripes) {
2397
2398 if (!NT_SUCCESS(stripe->Status)) {
2399 ERR("device %I64x returned %08lx\n", stripe->device->devitem.dev_id, stripe->Status);
2401 Status = stripe->Status;
2402 goto end;
2403 }
2404
2405 le = le->Flink;
2406 }
2407
2409
2410end:
2411 while (!IsListEmpty(&context.stripes)) {
2413
2414 if (stripe->mdl) {
2415 if (stripe->mdl->MdlFlags & MDL_PAGES_LOCKED)
2416 MmUnlockPages(stripe->mdl);
2417
2418 IoFreeMdl(stripe->mdl);
2419 }
2420
2421 if (stripe->Irp)
2422 IoFreeIrp(stripe->Irp);
2423
2424 if (stripe->buf)
2425 ExFreePool(stripe->buf);
2426
2428 }
2429
2430 return Status;
2431}
2432
2434 LIST_ENTRY *le, *le2;
2436 uint64_t old_size;
2437
2438 if (ce->count == 0 && ce->old_count == 0) {
2439 while (!IsListEmpty(&ce->refs)) {
2441 ExFreePool(cer);
2442 }
2443
2444 while (!IsListEmpty(&ce->old_refs)) {
2446 ExFreePool(cer);
2447 }
2448
2449 goto end;
2450 }
2451
2452 le = ce->refs.Flink;
2453 while (le != &ce->refs) {
2455 uint32_t old_count = 0;
2456
2457 if (cer->type == TYPE_EXTENT_DATA_REF) {
2458 le2 = ce->old_refs.Flink;
2459 while (le2 != &ce->old_refs) {
2461
2462 if (cer2->type == TYPE_EXTENT_DATA_REF && cer2->edr.root == cer->edr.root && cer2->edr.objid == cer->edr.objid && cer2->edr.offset == cer->edr.offset) {
2463 old_count = cer2->edr.count;
2464 break;
2465 }
2466
2467 le2 = le2->Flink;
2468 }
2469
2470 old_size = ce->old_count > 0 ? ce->old_size : ce->size;
2471
2472 if (cer->edr.count > old_count) {
2473 Status = increase_extent_refcount_data(Vcb, ce->address, old_size, cer->edr.root, cer->edr.objid, cer->edr.offset, cer->edr.count - old_count, Irp);
2474
2475 if (!NT_SUCCESS(Status)) {
2476 ERR("increase_extent_refcount_data returned %08lx\n", Status);
2477 return Status;
2478 }
2479 }
2480 } else if (cer->type == TYPE_SHARED_DATA_REF) {
2481 le2 = ce->old_refs.Flink;
2482 while (le2 != &ce->old_refs) {
2484
2485 if (cer2->type == TYPE_SHARED_DATA_REF && cer2->sdr.offset == cer->sdr.offset) {
2487 ExFreePool(cer2);
2488 break;
2489 }
2490
2491 le2 = le2->Flink;
2492 }
2493 }
2494
2495 le = le->Flink;
2496 }
2497
2498 le = ce->refs.Flink;
2499 while (le != &ce->refs) {
2501 LIST_ENTRY* le3 = le->Flink;
2502 uint32_t old_count = 0;
2503
2504 if (cer->type == TYPE_EXTENT_DATA_REF) {
2505 le2 = ce->old_refs.Flink;
2506 while (le2 != &ce->old_refs) {
2508
2509 if (cer2->type == TYPE_EXTENT_DATA_REF && cer2->edr.root == cer->edr.root && cer2->edr.objid == cer->edr.objid && cer2->edr.offset == cer->edr.offset) {
2510 old_count = cer2->edr.count;
2511
2513 ExFreePool(cer2);
2514 break;
2515 }
2516
2517 le2 = le2->Flink;
2518 }
2519
2520 old_size = ce->old_count > 0 ? ce->old_size : ce->size;
2521
2522 if (cer->edr.count < old_count) {
2523 Status = decrease_extent_refcount_data(Vcb, ce->address, old_size, cer->edr.root, cer->edr.objid, cer->edr.offset,
2524 old_count - cer->edr.count, ce->superseded, Irp);
2525
2526 if (!NT_SUCCESS(Status)) {
2527 ERR("decrease_extent_refcount_data returned %08lx\n", Status);
2528 return Status;
2529 }
2530 }
2531
2532 if (ce->size != ce->old_size && ce->old_count > 0) {
2533 KEY searchkey;
2535 void* data;
2536
2537 searchkey.obj_id = ce->address;
2538 searchkey.obj_type = TYPE_EXTENT_ITEM;
2539 searchkey.offset = ce->old_size;
2540
2541 Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, false, Irp);
2542 if (!NT_SUCCESS(Status)) {
2543 ERR("error - find_item returned %08lx\n", Status);
2544 return Status;
2545 }
2546
2547 if (keycmp(searchkey, tp.item->key)) {
2548 ERR("could not find (%I64x,%x,%I64x) in extent tree\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset);
2549 return STATUS_INTERNAL_ERROR;
2550 }
2551
2552 if (tp.item->size > 0) {
2554
2555 if (!data) {
2556 ERR("out of memory\n");
2558 }
2559
2561 } else
2562 data = NULL;
2563
2564 Status = insert_tree_item(Vcb, Vcb->extent_root, ce->address, TYPE_EXTENT_ITEM, ce->size, data, tp.item->size, NULL, Irp);
2565 if (!NT_SUCCESS(Status)) {
2566 ERR("insert_tree_item returned %08lx\n", Status);
2567 if (data) ExFreePool(data);
2568 return Status;
2569 }
2570
2572 if (!NT_SUCCESS(Status)) {
2573 ERR("delete_tree_item returned %08lx\n", Status);
2574 return Status;
2575 }
2576 }
2577 }
2578
2580 ExFreePool(cer);
2581
2582 le = le3;
2583 }
2584
2585#ifdef DEBUG_PARANOID
2586 if (!IsListEmpty(&ce->old_refs))
2587 WARN("old_refs not empty\n");
2588#endif
2589
2590end:
2591 if (ce->count == 0 && !ce->superseded) {
2592 c->used -= ce->size;
2593 space_list_add(c, ce->address, ce->size, rollback);
2594 }
2595
2597 ExFreePool(ce);
2598
2599 return STATUS_SUCCESS;
2600}
2601
2603 KEY searchkey;
2604 traverse_ptr tp, next_tp;
2606 uint64_t startaddr, endaddr;
2607 ULONG len;
2609 ULONG* bmparr;
2610 ULONG runlength, index;
2611
2612 TRACE("(%p, %I64x, %lx, %p, %p)\n", Vcb, address, length, csum, Irp);
2613
2614 searchkey.obj_id = EXTENT_CSUM_ID;
2615 searchkey.obj_type = TYPE_EXTENT_CSUM;
2616 searchkey.offset = address;
2617
2618 // FIXME - create checksum_root if it doesn't exist at all
2619
2620 Status = find_item(Vcb, Vcb->checksum_root, &tp, &searchkey, false, Irp);
2621 if (Status == STATUS_NOT_FOUND) { // tree is completely empty
2622 if (csum) { // not deleted
2623 ULONG length2 = length;
2624 uint64_t off = address;
2625 void* data = csum;
2626
2627 do {
2628 uint16_t il = (uint16_t)min(length2, MAX_CSUM_SIZE / Vcb->csum_size);
2629
2630 void* checksums = ExAllocatePoolWithTag(PagedPool, il * Vcb->csum_size, ALLOC_TAG);
2631 if (!checksums) {
2632 ERR("out of memory\n");
2633 return;
2634 }
2635
2636 RtlCopyMemory(checksums, data, il * Vcb->csum_size);
2637
2638 Status = insert_tree_item(Vcb, Vcb->checksum_root, EXTENT_CSUM_ID, TYPE_EXTENT_CSUM, off, checksums,
2639 il * Vcb->csum_size, NULL, Irp);
2640 if (!NT_SUCCESS(Status)) {
2641 ERR("insert_tree_item returned %08lx\n", Status);
2642 ExFreePool(checksums);
2643 return;
2644 }
2645
2646 length2 -= il;
2647
2648 if (length2 > 0) {
2649 off += (uint64_t)il << Vcb->sector_shift;
2650 data = (uint8_t*)data + (il * Vcb->csum_size);
2651 }
2652 } while (length2 > 0);
2653 }
2654 } else if (!NT_SUCCESS(Status)) {
2655 ERR("find_item returned %08lx\n", Status);
2656 return;
2657 } else {
2658 uint32_t tplen;
2659 void* checksums;
2660
2661 // FIXME - check entry is TYPE_EXTENT_CSUM?
2662
2663 if (tp.item->key.offset < address && tp.item->key.offset + (((uint64_t)tp.item->size << Vcb->sector_shift) / Vcb->csum_size) >= address)
2665 else
2667
2668 searchkey.obj_id = EXTENT_CSUM_ID;
2669 searchkey.obj_type = TYPE_EXTENT_CSUM;
2670 searchkey.offset = address + (length << Vcb->sector_shift);
2671
2672 Status = find_item(Vcb, Vcb->checksum_root, &tp, &searchkey, false, Irp);
2673 if (!NT_SUCCESS(Status)) {
2674 ERR("find_item returned %08lx\n", Status);
2675 return;
2676 }
2677
2678 tplen = tp.item->size / Vcb->csum_size;
2679
2680 if (tp.item->key.offset + (tplen << Vcb->sector_shift) >= address + (length << Vcb->sector_shift))
2681 endaddr = tp.item->key.offset + (tplen << Vcb->sector_shift);
2682 else
2683 endaddr = address + (length << Vcb->sector_shift);
2684
2685 TRACE("cs starts at %I64x (%lx sectors)\n", address, length);
2686 TRACE("startaddr = %I64x\n", startaddr);
2687 TRACE("endaddr = %I64x\n", endaddr);
2688
2689 len = (ULONG)((endaddr - startaddr) >> Vcb->sector_shift);
2690
2691 checksums = ExAllocatePoolWithTag(PagedPool, Vcb->csum_size * len, ALLOC_TAG);
2692 if (!checksums) {
2693 ERR("out of memory\n");
2694 return;
2695 }
2696
2697 bmparr = ExAllocatePoolWithTag(PagedPool, sizeof(ULONG) * ((len/8)+1), ALLOC_TAG);
2698 if (!bmparr) {
2699 ERR("out of memory\n");
2700 ExFreePool(checksums);
2701 return;
2702 }
2703
2704 RtlInitializeBitMap(&bmp, bmparr, len);
2706
2707 searchkey.obj_id = EXTENT_CSUM_ID;
2708 searchkey.obj_type = TYPE_EXTENT_CSUM;
2709 searchkey.offset = address;
2710
2711 Status = find_item(Vcb, Vcb->checksum_root, &tp, &searchkey, false, Irp);
2712 if (!NT_SUCCESS(Status)) {
2713 ERR("find_item returned %08lx\n", Status);
2714 ExFreePool(checksums);
2715 ExFreePool(bmparr);
2716 return;
2717 }
2718
2719 // set bit = free space, cleared bit = allocated sector
2720
2721 while (tp.item->key.offset < endaddr) {
2722 if (tp.item->key.offset >= startaddr) {
2723 if (tp.item->size > 0) {
2724 ULONG itemlen = (ULONG)min((len - ((tp.item->key.offset - startaddr) >> Vcb->sector_shift)) * Vcb->csum_size, tp.item->size);
2725
2726 RtlCopyMemory((uint8_t*)checksums + (((tp.item->key.offset - startaddr) * Vcb->csum_size) >> Vcb->sector_shift),
2727 tp.item->data, itemlen);
2728 RtlClearBits(&bmp, (ULONG)((tp.item->key.offset - startaddr) >> Vcb->sector_shift), itemlen / Vcb->csum_size);
2729 }
2730
2732 if (!NT_SUCCESS(Status)) {
2733 ERR("delete_tree_item returned %08lx\n", Status);
2734 ExFreePool(checksums);
2735 ExFreePool(bmparr);
2736 return;
2737 }
2738 }
2739
2740 if (find_next_item(Vcb, &tp, &next_tp, false, Irp)) {
2741 tp = next_tp;
2742 } else
2743 break;
2744 }
2745
2746 if (!csum) { // deleted
2747 RtlSetBits(&bmp, (ULONG)((address - startaddr) >> Vcb->sector_shift), length);
2748 } else {
2749 RtlCopyMemory((uint8_t*)checksums + (((address - startaddr) * Vcb->csum_size) >> Vcb->sector_shift),
2750 csum, length * Vcb->csum_size);
2751 RtlClearBits(&bmp, (ULONG)((address - startaddr) >> Vcb->sector_shift), length);
2752 }
2753
2754 runlength = RtlFindFirstRunClear(&bmp, &index);
2755
2756 while (runlength != 0) {
2757 if (index >= len)
2758 break;
2759
2760 if (index + runlength >= len) {
2761 runlength = len - index;
2762
2763 if (runlength == 0)
2764 break;
2765 }
2766
2767 do {
2768 uint16_t rl;
2769 uint64_t off;
2770 void* data;
2771
2772 if (runlength * Vcb->csum_size > MAX_CSUM_SIZE)
2773 rl = (uint16_t)(MAX_CSUM_SIZE / Vcb->csum_size);
2774 else
2775 rl = (uint16_t)runlength;
2776
2777 data = ExAllocatePoolWithTag(PagedPool, Vcb->csum_size * rl, ALLOC_TAG);
2778 if (!data) {
2779 ERR("out of memory\n");
2780 ExFreePool(bmparr);
2781 ExFreePool(checksums);
2782 return;
2783 }
2784
2785 RtlCopyMemory(data, (uint8_t*)checksums + (Vcb->csum_size * index), Vcb->csum_size * rl);
2786
2787 off = startaddr + ((uint64_t)index << Vcb->sector_shift);
2788
2789 Status = insert_tree_item(Vcb, Vcb->checksum_root, EXTENT_CSUM_ID, TYPE_EXTENT_CSUM, off, data, Vcb->csum_size * rl, NULL, Irp);
2790 if (!NT_SUCCESS(Status)) {
2791 ERR("insert_tree_item returned %08lx\n", Status);
2793 ExFreePool(bmparr);
2794 ExFreePool(checksums);
2795 return;
2796 }
2797
2798 runlength -= rl;
2799 index += rl;
2800 } while (runlength > 0);
2801
2802 runlength = RtlFindNextForwardRunClear(&bmp, index, &index);
2803 }
2804
2805 ExFreePool(bmparr);
2806 ExFreePool(checksums);
2807 }
2808}
2809
2811 LIST_ENTRY *le = Vcb->chunks.Flink, *le2;
2812 chunk* c;
2813 KEY searchkey;
2815 BLOCK_GROUP_ITEM* bgi;
2817
2818 TRACE("(%p)\n", Vcb);
2819
2820 ExAcquireResourceSharedLite(&Vcb->chunk_lock, true);
2821
2822 while (le != &Vcb->chunks) {
2824
2826
2827 if (!c->cache_loaded && (!IsListEmpty(&c->changed_extents) || c->used != c->oldused)) {
2829
2830 if (!NT_SUCCESS(Status)) {
2831 ERR("load_cache_chunk returned %08lx\n", Status);
2833 goto end;
2834 }
2835 }
2836
2837 le2 = c->changed_extents.Flink;
2838 while (le2 != &c->changed_extents) {
2839 LIST_ENTRY* le3 = le2->Flink;
2841
2843 if (!NT_SUCCESS(Status)) {
2844 ERR("flush_changed_extent returned %08lx\n", Status);
2846 goto end;
2847 }
2848
2849 le2 = le3;
2850 }
2851
2852 // This is usually done by update_chunks, but we have to check again in case any new chunks
2853 // have been allocated since.
2854 if (c->created) {
2856 if (!NT_SUCCESS(Status)) {
2857 ERR("create_chunk returned %08lx\n", Status);
2859 goto end;
2860 }
2861 }
2862
2863 if (c->old_cache) {
2864 if (c->old_cache->dirty) {
2865 LIST_ENTRY batchlist;
2866
2867 InitializeListHead(&batchlist);
2868
2869 Status = flush_fcb(c->old_cache, false, &batchlist, Irp);
2870 if (!NT_SUCCESS(Status)) {
2871 ERR("flush_fcb returned %08lx\n", Status);
2873 clear_batch_list(Vcb, &batchlist);
2874 goto end;
2875 }
2876
2877 Status = commit_batch_list(Vcb, &batchlist, Irp);
2878 if (!NT_SUCCESS(Status)) {
2879 ERR("commit_batch_list returned %08lx\n", Status);
2881 goto end;
2882 }
2883 }
2884
2885 free_fcb(c->old_cache);
2886
2887 if (c->old_cache->refcount == 0)
2888 reap_fcb(c->old_cache);
2889
2890 c->old_cache = NULL;
2891 }
2892
2893 if (c->used != c->oldused) {
2894 searchkey.obj_id = c->offset;
2895 searchkey.obj_type = TYPE_BLOCK_GROUP_ITEM;
2896 searchkey.offset = c->chunk_item->size;
2897
2898 Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, false, Irp);
2899 if (!NT_SUCCESS(Status)) {
2900 ERR("error - find_item returned %08lx\n", Status);
2902 goto end;
2903 }
2904
2905 if (keycmp(searchkey, tp.item->key)) {
2906 ERR("could not find (%I64x,%x,%I64x) in extent_root\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset);
2909 goto end;
2910 }
2911
2912 if (tp.item->size < sizeof(BLOCK_GROUP_ITEM)) {
2913 ERR("(%I64x,%x,%I64x) was %u bytes, expected %Iu\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(BLOCK_GROUP_ITEM));
2916 goto end;
2917 }
2918
2920 if (!bgi) {
2921 ERR("out of memory\n");
2924 goto end;
2925 }
2926
2927 RtlCopyMemory(bgi, tp.item->data, tp.item->size);
2928 bgi->used = c->used;
2929
2930#ifdef DEBUG_PARANOID
2931 if (bgi->used & 0x8000000000000000) {
2932 ERR("refusing to write BLOCK_GROUP_ITEM with negative usage value (%I64x)\n", bgi->used);
2933 int3;
2934 }
2935#endif
2936
2937 TRACE("adjusting usage of chunk %I64x to %I64x\n", c->offset, c->used);
2938
2940 if (!NT_SUCCESS(Status)) {
2941 ERR("delete_tree_item returned %08lx\n", Status);
2942 ExFreePool(bgi);
2944 goto end;
2945 }
2946
2947 Status = insert_tree_item(Vcb, Vcb->extent_root, searchkey.obj_id, searchkey.obj_type, searchkey.offset, bgi, tp.item->size, NULL, Irp);
2948 if (!NT_SUCCESS(Status)) {
2949 ERR("insert_tree_item returned %08lx\n", Status);
2950 ExFreePool(bgi);
2952 goto end;
2953 }
2954
2955 Vcb->superblock.bytes_used += c->used - c->oldused;
2956 c->oldused = c->used;
2957 }
2958
2960
2961 le = le->Flink;
2962 }
2963
2965
2966end:
2967 ExReleaseResourceLite(&Vcb->chunk_lock);
2968
2969 return Status;
2970}
2971
2972static void get_first_item(tree* t, KEY* key) {
2973 LIST_ENTRY* le;
2974
2975 le = t->itemlist.Flink;
2976 while (le != &t->itemlist) {
2978
2979 *key = td->key;
2980 return;
2981 }
2982}
2983
2985 tree *nt, *pt;
2986 tree_data* td;
2987 tree_data* oldlastitem;
2988
2989 TRACE("splitting tree in %I64x at (%I64x,%x,%I64x)\n", t->root->id, newfirstitem->key.obj_id, newfirstitem->key.obj_type, newfirstitem->key.offset);
2990
2992 if (!nt) {
2993 ERR("out of memory\n");
2995 }
2996
2997 if (t->header.level > 0) {
2999 if (!nt->nonpaged) {
3000 ERR("out of memory\n");
3001 ExFreePool(nt);
3003 }
3004
3005 ExInitializeFastMutex(&nt->nonpaged->mutex);
3006 } else
3007 nt->nonpaged = NULL;
3008
3009 RtlCopyMemory(&nt->header, &t->header, sizeof(tree_header));
3010 nt->header.address = 0;
3011 nt->header.generation = Vcb->superblock.generation;
3012 nt->header.num_items = t->header.num_items - numitems;
3014
3015 nt->has_address = false;
3016 nt->Vcb = Vcb;
3017 nt->parent = t->parent;
3018
3019#ifdef DEBUG_PARANOID
3020 if (nt->parent && nt->parent->header.level <= nt->header.level) int3;
3021#endif
3022
3023 nt->root = t->root;
3024 nt->new_address = 0;
3025 nt->has_new_address = false;
3026 nt->updated_extents = false;
3027 nt->uniqueness_determined = true;
3028 nt->is_unique = true;
3029 nt->list_entry_hash.Flink = NULL;
3030 nt->buf = NULL;
3031 InitializeListHead(&nt->itemlist);
3032
3033 oldlastitem = CONTAINING_RECORD(newfirstitem->list_entry.Blink, tree_data, list_entry);
3034
3035 nt->itemlist.Flink = &newfirstitem->list_entry;
3036 nt->itemlist.Blink = t->itemlist.Blink;
3037 nt->itemlist.Flink->Blink = &nt->itemlist;
3038 nt->itemlist.Blink->Flink = &nt->itemlist;
3039
3040 t->itemlist.Blink = &oldlastitem->list_entry;
3041 t->itemlist.Blink->Flink = &t->itemlist;
3042
3043 nt->size = t->size - size;
3044 t->size = size;
3045 t->header.num_items = numitems;
3046 nt->write = true;
3047
3048 InsertTailList(&Vcb->trees, &nt->list_entry);
3049
3050 if (nt->header.level > 0) {
3051 LIST_ENTRY* le = nt->itemlist.Flink;
3052
3053 while (le != &nt->itemlist) {
3055
3056 if (td2->treeholder.tree) {
3057 td2->treeholder.tree->parent = nt;
3058#ifdef DEBUG_PARANOID
3059 if (td2->treeholder.tree->parent && td2->treeholder.tree->parent->header.level <= td2->treeholder.tree->header.level) int3;
3060#endif
3061 }
3062
3063 le = le->Flink;
3064 }
3065 } else {
3066 LIST_ENTRY* le = nt->itemlist.Flink;
3067
3068 while (le != &nt->itemlist) {
3070
3071 if (!td2->inserted && td2->data) {
3073
3074 if (!data) {
3075 ERR("out of memory\n");
3077 }
3078
3079 RtlCopyMemory(data, td2->data, td2->size);
3080 td2->data = data;
3081 td2->inserted = true;
3082 }
3083
3084 le = le->Flink;
3085 }
3086 }
3087
3088 if (nt->parent) {
3089 td = ExAllocateFromPagedLookasideList(&Vcb->tree_data_lookaside);
3090 if (!td) {
3091 ERR("out of memory\n");
3093 }
3094
3095 td->key = newfirstitem->key;
3096
3097 InsertHeadList(&t->paritem->list_entry, &td->list_entry);
3098
3099 td->ignore = false;
3100 td->inserted = true;
3101 td->treeholder.tree = nt;
3102 nt->paritem = td;
3103
3104 nt->parent->header.num_items++;
3105 nt->parent->size += sizeof(internal_node);
3106
3107 goto end;
3108 }
3109
3110 TRACE("adding new tree parent\n");
3111
3112 if (nt->header.level == 255) {
3113 ERR("cannot add parent to tree at level 255\n");
3114 return STATUS_INTERNAL_ERROR;
3115 }
3116
3118 if (!pt) {
3119 ERR("out of memory\n");
3121 }
3122
3124 if (!pt->nonpaged) {
3125 ERR("out of memory\n");
3126 ExFreePool(pt);
3128 }
3129
3130 ExInitializeFastMutex(&pt->nonpaged->mutex);
3131
3132 RtlCopyMemory(&pt->header, &nt->header, sizeof(tree_header));
3133 pt->header.address = 0;
3134 pt->header.num_items = 2;
3135 pt->header.level = nt->header.level + 1;
3137
3138 pt->has_address = false;
3139 pt->Vcb = Vcb;
3140 pt->parent = NULL;
3141 pt->paritem = NULL;
3142 pt->root = t->root;
3143 pt->new_address = 0;
3144 pt->has_new_address = false;
3145 pt->updated_extents = false;
3146 pt->size = pt->header.num_items * sizeof(internal_node);
3147 pt->uniqueness_determined = true;
3148 pt->is_unique = true;
3149 pt->list_entry_hash.Flink = NULL;
3150 pt->buf = NULL;
3151 InitializeListHead(&pt->itemlist);
3152
3153 InsertTailList(&Vcb->trees, &pt->list_entry);
3154
3155 td = ExAllocateFromPagedLookasideList(&Vcb->tree_data_lookaside);
3156 if (!td) {
3157 ERR("out of memory\n");
3159 }
3160
3161 get_first_item(t, &td->key);
3162 td->ignore = false;
3163 td->inserted = false;
3164 td->treeholder.address = 0;
3165 td->treeholder.generation = Vcb->superblock.generation;
3166 td->treeholder.tree = t;
3167 InsertTailList(&pt->itemlist, &td->list_entry);
3168 t->paritem = td;
3169
3170 td = ExAllocateFromPagedLookasideList(&Vcb->tree_data_lookaside);
3171 if (!td) {
3172 ERR("out of memory\n");
3174 }
3175
3176 td->key = newfirstitem->key;
3177 td->ignore = false;
3178 td->inserted = false;
3179 td->treeholder.address = 0;
3180 td->treeholder.generation = Vcb->superblock.generation;
3181 td->treeholder.tree = nt;
3182 InsertTailList(&pt->itemlist, &td->list_entry);
3183 nt->paritem = td;
3184
3185 pt->write = true;
3186
3187 t->root->treeholder.tree = pt;
3188
3189 t->parent = pt;
3190 nt->parent = pt;
3191
3192#ifdef DEBUG_PARANOID
3193 if (t->parent && t->parent->header.level <= t->header.level) int3;
3194 if (nt->parent && nt->parent->header.level <= nt->header.level) int3;
3195#endif
3196
3197end:
3198 t->root->root_item.bytes_used += Vcb->superblock.node_size;
3199
3200 return STATUS_SUCCESS;
3201}
3202
3204 LIST_ENTRY* le;
3205 uint32_t size, ds, numitems;
3206
3207 size = 0;
3208 numitems = 0;
3209
3210 // FIXME - naïve implementation: maximizes number of filled trees
3211
3212 le = t->itemlist.Flink;
3213 while (le != &t->itemlist) {
3215
3216 if (!td->ignore) {
3217 if (t->header.level == 0)
3218 ds = sizeof(leaf_node) + td->size;
3219 else
3220 ds = sizeof(internal_node);
3221
3222 if (numitems == 0 && ds > Vcb->superblock.node_size - sizeof(tree_header)) {
3223 ERR("(%I64x,%x,%I64x) in tree %I64x is too large (%x > %Ix)\n",
3224 td->key.obj_id, td->key.obj_type, td->key.offset, t->root->id,
3225 ds, Vcb->superblock.node_size - sizeof(tree_header));
3226 return STATUS_INTERNAL_ERROR;
3227 }
3228
3229 // FIXME - move back if previous item was deleted item with same key
3230 if (size + ds > Vcb->superblock.node_size - sizeof(tree_header))
3231 return split_tree_at(Vcb, t, td, numitems, size);
3232
3233 size += ds;
3234 numitems++;
3235 }
3236
3237 le = le->Flink;
3238 }
3239
3240 return STATUS_SUCCESS;
3241}
3242
3244 KEY searchkey;
3247 bool ret = false;
3248 EXTENT_ITEM* ei;
3249 uint8_t* type;
3250
3251 if (t->uniqueness_determined)
3252 return t->is_unique;
3253
3254 if (t->parent && !is_tree_unique(Vcb, t->parent, Irp))
3255 goto end;
3256
3257 if (t->has_address) {
3258 searchkey.obj_id = t->header.address;
3259 searchkey.obj_type = Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA ? TYPE_METADATA_ITEM : TYPE_EXTENT_ITEM;
3260 searchkey.offset = 0xffffffffffffffff;
3261
3262 Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, false, Irp);
3263 if (!NT_SUCCESS(Status)) {
3264 ERR("error - find_item returned %08lx\n", Status);
3265 goto end;
3266 }
3267
3268 if (tp.item->key.obj_id != t->header.address || (tp.item->key.obj_type != TYPE_METADATA_ITEM && tp.item->key.obj_type != TYPE_EXTENT_ITEM))
3269 goto end;
3270
3271 if (tp.item->key.obj_type == TYPE_EXTENT_ITEM && tp.item->size == sizeof(EXTENT_ITEM_V0))
3272 goto end;
3273
3274 if (tp.item->size < sizeof(EXTENT_ITEM))
3275 goto end;
3276
3277 ei = (EXTENT_ITEM*)tp.item->data;
3278
3279 if (ei->refcount > 1)
3280 goto end;
3281
3283 EXTENT_ITEM2* ei2;
3284
3285 if (tp.item->size < sizeof(EXTENT_ITEM) + sizeof(EXTENT_ITEM2))
3286 goto end;
3287
3288 ei2 = (EXTENT_ITEM2*)&ei[1];
3289 type = (uint8_t*)&ei2[1];
3290 } else
3291 type = (uint8_t*)&ei[1];
3292
3293 if (type >= tp.item->data + tp.item->size || *type != TYPE_TREE_BLOCK_REF)
3294 goto end;
3295 }
3296
3297 ret = true;
3298
3299end:
3300 t->is_unique = ret;
3301 t->uniqueness_determined = true;
3302
3303 return ret;
3304}
3305
3306static NTSTATUS try_tree_amalgamate(device_extension* Vcb, tree* t, bool* done, bool* done_deletions, PIRP Irp, LIST_ENTRY* rollback) {
3307 LIST_ENTRY* le;
3308 tree_data* nextparitem = NULL;
3310 tree *next_tree, *par;
3311
3312 *done = false;
3313
3314 TRACE("trying to amalgamate tree in root %I64x, level %x (size %u)\n", t->root->id, t->header.level, t->size);
3315
3316 // FIXME - doesn't capture everything, as it doesn't ascend
3317 le = t->paritem->list_entry.Flink;
3318 while (le != &t->parent->itemlist) {
3320
3321 if (!td->ignore) {
3322 nextparitem = td;
3323 break;
3324 }
3325
3326 le = le->Flink;
3327 }
3328
3329 if (!nextparitem)
3330 return STATUS_SUCCESS;
3331
3332 TRACE("nextparitem: key = %I64x,%x,%I64x\n", nextparitem->key.obj_id, nextparitem->key.obj_type, nextparitem->key.offset);
3333
3334 if (!nextparitem->treeholder.tree) {
3335 Status = do_load_tree(Vcb, &nextparitem->treeholder, t->root, t->parent, nextparitem, NULL);
3336 if (!NT_SUCCESS(Status)) {
3337 ERR("do_load_tree returned %08lx\n", Status);
3338 return Status;
3339 }
3340 }
3341
3342 if (!is_tree_unique(Vcb, nextparitem->treeholder.tree, Irp))
3343 return STATUS_SUCCESS;
3344
3345 next_tree = nextparitem->treeholder.tree;
3346
3347 if (!next_tree->updated_extents && next_tree->has_address) {
3348 Status = update_tree_extents(Vcb, next_tree, Irp, rollback);
3349 if (!NT_SUCCESS(Status)) {
3350 ERR("update_tree_extents returned %08lx\n", Status);
3351 return Status;
3352 }
3353 }
3354
3355 if (t->size + next_tree->size <= Vcb->superblock.node_size - sizeof(tree_header)) {
3356 // merge two trees into one
3357
3358 t->header.num_items += next_tree->header.num_items;
3359 t->size += next_tree->size;
3360
3361 if (next_tree->header.level > 0) {
3362 le = next_tree->itemlist.Flink;
3363
3364 while (le != &next_tree->itemlist) {
3366
3367 if (td2->treeholder.tree) {
3368 td2->treeholder.tree->parent = t;
3369#ifdef DEBUG_PARANOID
3370 if (td2->treeholder.tree->parent && td2->treeholder.tree->parent->header.level <= td2->treeholder.tree->header.level) int3;
3371#endif
3372 }
3373
3374 td2->inserted = true;
3375 le = le->Flink;
3376 }
3377 } else {
3378 le = next_tree->itemlist.Flink;
3379
3380 while (le != &next_tree->itemlist) {
3382
3383 if (!td2->inserted && td2->data) {
3385
3386 if (!data) {
3387 ERR("out of memory\n");
3389 }
3390
3391 RtlCopyMemory(data, td2->data, td2->size);
3392 td2->data = data;
3393 td2->inserted = true;
3394 }
3395
3396 le = le->Flink;
3397 }
3398 }
3399
3400 t->itemlist.Blink->Flink = next_tree->itemlist.Flink;
3401 t->itemlist.Blink->Flink->Blink = t->itemlist.Blink;
3402 t->itemlist.Blink = next_tree->itemlist.Blink;
3403 t->itemlist.Blink->Flink = &t->itemlist;
3404
3405 next_tree->itemlist.Flink = next_tree->itemlist.Blink = &next_tree->itemlist;
3406
3407 next_tree->header.num_items = 0;
3408 next_tree->size = 0;
3409
3410 if (next_tree->has_new_address) { // delete associated EXTENT_ITEM
3411 Status = reduce_tree_extent(Vcb, next_tree->new_address, next_tree, next_tree->parent->header.tree_id, next_tree->header.level, Irp, rollback);
3412
3413 if (!NT_SUCCESS(Status)) {
3414 ERR("reduce_tree_extent returned %08lx\n", Status);
3415 return Status;
3416 }
3417 } else if (next_tree->has_address) {
3418 Status = reduce_tree_extent(Vcb, next_tree->header.address, next_tree, next_tree->parent->header.tree_id, next_tree->header.level, Irp, rollback);
3419
3420 if (!NT_SUCCESS(Status)) {
3421 ERR("reduce_tree_extent returned %08lx\n", Status);
3422 return Status;
3423 }
3424 }
3425
3426 if (!nextparitem->ignore) {
3427 nextparitem->ignore = true;
3428 next_tree->parent->header.num_items--;
3429 next_tree->parent->size -= sizeof(internal_node);
3430
3431 *done_deletions = true;
3432 }
3433
3434 par = next_tree->parent;
3435 while (par) {
3436 par->write = true;
3437 par = par->parent;
3438 }
3439
3440 RemoveEntryList(&nextparitem->list_entry);
3441 ExFreePool(next_tree->paritem);
3442 next_tree->paritem = NULL;
3443
3444 next_tree->root->root_item.bytes_used -= Vcb->superblock.node_size;
3445
3446 free_tree(next_tree);
3447
3448 *done = true;
3449 } else {
3450 // rebalance by moving items from second tree into first
3451 ULONG avg_size = (t->size + next_tree->size) / 2;
3452 KEY firstitem = {0, 0, 0};
3453 bool changed = false;
3454
3455 TRACE("attempting rebalance\n");
3456
3457 le = next_tree->itemlist.Flink;
3458 while (le != &next_tree->itemlist && t->size < avg_size && next_tree->header.num_items > 1) {
3460 ULONG size;
3461
3462 if (!td->ignore) {
3463 if (next_tree->header.level == 0)
3464 size = sizeof(leaf_node) + td->size;
3465 else
3466 size = sizeof(internal_node);
3467 } else
3468 size = 0;
3469
3470 if (t->size + size < Vcb->superblock.node_size - sizeof(tree_header)) {
3472 InsertTailList(&t->itemlist, &td->list_entry);
3473
3474 if (next_tree->header.level > 0 && td->treeholder.tree) {
3475 td->treeholder.tree->parent = t;
3476#ifdef DEBUG_PARANOID
3477 if (td->treeholder.tree->parent && td->treeholder.tree->parent->header.level <= td->treeholder.tree->header.level) int3;
3478#endif
3479 } else if (next_tree->header.level == 0 && !td->inserted && td->size > 0) {
3481
3482 if (!data) {
3483 ERR("out of memory\n");
3485 }
3486
3487 RtlCopyMemory(data, td->data, td->size);
3488 td->data = data;
3489 }
3490
3491 td->inserted = true;
3492
3493 if (!td->ignore) {
3494 next_tree->size -= size;
3495 t->size += size;
3496 next_tree->header.num_items--;
3497 t->header.num_items++;
3498 }
3499
3500 changed = true;
3501 } else
3502 break;
3503
3504 le = next_tree->itemlist.Flink;
3505 }
3506
3507 le = next_tree->itemlist.Flink;
3508 while (le != &next_tree->itemlist) {
3510
3511 if (!td->ignore) {
3512 firstitem = td->key;
3513 break;
3514 }
3515
3516 le = le->Flink;
3517 }
3518
3519 // FIXME - once ascension is working, make this work with parent's parent, etc.
3520 if (next_tree->paritem)
3521 next_tree->paritem->key = firstitem;
3522
3523 par = next_tree;
3524 while (par) {
3525 par->write = true;
3526 par = par->parent;
3527 }
3528
3529 if (changed)
3530 *done = true;
3531 }
3532
3533 return STATUS_SUCCESS;
3534}
3535
3537 KEY searchkey;
3540
3541 if (Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA) {
3542 searchkey.obj_id = address;
3543 searchkey.obj_type = TYPE_METADATA_ITEM;
3544 searchkey.offset = t->header.level;
3545
3546 Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, false, Irp);
3547 if (!NT_SUCCESS(Status)) {
3548 ERR("error - find_item returned %08lx\n", Status);
3549 return Status;
3550 }
3551
3552 if (!keycmp(tp.item->key, searchkey)) {
3554
3555 if (tp.item->size > 0) {
3557
3558 if (!eism) {
3559 ERR("out of memory\n");
3561 }
3562
3563 RtlCopyMemory(eism, tp.item->data, tp.item->size);
3564 } else
3565 eism = NULL;
3566
3568 if (!NT_SUCCESS(Status)) {
3569 ERR("delete_tree_item returned %08lx\n", Status);
3570 if (eism) ExFreePool(eism);
3571 return Status;
3572 }