ReactOS  0.4.13-dev-100-gc8611ae
balance.c
Go to the documentation of this file.
1 /* Copyright (c) Mark Harmstone 2016-17
2  *
3  * This file is part of WinBtrfs.
4  *
5  * WinBtrfs is free software: you can redistribute it and/or modify
6  * it under the terms of the GNU Lesser General Public Licence as published by
7  * the Free Software Foundation, either version 3 of the Licence, or
8  * (at your option) any later version.
9  *
10  * WinBtrfs is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  * GNU Lesser General Public Licence for more details.
14  *
15  * You should have received a copy of the GNU Lesser General Public Licence
16  * along with WinBtrfs. If not, see <http://www.gnu.org/licenses/>. */
17 
18 #include "btrfs_drv.h"
19 #include "btrfsioctl.h"
20 #include <ntddstor.h>
21 
22 typedef struct {
27  tree* t;
32 
33 typedef struct {
36 
37  union {
40  };
41 
46 
47 typedef struct {
55 } data_reloc;
56 
57 typedef struct {
60 
61  union {
64  };
65 
69 
70 #ifndef __REACTOS__
71 #ifndef _MSC_VER // not in mingw yet
72 #define DEVICE_DSM_FLAG_TRIM_NOT_FS_ALLOCATED 0x80000000
73 #endif
74 #endif
75 
76 #define BALANCE_UNIT 0x100000 // only read 1 MB at a time
77 
79  BOOL skinny, metadata_reloc** mr2, chunk* c, LIST_ENTRY* rollback) {
81  metadata_reloc* mr;
82  EXTENT_ITEM* ei;
83  UINT16 len;
84  UINT64 inline_rc;
85  UINT8* ptr;
86 
88  if (!mr) {
89  ERR("out of memory\n");
91  }
92 
93  mr->address = tp->item->key.obj_id;
94  mr->data = NULL;
95  mr->ei = (EXTENT_ITEM*)tp->item->data;
96  mr->system = FALSE;
98 
100  if (!NT_SUCCESS(Status)) {
101  ERR("delete_tree_item returned %08x\n", Status);
102  ExFreePool(mr);
103  return Status;
104  }
105 
106  if (!c)
108 
109  if (c) {
111 
112  c->used -= Vcb->superblock.node_size;
113 
114  space_list_add(c, tp->item->key.obj_id, Vcb->superblock.node_size, rollback);
115 
117  }
118 
119  ei = (EXTENT_ITEM*)tp->item->data;
120  inline_rc = 0;
121 
122  len = tp->item->size - sizeof(EXTENT_ITEM);
123  ptr = (UINT8*)tp->item->data + sizeof(EXTENT_ITEM);
124  if (!skinny) {
125  len -= sizeof(EXTENT_ITEM2);
126  ptr += sizeof(EXTENT_ITEM2);
127  }
128 
129  while (len > 0) {
130  UINT8 secttype = *ptr;
131  UINT16 sectlen = secttype == TYPE_TREE_BLOCK_REF ? sizeof(TREE_BLOCK_REF) : (secttype == TYPE_SHARED_BLOCK_REF ? sizeof(SHARED_BLOCK_REF) : 0);
133 
134  len--;
135 
136  if (sectlen > len) {
137  ERR("(%llx,%x,%llx): %x bytes left, expecting at least %x\n", tp->item->key.obj_id, tp->item->key.obj_type, tp->item->key.offset, len, sectlen);
138  return STATUS_INTERNAL_ERROR;
139  }
140 
141  if (sectlen == 0) {
142  ERR("(%llx,%x,%llx): unrecognized extent type %x\n", tp->item->key.obj_id, tp->item->key.obj_type, tp->item->key.offset, secttype);
143  return STATUS_INTERNAL_ERROR;
144  }
145 
147  if (!ref) {
148  ERR("out of memory\n");
150  }
151 
152  if (secttype == TYPE_TREE_BLOCK_REF) {
153  ref->type = TYPE_TREE_BLOCK_REF;
154  RtlCopyMemory(&ref->tbr, ptr + sizeof(UINT8), sizeof(TREE_BLOCK_REF));
155  inline_rc++;
156  } else if (secttype == TYPE_SHARED_BLOCK_REF) {
157  ref->type = TYPE_SHARED_BLOCK_REF;
158  RtlCopyMemory(&ref->sbr, ptr + sizeof(UINT8), sizeof(SHARED_BLOCK_REF));
159  inline_rc++;
160  } else {
161  ERR("unexpected tree type %x\n", secttype);
162  ExFreePool(ref);
163  return STATUS_INTERNAL_ERROR;
164  }
165 
166  ref->parent = NULL;
167  ref->top = FALSE;
169 
170  len -= sectlen;
171  ptr += sizeof(UINT8) + sectlen;
172  }
173 
174  if (inline_rc < ei->refcount) { // look for non-inline entries
175  traverse_ptr tp2 = *tp, next_tp;
176 
177  while (find_next_item(Vcb, &tp2, &next_tp, FALSE, NULL)) {
178  tp2 = next_tp;
179 
180  if (tp2.item->key.obj_id == tp->item->key.obj_id) {
181  if (tp2.item->key.obj_type == TYPE_TREE_BLOCK_REF) {
183  if (!ref) {
184  ERR("out of memory\n");
186  }
187 
188  ref->type = TYPE_TREE_BLOCK_REF;
189  ref->tbr.offset = tp2.item->key.offset;
190  ref->parent = NULL;
191  ref->top = FALSE;
193 
194  Status = delete_tree_item(Vcb, &tp2);
195  if (!NT_SUCCESS(Status)) {
196  ERR("delete_tree_item returned %08x\n", Status);
197  return Status;
198  }
199  } else if (tp2.item->key.obj_type == TYPE_SHARED_BLOCK_REF) {
201  if (!ref) {
202  ERR("out of memory\n");
204  }
205 
206  ref->type = TYPE_SHARED_BLOCK_REF;
207  ref->sbr.offset = tp2.item->key.offset;
208  ref->parent = NULL;
209  ref->top = FALSE;
211 
212  Status = delete_tree_item(Vcb, &tp2);
213  if (!NT_SUCCESS(Status)) {
214  ERR("delete_tree_item returned %08x\n", Status);
215  return Status;
216  }
217  }
218  } else
219  break;
220  }
221  }
222 
224 
225  if (mr2)
226  *mr2 = mr;
227 
228  return STATUS_SUCCESS;
229 }
230 
233  LIST_ENTRY* le;
234  KEY searchkey;
236  BOOL skinny = FALSE;
238 
239  le = items->Flink;
240  while (le != items) {
242 
243  if (mr->address == address) {
244  *mr2 = mr;
245  return STATUS_SUCCESS;
246  }
247 
248  le = le->Flink;
249  }
250 
251  searchkey.obj_id = address;
252  searchkey.obj_type = TYPE_METADATA_ITEM;
253  searchkey.offset = 0xffffffffffffffff;
254 
255  Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, NULL);
256  if (!NT_SUCCESS(Status)) {
257  ERR("find_item returned %08x\n", Status);
258  return Status;
259  }
260 
262  skinny = TRUE;
263  else if (tp.item->key.obj_id == address && tp.item->key.obj_type == TYPE_EXTENT_ITEM && tp.item->key.offset == Vcb->superblock.node_size &&
264  tp.item->size >= sizeof(EXTENT_ITEM)) {
266 
267  if (!(ei->flags & EXTENT_ITEM_TREE_BLOCK)) {
268  ERR("EXTENT_ITEM for %llx found, but tree flag not set\n", address);
269  return STATUS_INTERNAL_ERROR;
270  }
271  } else {
272  ERR("could not find valid EXTENT_ITEM for address %llx\n", address);
273  return STATUS_INTERNAL_ERROR;
274  }
275 
276  Status = add_metadata_reloc(Vcb, items, &tp, skinny, mr2, NULL, rollback);
277  if (!NT_SUCCESS(Status)) {
278  ERR("add_metadata_reloc returned %08x\n", Status);
279  return Status;
280  }
281 
282  return STATUS_SUCCESS;
283 }
284 
286  LIST_ENTRY newlist, *le;
287 
288  if (mr->refs.Flink == mr->refs.Blink) // 0 or 1 items
289  return;
290 
291  // insertion sort
292 
293  InitializeListHead(&newlist);
294 
295  while (!IsListEmpty(&mr->refs)) {
297  BOOL inserted = FALSE;
298 
299  if (ref->type == TYPE_TREE_BLOCK_REF)
300  ref->hash = ref->tbr.offset;
301  else if (ref->type == TYPE_SHARED_BLOCK_REF)
302  ref->hash = ref->parent->new_address;
303 
304  le = newlist.Flink;
305  while (le != &newlist) {
307 
308  if (ref->type < ref2->type || (ref->type == ref2->type && ref->hash > ref2->hash)) {
310  inserted = TRUE;
311  break;
312  }
313 
314  le = le->Flink;
315  }
316 
317  if (!inserted)
318  InsertTailList(&newlist, &ref->list_entry);
319  }
320 
321  newlist.Flink->Blink = &mr->refs;
322  newlist.Blink->Flink = &mr->refs;
323  mr->refs.Flink = newlist.Flink;
324  mr->refs.Blink = newlist.Blink;
325 }
326 
329  LIST_ENTRY* le;
330  UINT64 rc = 0;
331  UINT16 inline_len;
332  BOOL all_inline = TRUE;
333  metadata_reloc_ref* first_noninline = NULL;
334  EXTENT_ITEM* ei;
335  UINT8* ptr;
336 
337  inline_len = sizeof(EXTENT_ITEM);
338  if (!(Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA))
339  inline_len += sizeof(EXTENT_ITEM2);
340 
342 
343  le = mr->refs.Flink;
344  while (le != &mr->refs) {
346  UINT16 extlen = 0;
347 
348  rc++;
349 
350  if (ref->type == TYPE_TREE_BLOCK_REF)
351  extlen += sizeof(TREE_BLOCK_REF);
352  else if (ref->type == TYPE_SHARED_BLOCK_REF)
353  extlen += sizeof(SHARED_BLOCK_REF);
354 
355  if (all_inline) {
356  if ((ULONG)(inline_len + 1 + extlen) > (Vcb->superblock.node_size >> 2)) {
357  all_inline = FALSE;
358  first_noninline = ref;
359  } else
360  inline_len += extlen + 1;
361  }
362 
363  le = le->Flink;
364  }
365 
366  ei = ExAllocatePoolWithTag(PagedPool, inline_len, ALLOC_TAG);
367  if (!ei) {
368  ERR("out of memory\n");
370  }
371 
372  ei->refcount = rc;
373  ei->generation = mr->ei->generation;
374  ei->flags = mr->ei->flags;
375  ptr = (UINT8*)&ei[1];
376 
377  if (!(Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA)) {
378  EXTENT_ITEM2* ei2 = (EXTENT_ITEM2*)ptr;
379 
380  ei2->firstitem = *(KEY*)&mr->data[1];
381  ei2->level = mr->data->level;
382 
383  ptr += sizeof(EXTENT_ITEM2);
384  }
385 
386  le = mr->refs.Flink;
387  while (le != &mr->refs) {
389 
390  if (ref == first_noninline)
391  break;
392 
393  *ptr = ref->type;
394  ptr++;
395 
396  if (ref->type == TYPE_TREE_BLOCK_REF) {
398 
399  tbr->offset = ref->tbr.offset;
400 
401  ptr += sizeof(TREE_BLOCK_REF);
402  } else if (ref->type == TYPE_SHARED_BLOCK_REF) {
404 
405  sbr->offset = ref->parent->new_address;
406 
407  ptr += sizeof(SHARED_BLOCK_REF);
408  }
409 
410  le = le->Flink;
411  }
412 
413  if (Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA)
414  Status = insert_tree_item(Vcb, Vcb->extent_root, mr->new_address, TYPE_METADATA_ITEM, mr->data->level, ei, inline_len, NULL, NULL);
415  else
416  Status = insert_tree_item(Vcb, Vcb->extent_root, mr->new_address, TYPE_EXTENT_ITEM, Vcb->superblock.node_size, ei, inline_len, NULL, NULL);
417 
418  if (!NT_SUCCESS(Status)) {
419  ERR("insert_tree_item returned %08x\n", Status);
420  ExFreePool(ei);
421  return Status;
422  }
423 
424  if (!all_inline) {
425  le = &first_noninline->list_entry;
426 
427  while (le != &mr->refs) {
429 
430  if (ref->type == TYPE_TREE_BLOCK_REF) {
431  Status = insert_tree_item(Vcb, Vcb->extent_root, mr->new_address, TYPE_TREE_BLOCK_REF, ref->tbr.offset, NULL, 0, NULL, NULL);
432  if (!NT_SUCCESS(Status)) {
433  ERR("insert_tree_item returned %08x\n", Status);
434  return Status;
435  }
436  } else if (ref->type == TYPE_SHARED_BLOCK_REF) {
437  Status = insert_tree_item(Vcb, Vcb->extent_root, mr->new_address, TYPE_SHARED_BLOCK_REF, ref->parent->new_address, NULL, 0, NULL, NULL);
438  if (!NT_SUCCESS(Status)) {
439  ERR("insert_tree_item returned %08x\n", Status);
440  return Status;
441  }
442  }
443 
444  le = le->Flink;
445  }
446  }
447 
449  if (mr->data->level > 0) {
450  UINT16 i;
451  internal_node* in = (internal_node*)&mr->data[1];
452 
453  for (i = 0; i < mr->data->num_items; i++) {
455 
456  if (sbrrc > 0) {
457  SHARED_BLOCK_REF sbr;
458 
459  sbr.offset = mr->new_address;
460 
461  Status = increase_extent_refcount(Vcb, in[i].address, Vcb->superblock.node_size, TYPE_SHARED_BLOCK_REF, &sbr, NULL, 0, NULL);
462  if (!NT_SUCCESS(Status)) {
463  ERR("increase_extent_refcount returned %08x\n", Status);
464  return Status;
465  }
466 
467  sbr.offset = mr->address;
468 
469  Status = decrease_extent_refcount(Vcb, in[i].address, Vcb->superblock.node_size, TYPE_SHARED_BLOCK_REF, &sbr, NULL, 0,
470  sbr.offset, FALSE, NULL);
471  if (!NT_SUCCESS(Status)) {
472  ERR("decrease_extent_refcount returned %08x\n", Status);
473  return Status;
474  }
475  }
476  }
477  } else {
478  UINT16 i;
479  leaf_node* ln = (leaf_node*)&mr->data[1];
480 
481  for (i = 0; i < mr->data->num_items; i++) {
482  if (ln[i].key.obj_type == TYPE_EXTENT_DATA && ln[i].size >= sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2)) {
483  EXTENT_DATA* ed = (EXTENT_DATA*)((UINT8*)mr->data + sizeof(tree_header) + ln[i].offset);
484 
487 
488  if (ed2->size > 0) { // not sparse
490 
491  if (sdrrc > 0) {
492  SHARED_DATA_REF sdr;
493  chunk* c;
494 
495  sdr.offset = mr->new_address;
496  sdr.count = sdrrc;
497 
499  if (!NT_SUCCESS(Status)) {
500  ERR("increase_extent_refcount returned %08x\n", Status);
501  return Status;
502  }
503 
504  sdr.offset = mr->address;
505 
507  sdr.offset, FALSE, NULL);
508  if (!NT_SUCCESS(Status)) {
509  ERR("decrease_extent_refcount returned %08x\n", Status);
510  return Status;
511  }
512 
514 
515  if (c) {
516  // check changed_extents
517 
518  ExAcquireResourceExclusiveLite(&c->changed_extents_lock, TRUE);
519 
520  le = c->changed_extents.Flink;
521 
522  while (le != &c->changed_extents) {
524 
525  if (ce->address == ed2->address) {
526  LIST_ENTRY* le2;
527 
528  le2 = ce->refs.Flink;
529  while (le2 != &ce->refs) {
531 
532  if (cer->type == TYPE_SHARED_DATA_REF && cer->sdr.offset == mr->address) {
533  cer->sdr.offset = mr->new_address;
534  break;
535  }
536 
537  le2 = le2->Flink;
538  }
539 
540  le2 = ce->old_refs.Flink;
541  while (le2 != &ce->old_refs) {
543 
544  if (cer->type == TYPE_SHARED_DATA_REF && cer->sdr.offset == mr->address) {
545  cer->sdr.offset = mr->new_address;
546  break;
547  }
548 
549  le2 = le2->Flink;
550  }
551 
552  break;
553  }
554 
555  le = le->Flink;
556  }
557 
558  ExReleaseResourceLite(&c->changed_extents_lock);
559  }
560  }
561  }
562  }
563  }
564  }
565  }
566  }
567 
568  return STATUS_SUCCESS;
569 }
570 
572  LIST_ENTRY* data_items, chunk* c, LIST_ENTRY* rollback) {
573  LIST_ENTRY tree_writes, *le;
576  UINT8 level, max_level = 0;
577  chunk* newchunk = NULL;
578 
579  InitializeListHead(&tree_writes);
580 
581  le = items->Flink;
582  while (le != items) {
584  LIST_ENTRY* le2;
585  chunk* pc;
586 
587  mr->data = ExAllocatePoolWithTag(PagedPool, Vcb->superblock.node_size, ALLOC_TAG);
588  if (!mr->data) {
589  ERR("out of memory\n");
591  }
592 
593  Status = read_data(Vcb, mr->address, Vcb->superblock.node_size, NULL, TRUE, (UINT8*)mr->data,
594  c && mr->address >= c->offset && mr->address < c->offset + c->chunk_item->size ? c : NULL, &pc, NULL, 0, FALSE, NormalPagePriority);
595  if (!NT_SUCCESS(Status)) {
596  ERR("read_data returned %08x\n", Status);
597  return Status;
598  }
599 
600  if (pc->chunk_item->type & BLOCK_FLAG_SYSTEM)
601  mr->system = TRUE;
602 
603  if (data_items && mr->data->level == 0) {
604  le2 = data_items->Flink;
605  while (le2 != data_items) {
607  leaf_node* ln = (leaf_node*)&mr->data[1];
608  UINT16 i;
609 
610  for (i = 0; i < mr->data->num_items; i++) {
611  if (ln[i].key.obj_type == TYPE_EXTENT_DATA && ln[i].size >= sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2)) {
612  EXTENT_DATA* ed = (EXTENT_DATA*)((UINT8*)mr->data + sizeof(tree_header) + ln[i].offset);
613 
616 
617  if (ed2->address == dr->address)
618  ed2->address = dr->new_address;
619  }
620  }
621  }
622 
623  le2 = le2->Flink;
624  }
625  }
626 
627  if (mr->data->level > max_level)
628  max_level = mr->data->level;
629 
630  le2 = mr->refs.Flink;
631  while (le2 != &mr->refs) {
633 
634  if (ref->type == TYPE_TREE_BLOCK_REF) {
635  KEY* firstitem;
636  root* r = NULL;
637  LIST_ENTRY* le3;
638  tree* t;
639 
640  firstitem = (KEY*)&mr->data[1];
641 
642  le3 = Vcb->roots.Flink;
643  while (le3 != &Vcb->roots) {
645 
646  if (r2->id == ref->tbr.offset) {
647  r = r2;
648  break;
649  }
650 
651  le3 = le3->Flink;
652  }
653 
654  if (!r) {
655  ERR("could not find subvol with id %llx\n", ref->tbr.offset);
656  return STATUS_INTERNAL_ERROR;
657  }
658 
659  Status = find_item_to_level(Vcb, r, &tp, firstitem, FALSE, mr->data->level + 1, NULL);
661  ERR("find_item_to_level returned %08x\n", Status);
662  return Status;
663  }
664 
665  t = tp.tree;
666  while (t && t->header.level < mr->data->level + 1) {
667  t = t->parent;
668  }
669 
670  if (!t)
671  ref->top = TRUE;
672  else {
673  metadata_reloc* mr2;
674 
675  Status = add_metadata_reloc_parent(Vcb, items, t->header.address, &mr2, rollback);
676  if (!NT_SUCCESS(Status)) {
677  ERR("add_metadata_reloc_parent returned %08x\n", Status);
678  return Status;
679  }
680 
681  ref->parent = mr2;
682  }
683  } else if (ref->type == TYPE_SHARED_BLOCK_REF) {
684  metadata_reloc* mr2;
685 
686  Status = add_metadata_reloc_parent(Vcb, items, ref->sbr.offset, &mr2, rollback);
687  if (!NT_SUCCESS(Status)) {
688  ERR("add_metadata_reloc_parent returned %08x\n", Status);
689  return Status;
690  }
691 
692  ref->parent = mr2;
693  }
694 
695  le2 = le2->Flink;
696  }
697 
698  le = le->Flink;
699  }
700 
701  le = items->Flink;
702  while (le != items) {
704  LIST_ENTRY* le2;
705  UINT32 hash;
706 
707  mr->t = NULL;
708 
709  hash = calc_crc32c(0xffffffff, (UINT8*)&mr->address, sizeof(UINT64));
710 
711  le2 = Vcb->trees_ptrs[hash >> 24];
712 
713  if (le2) {
714  while (le2 != &Vcb->trees_hash) {
715  tree* t = CONTAINING_RECORD(le2, tree, list_entry_hash);
716 
717  if (t->header.address == mr->address) {
718  mr->t = t;
719  break;
720  } else if (t->hash > hash)
721  break;
722 
723  le2 = le2->Flink;
724  }
725  }
726 
727  le = le->Flink;
728  }
729 
730  for (level = 0; level <= max_level; level++) {
731  le = items->Flink;
732  while (le != items) {
734 
735  if (mr->data->level == level) {
736  BOOL done = FALSE;
737  LIST_ENTRY* le2;
738  tree_write* tw;
739  UINT64 flags;
740  tree* t3;
741 
742  if (mr->system)
743  flags = Vcb->system_flags;
744  else if (Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_MIXED_GROUPS)
745  flags = Vcb->data_flags;
746  else
747  flags = Vcb->metadata_flags;
748 
749  if (newchunk) {
750  acquire_chunk_lock(newchunk, Vcb);
751 
752  if (newchunk->chunk_item->type == flags && find_metadata_address_in_chunk(Vcb, newchunk, &mr->new_address)) {
753  newchunk->used += Vcb->superblock.node_size;
754  space_list_subtract(newchunk, FALSE, mr->new_address, Vcb->superblock.node_size, rollback);
755  done = TRUE;
756  }
757 
758  release_chunk_lock(newchunk, Vcb);
759  }
760 
761  if (!done) {
762  ExAcquireResourceExclusiveLite(&Vcb->chunk_lock, TRUE);
763 
764  le2 = Vcb->chunks.Flink;
765  while (le2 != &Vcb->chunks) {
767 
768  if (!c2->readonly && !c2->reloc && c2 != newchunk && c2->chunk_item->type == flags) {
769  acquire_chunk_lock(c2, Vcb);
770 
771  if ((c2->chunk_item->size - c2->used) >= Vcb->superblock.node_size) {
773  c2->used += Vcb->superblock.node_size;
774  space_list_subtract(c2, FALSE, mr->new_address, Vcb->superblock.node_size, rollback);
775  release_chunk_lock(c2, Vcb);
776  newchunk = c2;
777  done = TRUE;
778  break;
779  }
780  }
781 
782  release_chunk_lock(c2, Vcb);
783  }
784 
785  le2 = le2->Flink;
786  }
787 
788  // allocate new chunk if necessary
789  if (!done) {
790  Status = alloc_chunk(Vcb, flags, &newchunk, FALSE);
791 
792  if (!NT_SUCCESS(Status)) {
793  ERR("alloc_chunk returned %08x\n", Status);
794  ExReleaseResourceLite(&Vcb->chunk_lock);
795  goto end;
796  }
797 
798  acquire_chunk_lock(newchunk, Vcb);
799 
800  newchunk->balance_num = Vcb->balance.balance_num;
801 
802  if (!find_metadata_address_in_chunk(Vcb, newchunk, &mr->new_address)) {
803  release_chunk_lock(newchunk, Vcb);
804  ExReleaseResourceLite(&Vcb->chunk_lock);
805  ERR("could not find address in new chunk\n");
807  goto end;
808  } else {
809  newchunk->used += Vcb->superblock.node_size;
810  space_list_subtract(newchunk, FALSE, mr->new_address, Vcb->superblock.node_size, rollback);
811  }
812 
813  release_chunk_lock(newchunk, Vcb);
814  }
815 
816  ExReleaseResourceLite(&Vcb->chunk_lock);
817  }
818 
819  // update parents
820  le2 = mr->refs.Flink;
821  while (le2 != &mr->refs) {
823 
824  if (ref->parent) {
825  UINT16 i;
826  internal_node* in = (internal_node*)&ref->parent->data[1];
827 
828  for (i = 0; i < ref->parent->data->num_items; i++) {
829  if (in[i].address == mr->address) {
830  in[i].address = mr->new_address;
831  break;
832  }
833  }
834 
835  if (ref->parent->t) {
836  LIST_ENTRY* le3;
837 
838  le3 = ref->parent->t->itemlist.Flink;
839  while (le3 != &ref->parent->t->itemlist) {
841 
842  if (!td->inserted && td->treeholder.address == mr->address)
843  td->treeholder.address = mr->new_address;
844 
845  le3 = le3->Flink;
846  }
847  }
848  } else if (ref->top && ref->type == TYPE_TREE_BLOCK_REF) {
849  LIST_ENTRY* le3;
850  root* r = NULL;
851 
852  // alter ROOT_ITEM
853 
854  le3 = Vcb->roots.Flink;
855  while (le3 != &Vcb->roots) {
857 
858  if (r2->id == ref->tbr.offset) {
859  r = r2;
860  break;
861  }
862 
863  le3 = le3->Flink;
864  }
865 
866  if (r) {
867  r->treeholder.address = mr->new_address;
868 
869  if (r == Vcb->root_root)
870  Vcb->superblock.root_tree_addr = mr->new_address;
871  else if (r == Vcb->chunk_root)
872  Vcb->superblock.chunk_tree_addr = mr->new_address;
873  else if (r->root_item.block_number == mr->address) {
874  KEY searchkey;
875  ROOT_ITEM* ri;
876 
877  r->root_item.block_number = mr->new_address;
878 
879  searchkey.obj_id = r->id;
880  searchkey.obj_type = TYPE_ROOT_ITEM;
881  searchkey.offset = 0xffffffffffffffff;
882 
883  Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE, NULL);
884  if (!NT_SUCCESS(Status)) {
885  ERR("find_item returned %08x\n", Status);
886  goto end;
887  }
888 
889  if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type) {
890  ERR("could not find ROOT_ITEM for tree %llx\n", searchkey.obj_id);
892  goto end;
893  }
894 
896  if (!ri) {
897  ERR("out of memory\n");
899  goto end;
900  }
901 
902  RtlCopyMemory(ri, &r->root_item, sizeof(ROOT_ITEM));
903 
905  if (!NT_SUCCESS(Status)) {
906  ERR("delete_tree_item returned %08x\n", Status);
907  goto end;
908  }
909 
910  Status = insert_tree_item(Vcb, Vcb->root_root, tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, ri, sizeof(ROOT_ITEM), NULL, NULL);
911  if (!NT_SUCCESS(Status)) {
912  ERR("insert_tree_item returned %08x\n", Status);
913  goto end;
914  }
915  }
916  }
917  }
918 
919  le2 = le2->Flink;
920  }
921 
922  mr->data->address = mr->new_address;
923 
924  t3 = mr->t;
925 
926  while (t3) {
927  UINT8 h;
928  BOOL inserted;
929  tree* t4 = NULL;
930 
931  // check if tree loaded more than once
932  if (t3->list_entry.Flink != &Vcb->trees_hash) {
933  tree* nt = CONTAINING_RECORD(t3->list_entry_hash.Flink, tree, list_entry_hash);
934 
935  if (nt->header.address == t3->header.address)
936  t4 = nt;
937  }
938 
939  t3->header.address = mr->new_address;
940 
941  h = t3->hash >> 24;
942 
943  if (Vcb->trees_ptrs[h] == &t3->list_entry_hash) {
944  if (t3->list_entry_hash.Flink == &Vcb->trees_hash)
945  Vcb->trees_ptrs[h] = NULL;
946  else {
947  tree* t2 = CONTAINING_RECORD(t3->list_entry_hash.Flink, tree, list_entry_hash);
948 
949  if (t2->hash >> 24 == h)
950  Vcb->trees_ptrs[h] = &t2->list_entry_hash;
951  else
952  Vcb->trees_ptrs[h] = NULL;
953  }
954  }
955 
957 
958  t3->hash = calc_crc32c(0xffffffff, (UINT8*)&t3->header.address, sizeof(UINT64));
959  h = t3->hash >> 24;
960 
961  if (!Vcb->trees_ptrs[h]) {
962  UINT8 h2 = h;
963 
964  le2 = Vcb->trees_hash.Flink;
965 
966  if (h2 > 0) {
967  h2--;
968  do {
969  if (Vcb->trees_ptrs[h2]) {
970  le2 = Vcb->trees_ptrs[h2];
971  break;
972  }
973 
974  h2--;
975  } while (h2 > 0);
976  }
977  } else
978  le2 = Vcb->trees_ptrs[h];
979 
980  inserted = FALSE;
981  while (le2 != &Vcb->trees_hash) {
982  tree* t2 = CONTAINING_RECORD(le2, tree, list_entry_hash);
983 
984  if (t2->hash >= t3->hash) {
986  inserted = TRUE;
987  break;
988  }
989 
990  le2 = le2->Flink;
991  }
992 
993  if (!inserted)
994  InsertTailList(&Vcb->trees_hash, &t3->list_entry_hash);
995 
996  if (!Vcb->trees_ptrs[h] || t3->list_entry_hash.Flink == Vcb->trees_ptrs[h])
997  Vcb->trees_ptrs[h] = &t3->list_entry_hash;
998 
999  if (data_items && level == 0) {
1000  le2 = data_items->Flink;
1001 
1002  while (le2 != data_items) {
1004  LIST_ENTRY* le3 = t3->itemlist.Flink;
1005 
1006  while (le3 != &t3->itemlist) {
1008 
1009  if (!td->inserted && td->key.obj_type == TYPE_EXTENT_DATA && td->size >= sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2)) {
1010  EXTENT_DATA* ed = (EXTENT_DATA*)td->data;
1011 
1014 
1015  if (ed2->address == dr->address)
1016  ed2->address = dr->new_address;
1017  }
1018  }
1019 
1020  le3 = le3->Flink;
1021  }
1022 
1023  le2 = le2->Flink;
1024  }
1025  }
1026 
1027  t3 = t4;
1028  }
1029 
1030  *((UINT32*)mr->data) = ~calc_crc32c(0xffffffff, (UINT8*)&mr->data->fs_uuid, Vcb->superblock.node_size - sizeof(mr->data->csum));
1031 
1033  if (!tw) {
1034  ERR("out of memory\n");
1036  goto end;
1037  }
1038 
1039  tw->address = mr->new_address;
1040  tw->length = Vcb->superblock.node_size;
1041  tw->data = (UINT8*)mr->data;
1042 
1043  if (IsListEmpty(&tree_writes))
1044  InsertTailList(&tree_writes, &tw->list_entry);
1045  else {
1046  BOOL inserted = FALSE;
1047 
1048  le2 = tree_writes.Flink;
1049  while (le2 != &tree_writes) {
1051 
1052  if (tw2->address > tw->address) {
1053  InsertHeadList(le2->Blink, &tw->list_entry);
1054  inserted = TRUE;
1055  break;
1056  }
1057 
1058  le2 = le2->Flink;
1059  }
1060 
1061  if (!inserted)
1062  InsertTailList(&tree_writes, &tw->list_entry);
1063  }
1064  }
1065 
1066  le = le->Flink;
1067  }
1068  }
1069 
1070  Status = do_tree_writes(Vcb, &tree_writes, TRUE);
1071  if (!NT_SUCCESS(Status)) {
1072  ERR("do_tree_writes returned %08x\n", Status);
1073  goto end;
1074  }
1075 
1076  le = items->Flink;
1077  while (le != items) {
1079 
1081  if (!NT_SUCCESS(Status)) {
1082  ERR("add_metadata_reloc_extent_item returned %08x\n", Status);
1083  goto end;
1084  }
1085 
1086  le = le->Flink;
1087  }
1088 
1090 
1091 end:
1092  while (!IsListEmpty(&tree_writes)) {
1094  ExFreePool(tw);
1095  }
1096 
1097  return Status;
1098 }
1099 
1101  KEY searchkey;
1102  traverse_ptr tp;
1103  NTSTATUS Status;
1104  BOOL b;
1106  UINT32 loaded = 0;
1107 
1108  TRACE("chunk %llx\n", c->offset);
1109 
1112 
1113  ExAcquireResourceExclusiveLite(&Vcb->tree_lock, TRUE);
1114 
1115  searchkey.obj_id = c->offset;
1116  searchkey.obj_type = TYPE_METADATA_ITEM;
1117  searchkey.offset = 0xffffffffffffffff;
1118 
1119  Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, NULL);
1120  if (!NT_SUCCESS(Status)) {
1121  ERR("find_item returned %08x\n", Status);
1122  goto end;
1123  }
1124 
1125  do {
1126  traverse_ptr next_tp;
1127 
1128  if (tp.item->key.obj_id >= c->offset + c->chunk_item->size)
1129  break;
1130 
1131  if (tp.item->key.obj_id >= c->offset && (tp.item->key.obj_type == TYPE_EXTENT_ITEM || tp.item->key.obj_type == TYPE_METADATA_ITEM)) {
1132  BOOL tree = FALSE, skinny = FALSE;
1133 
1134  if (tp.item->key.obj_type == TYPE_METADATA_ITEM && tp.item->size >= sizeof(EXTENT_ITEM)) {
1135  tree = TRUE;
1136  skinny = TRUE;
1137  } else if (tp.item->key.obj_type == TYPE_EXTENT_ITEM && tp.item->key.offset == Vcb->superblock.node_size &&
1138  tp.item->size >= sizeof(EXTENT_ITEM)) {
1139  EXTENT_ITEM* ei = (EXTENT_ITEM*)tp.item->data;
1140 
1142  tree = TRUE;
1143  }
1144 
1145  if (tree) {
1146  Status = add_metadata_reloc(Vcb, &items, &tp, skinny, NULL, c, &rollback);
1147 
1148  if (!NT_SUCCESS(Status)) {
1149  ERR("add_metadata_reloc returned %08x\n", Status);
1150  goto end;
1151  }
1152 
1153  loaded++;
1154 
1155  if (loaded >= 64) // only do 64 at a time
1156  break;
1157  }
1158  }
1159 
1160  b = find_next_item(Vcb, &tp, &next_tp, FALSE, NULL);
1161 
1162  if (b)
1163  tp = next_tp;
1164  } while (b);
1165 
1166  if (IsListEmpty(&items)) {
1167  *changed = FALSE;
1169  goto end;
1170  } else
1171  *changed = TRUE;
1172 
1174  if (!NT_SUCCESS(Status)) {
1175  ERR("write_metadata_items returned %08x\n", Status);
1176  goto end;
1177  }
1178 
1180 
1181  Vcb->need_write = TRUE;
1182 
1183 end:
1184  if (NT_SUCCESS(Status)) {
1185  Status = do_write(Vcb, NULL);
1186  if (!NT_SUCCESS(Status))
1187  ERR("do_write returned %08x\n", Status);
1188  }
1189 
1190  if (NT_SUCCESS(Status))
1192  else
1194 
1195  free_trees(Vcb);
1196 
1197  ExReleaseResourceLite(&Vcb->tree_lock);
1198 
1199  while (!IsListEmpty(&items)) {
1201 
1202  while (!IsListEmpty(&mr->refs)) {
1204 
1205  ExFreePool(ref);
1206  }
1207 
1208  ExFreePool(mr);
1209  }
1210 
1211  return Status;
1212 }
1213 
1216  NTSTATUS Status;
1217  LIST_ENTRY* le;
1218  KEY searchkey;
1219  traverse_ptr tp;
1220  root* r = NULL;
1221  metadata_reloc* mr;
1222  UINT64 last_tree = 0;
1224 
1225  le = Vcb->roots.Flink;
1226  while (le != &Vcb->roots) {
1228 
1229  if (r2->id == edr->root) {
1230  r = r2;
1231  break;
1232  }
1233 
1234  le = le->Flink;
1235  }
1236 
1237  if (!r) {
1238  ERR("could not find subvol %llx\n", edr->count);
1239  return STATUS_INTERNAL_ERROR;
1240  }
1241 
1242  searchkey.obj_id = edr->objid;
1243  searchkey.obj_type = TYPE_EXTENT_DATA;
1244  searchkey.offset = 0;
1245 
1246  Status = find_item(Vcb, r, &tp, &searchkey, FALSE, NULL);
1247  if (!NT_SUCCESS(Status)) {
1248  ERR("find_item returned %08x\n", Status);
1249  return Status;
1250  }
1251 
1252  if (tp.item->key.obj_id < searchkey.obj_id || (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type < searchkey.obj_type)) {
1253  traverse_ptr tp2;
1254 
1255  if (find_next_item(Vcb, &tp, &tp2, FALSE, NULL))
1256  tp = tp2;
1257  else {
1258  ERR("could not find EXTENT_DATA for inode %llx in root %llx\n", searchkey.obj_id, r->id);
1259  return STATUS_INTERNAL_ERROR;
1260  }
1261  }
1262 
1263  ref = NULL;
1264 
1265  while (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == searchkey.obj_type) {
1266  traverse_ptr tp2;
1267 
1268  if (tp.item->size >= sizeof(EXTENT_DATA)) {
1270 
1273 
1274  if (ed2->address == dr->address && ed2->size == dr->size && tp.item->key.offset - ed2->offset == edr->offset) {
1275  if (ref && last_tree == tp.tree->header.address)
1276  ref->edr.count++;
1277  else {
1279  if (!ref) {
1280  ERR("out of memory\n");
1282  }
1283 
1284  ref->type = TYPE_EXTENT_DATA_REF;
1285  RtlCopyMemory(&ref->edr, edr, sizeof(EXTENT_DATA_REF));
1286  ref->edr.count = 1;
1287 
1288  Status = add_metadata_reloc_parent(Vcb, metadata_items, tp.tree->header.address, &mr, rollback);
1289  if (!NT_SUCCESS(Status)) {
1290  ERR("add_metadata_reloc_parent returned %08x\n", Status);
1291  ExFreePool(ref);
1292  return Status;
1293  }
1294 
1295  last_tree = tp.tree->header.address;
1296  ref->parent = mr;
1297 
1298  InsertTailList(&dr->refs, &ref->list_entry);
1299  }
1300  }
1301  }
1302  }
1303 
1304  if (find_next_item(Vcb, &tp, &tp2, FALSE, NULL))
1305  tp = tp2;
1306  else
1307  break;
1308  }
1309 
1310  return STATUS_SUCCESS;
1311 }
1312 
1315  NTSTATUS Status;
1316  data_reloc* dr;
1317  EXTENT_ITEM* ei;
1318  UINT16 len;
1319  UINT64 inline_rc;
1320  UINT8* ptr;
1321 
1323  if (!dr) {
1324  ERR("out of memory\n");
1326  }
1327 
1328  dr->address = tp->item->key.obj_id;
1329  dr->size = tp->item->key.offset;
1330  dr->ei = (EXTENT_ITEM*)tp->item->data;
1331  InitializeListHead(&dr->refs);
1332 
1334  if (!NT_SUCCESS(Status)) {
1335  ERR("delete_tree_item returned %08x\n", Status);
1336  return Status;
1337  }
1338 
1339  if (!c)
1341 
1342  if (c) {
1344 
1345  c->used -= tp->item->key.offset;
1346 
1348 
1350  }
1351 
1352  ei = (EXTENT_ITEM*)tp->item->data;
1353  inline_rc = 0;
1354 
1355  len = tp->item->size - sizeof(EXTENT_ITEM);
1356  ptr = (UINT8*)tp->item->data + sizeof(EXTENT_ITEM);
1357 
1358  while (len > 0) {
1359  UINT8 secttype = *ptr;
1360  UINT16 sectlen = secttype == TYPE_EXTENT_DATA_REF ? sizeof(EXTENT_DATA_REF) : (secttype == TYPE_SHARED_DATA_REF ? sizeof(SHARED_DATA_REF) : 0);
1361 
1362  len--;
1363 
1364  if (sectlen > len) {
1365  ERR("(%llx,%x,%llx): %x bytes left, expecting at least %x\n", tp->item->key.obj_id, tp->item->key.obj_type, tp->item->key.offset, len, sectlen);
1366  return STATUS_INTERNAL_ERROR;
1367  }
1368 
1369  if (sectlen == 0) {
1370  ERR("(%llx,%x,%llx): unrecognized extent type %x\n", tp->item->key.obj_id, tp->item->key.obj_type, tp->item->key.offset, secttype);
1371  return STATUS_INTERNAL_ERROR;
1372  }
1373 
1374  if (secttype == TYPE_EXTENT_DATA_REF) {
1375  EXTENT_DATA_REF* edr = (EXTENT_DATA_REF*)(ptr + sizeof(UINT8));
1376 
1377  inline_rc += edr->count;
1378 
1379  Status = data_reloc_add_tree_edr(Vcb, metadata_items, dr, edr, rollback);
1380  if (!NT_SUCCESS(Status)) {
1381  ERR("data_reloc_add_tree_edr returned %08x\n", Status);
1382  return Status;
1383  }
1384  } else if (secttype == TYPE_SHARED_DATA_REF) {
1385  metadata_reloc* mr;
1387 
1389  if (!ref) {
1390  ERR("out of memory\n");
1392  }
1393 
1394  ref->type = TYPE_SHARED_DATA_REF;
1395  RtlCopyMemory(&ref->sdr, ptr + sizeof(UINT8), sizeof(SHARED_DATA_REF));
1396  inline_rc += ref->sdr.count;
1397 
1398  Status = add_metadata_reloc_parent(Vcb, metadata_items, ref->sdr.offset, &mr, rollback);
1399  if (!NT_SUCCESS(Status)) {
1400  ERR("add_metadata_reloc_parent returned %08x\n", Status);
1401  ExFreePool(ref);
1402  return Status;
1403  }
1404 
1405  ref->parent = mr;
1406 
1407  InsertTailList(&dr->refs, &ref->list_entry);
1408  } else {
1409  ERR("unexpected tree type %x\n", secttype);
1410  return STATUS_INTERNAL_ERROR;
1411  }
1412 
1413 
1414  len -= sectlen;
1415  ptr += sizeof(UINT8) + sectlen;
1416  }
1417 
1418  if (inline_rc < ei->refcount) { // look for non-inline entries
1419  traverse_ptr tp2 = *tp, next_tp;
1420 
1421  while (find_next_item(Vcb, &tp2, &next_tp, FALSE, NULL)) {
1422  tp2 = next_tp;
1423 
1424  if (tp2.item->key.obj_id == tp->item->key.obj_id) {
1425  if (tp2.item->key.obj_type == TYPE_EXTENT_DATA_REF && tp2.item->size >= sizeof(EXTENT_DATA_REF)) {
1426  Status = data_reloc_add_tree_edr(Vcb, metadata_items, dr, (EXTENT_DATA_REF*)tp2.item->data, rollback);
1427  if (!NT_SUCCESS(Status)) {
1428  ERR("data_reloc_add_tree_edr returned %08x\n", Status);
1429  return Status;
1430  }
1431 
1432  Status = delete_tree_item(Vcb, &tp2);
1433  if (!NT_SUCCESS(Status)) {
1434  ERR("delete_tree_item returned %08x\n", Status);
1435  return Status;
1436  }
1437  } else if (tp2.item->key.obj_type == TYPE_SHARED_DATA_REF && tp2.item->size >= sizeof(UINT32)) {
1438  metadata_reloc* mr;
1440 
1442  if (!ref) {
1443  ERR("out of memory\n");
1445  }
1446 
1447  ref->type = TYPE_SHARED_DATA_REF;
1448  ref->sdr.offset = tp2.item->key.offset;
1449  ref->sdr.count = *((UINT32*)tp2.item->data);
1450 
1451  Status = add_metadata_reloc_parent(Vcb, metadata_items, ref->sdr.offset, &mr, rollback);
1452  if (!NT_SUCCESS(Status)) {
1453  ERR("add_metadata_reloc_parent returned %08x\n", Status);
1454  ExFreePool(ref);
1455  return Status;
1456  }
1457 
1458  ref->parent = mr;
1459  InsertTailList(&dr->refs, &ref->list_entry);
1460 
1461  Status = delete_tree_item(Vcb, &tp2);
1462  if (!NT_SUCCESS(Status)) {
1463  ERR("delete_tree_item returned %08x\n", Status);
1464  return Status;
1465  }
1466  }
1467  } else
1468  break;
1469  }
1470  }
1471 
1473 
1474  return STATUS_SUCCESS;
1475 }
1476 
1478  LIST_ENTRY newlist, *le;
1479 
1480  if (IsListEmpty(&dr->refs))
1481  return;
1482 
1483  // insertion sort
1484 
1485  InitializeListHead(&newlist);
1486 
1487  while (!IsListEmpty(&dr->refs)) {
1489  BOOL inserted = FALSE;
1490 
1491  if (ref->type == TYPE_EXTENT_DATA_REF)
1492  ref->hash = get_extent_data_ref_hash2(ref->edr.root, ref->edr.objid, ref->edr.offset);
1493  else if (ref->type == TYPE_SHARED_DATA_REF)
1494  ref->hash = ref->parent->new_address;
1495 
1496  le = newlist.Flink;
1497  while (le != &newlist) {
1499 
1500  if (ref->type < ref2->type || (ref->type == ref2->type && ref->hash > ref2->hash)) {
1502  inserted = TRUE;
1503  break;
1504  }
1505 
1506  le = le->Flink;
1507  }
1508 
1509  if (!inserted)
1510  InsertTailList(&newlist, &ref->list_entry);
1511  }
1512 
1513  le = newlist.Flink;
1514  while (le != &newlist) {
1516 
1517  if (le->Flink != &newlist) {
1519 
1520  if (ref->type == TYPE_EXTENT_DATA_REF && ref2->type == TYPE_EXTENT_DATA_REF && ref->edr.root == ref2->edr.root &&
1521  ref->edr.objid == ref2->edr.objid && ref->edr.offset == ref2->edr.offset) {
1522  RemoveEntryList(&ref2->list_entry);
1523  ref->edr.count += ref2->edr.count;
1524  ExFreePool(ref2);
1525  continue;
1526  }
1527  }
1528 
1529  le = le->Flink;
1530  }
1531 
1532  newlist.Flink->Blink = &dr->refs;
1533  newlist.Blink->Flink = &dr->refs;
1534  dr->refs.Flink = newlist.Flink;
1535  dr->refs.Blink = newlist.Blink;
1536 }
1537 
1539  NTSTATUS Status;
1540  LIST_ENTRY* le;
1541  UINT64 rc = 0;
1542  UINT16 inline_len;
1543  BOOL all_inline = TRUE;
1544  data_reloc_ref* first_noninline = NULL;
1545  EXTENT_ITEM* ei;
1546  UINT8* ptr;
1547 
1548  inline_len = sizeof(EXTENT_ITEM);
1549 
1551 
1552  le = dr->refs.Flink;
1553  while (le != &dr->refs) {
1555  UINT16 extlen = 0;
1556 
1557  if (ref->type == TYPE_EXTENT_DATA_REF) {
1558  extlen += sizeof(EXTENT_DATA_REF);
1559  rc += ref->edr.count;
1560  } else if (ref->type == TYPE_SHARED_DATA_REF) {
1561  extlen += sizeof(SHARED_DATA_REF);
1562  rc++;
1563  }
1564 
1565  if (all_inline) {
1566  if ((ULONG)(inline_len + 1 + extlen) > (Vcb->superblock.node_size >> 2)) {
1567  all_inline = FALSE;
1568  first_noninline = ref;
1569  } else
1570  inline_len += extlen + 1;
1571  }
1572 
1573  le = le->Flink;
1574  }
1575 
1576  ei = ExAllocatePoolWithTag(PagedPool, inline_len, ALLOC_TAG);
1577  if (!ei) {
1578  ERR("out of memory\n");
1580  }
1581 
1582  ei->refcount = rc;
1583  ei->generation = dr->ei->generation;
1584  ei->flags = dr->ei->flags;
1585  ptr = (UINT8*)&ei[1];
1586 
1587  le = dr->refs.Flink;
1588  while (le != &dr->refs) {
1590 
1591  if (ref == first_noninline)
1592  break;
1593 
1594  *ptr = ref->type;
1595  ptr++;
1596 
1597  if (ref->type == TYPE_EXTENT_DATA_REF) {
1599 
1600  RtlCopyMemory(edr, &ref->edr, sizeof(EXTENT_DATA_REF));
1601 
1602  ptr += sizeof(EXTENT_DATA_REF);
1603  } else if (ref->type == TYPE_SHARED_DATA_REF) {
1605 
1606  sdr->offset = ref->parent->new_address;
1607  sdr->count = ref->sdr.count;
1608 
1609  ptr += sizeof(SHARED_DATA_REF);
1610  }
1611 
1612  le = le->Flink;
1613  }
1614 
1615  Status = insert_tree_item(Vcb, Vcb->extent_root, dr->new_address, TYPE_EXTENT_ITEM, dr->size, ei, inline_len, NULL, NULL);
1616  if (!NT_SUCCESS(Status)) {
1617  ERR("insert_tree_item returned %08x\n", Status);
1618  return Status;
1619  }
1620 
1621  if (!all_inline) {
1622  le = &first_noninline->list_entry;
1623 
1624  while (le != &dr->refs) {
1626 
1627  if (ref->type == TYPE_EXTENT_DATA_REF) {
1628  EXTENT_DATA_REF* edr;
1629 
1631  if (!edr) {
1632  ERR("out of memory\n");
1634  }
1635 
1636  RtlCopyMemory(edr, &ref->edr, sizeof(EXTENT_DATA_REF));
1637 
1638  Status = insert_tree_item(Vcb, Vcb->extent_root, dr->new_address, TYPE_EXTENT_DATA_REF, ref->hash, edr, sizeof(EXTENT_DATA_REF), NULL, NULL);
1639  if (!NT_SUCCESS(Status)) {
1640  ERR("insert_tree_item returned %08x\n", Status);
1641  return Status;
1642  }
1643  } else if (ref->type == TYPE_SHARED_DATA_REF) {
1644  UINT32* sdr;
1645 
1647  if (!sdr) {
1648  ERR("out of memory\n");
1650  }
1651 
1652  *sdr = ref->sdr.count;
1653 
1654  Status = insert_tree_item(Vcb, Vcb->extent_root, dr->new_address, TYPE_SHARED_DATA_REF, ref->parent->new_address, sdr, sizeof(UINT32), NULL, NULL);
1655  if (!NT_SUCCESS(Status)) {
1656  ERR("insert_tree_item returned %08x\n", Status);
1657  return Status;
1658  }
1659  }
1660 
1661  le = le->Flink;
1662  }
1663  }
1664 
1665  return STATUS_SUCCESS;
1666 }
1667 
1669  KEY searchkey;
1670  traverse_ptr tp;
1671  NTSTATUS Status;
1672  BOOL b;
1673  LIST_ENTRY items, metadata_items, rollback, *le;
1674  UINT64 loaded = 0, num_loaded = 0;
1675  chunk* newchunk = NULL;
1676  UINT8* data = NULL;
1677 
1678  TRACE("chunk %llx\n", c->offset);
1679 
1682  InitializeListHead(&metadata_items);
1683 
1684  ExAcquireResourceExclusiveLite(&Vcb->tree_lock, TRUE);
1685 
1686  searchkey.obj_id = c->offset;
1687  searchkey.obj_type = TYPE_EXTENT_ITEM;
1688  searchkey.offset = 0xffffffffffffffff;
1689 
1690  Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE, NULL);
1691  if (!NT_SUCCESS(Status)) {
1692  ERR("find_item returned %08x\n", Status);
1693  goto end;
1694  }
1695 
1696  do {
1697  traverse_ptr next_tp;
1698 
1699  if (tp.item->key.obj_id >= c->offset + c->chunk_item->size)
1700  break;
1701 
1702  if (tp.item->key.obj_id >= c->offset && tp.item->key.obj_type == TYPE_EXTENT_ITEM) {
1703  BOOL tree = FALSE;
1704 
1705  if (tp.item->key.obj_type == TYPE_EXTENT_ITEM && tp.item->size >= sizeof(EXTENT_ITEM)) {
1706  EXTENT_ITEM* ei = (EXTENT_ITEM*)tp.item->data;
1707 
1709  tree = TRUE;
1710  }
1711 
1712  if (!tree) {
1713  Status = add_data_reloc(Vcb, &items, &metadata_items, &tp, c, &rollback);
1714 
1715  if (!NT_SUCCESS(Status)) {
1716  ERR("add_data_reloc returned %08x\n", Status);
1717  goto end;
1718  }
1719 
1720  loaded += tp.item->key.offset;
1721  num_loaded++;
1722 
1723  if (loaded >= 0x1000000 || num_loaded >= 100) // only do so much at a time, so we don't block too obnoxiously
1724  break;
1725  }
1726  }
1727 
1728  b = find_next_item(Vcb, &tp, &next_tp, FALSE, NULL);
1729 
1730  if (b)
1731  tp = next_tp;
1732  } while (b);
1733 
1734  if (IsListEmpty(&items)) {
1735  *changed = FALSE;
1737  goto end;
1738  } else
1739  *changed = TRUE;
1740 
1742  if (!data) {
1743  ERR("out of memory\n");
1745  goto end;
1746  }
1747 
1748  le = items.Flink;
1749  while (le != &items) {
1751  BOOL done = FALSE;
1752  LIST_ENTRY* le2;
1753  UINT32* csum;
1754  RTL_BITMAP bmp;
1755  ULONG* bmparr;
1756  ULONG runlength, index, lastoff;
1757 
1758  if (newchunk) {
1759  acquire_chunk_lock(newchunk, Vcb);
1760 
1761  if (find_data_address_in_chunk(Vcb, newchunk, dr->size, &dr->new_address)) {
1762  newchunk->used += dr->size;
1763  space_list_subtract(newchunk, FALSE, dr->new_address, dr->size, &rollback);
1764  done = TRUE;
1765  }
1766 
1767  release_chunk_lock(newchunk, Vcb);
1768  }
1769 
1770  if (!done) {
1771  ExAcquireResourceExclusiveLite(&Vcb->chunk_lock, TRUE);
1772 
1773  le2 = Vcb->chunks.Flink;
1774  while (le2 != &Vcb->chunks) {
1775  chunk* c2 = CONTAINING_RECORD(le2, chunk, list_entry);
1776 
1777  if (!c2->readonly && !c2->reloc && c2 != newchunk && c2->chunk_item->type == Vcb->data_flags) {
1778  acquire_chunk_lock(c2, Vcb);
1779 
1780  if ((c2->chunk_item->size - c2->used) >= dr->size) {
1781  if (find_data_address_in_chunk(Vcb, c2, dr->size, &dr->new_address)) {
1782  c2->used += dr->size;
1784  release_chunk_lock(c2, Vcb);
1785  newchunk = c2;
1786  done = TRUE;
1787  break;
1788  }
1789  }
1790 
1791  release_chunk_lock(c2, Vcb);
1792  }
1793 
1794  le2 = le2->Flink;
1795  }
1796 
1797  // allocate new chunk if necessary
1798  if (!done) {
1799  Status = alloc_chunk(Vcb, Vcb->data_flags, &newchunk, FALSE);
1800 
1801  if (!NT_SUCCESS(Status)) {
1802  ERR("alloc_chunk returned %08x\n", Status);
1803  ExReleaseResourceLite(&Vcb->chunk_lock);
1804  goto end;
1805  }
1806 
1807  acquire_chunk_lock(newchunk, Vcb);
1808 
1809  newchunk->balance_num = Vcb->balance.balance_num;
1810 
1811  if (!find_data_address_in_chunk(Vcb, newchunk, dr->size, &dr->new_address)) {
1812  release_chunk_lock(newchunk, Vcb);
1813  ExReleaseResourceLite(&Vcb->chunk_lock);
1814  ERR("could not find address in new chunk\n");
1816  goto end;
1817  } else {
1818  newchunk->used += dr->size;
1819  space_list_subtract(newchunk, FALSE, dr->new_address, dr->size, &rollback);
1820  }
1821 
1822  release_chunk_lock(newchunk, Vcb);
1823  }
1824 
1825  ExReleaseResourceLite(&Vcb->chunk_lock);
1826  }
1827 
1828  dr->newchunk = newchunk;
1829 
1830  bmparr = ExAllocatePoolWithTag(PagedPool, (ULONG)sector_align((dr->size / Vcb->superblock.sector_size) + 1, sizeof(ULONG)), ALLOC_TAG);
1831  if (!bmparr) {
1832  ERR("out of memory\n");
1834  goto end;
1835  }
1836 
1837  csum = ExAllocatePoolWithTag(PagedPool, (ULONG)(dr->size * sizeof(UINT32) / Vcb->superblock.sector_size), ALLOC_TAG);
1838  if (!csum) {
1839  ERR("out of memory\n");
1840  ExFreePool(bmparr);
1842  goto end;
1843  }
1844 
1845  RtlInitializeBitMap(&bmp, bmparr, (ULONG)(dr->size / Vcb->superblock.sector_size));
1846  RtlSetAllBits(&bmp); // 1 = no csum, 0 = csum
1847 
1848  searchkey.obj_id = EXTENT_CSUM_ID;
1849  searchkey.obj_type = TYPE_EXTENT_CSUM;
1850  searchkey.offset = dr->address;
1851 
1852  Status = find_item(Vcb, Vcb->checksum_root, &tp, &searchkey, FALSE, NULL);
1853  if (!NT_SUCCESS(Status) && Status != STATUS_NOT_FOUND) {
1854  ERR("find_item returned %08x\n", Status);
1855  ExFreePool(csum);
1856  ExFreePool(bmparr);
1857  goto end;
1858  }
1859 
1860  if (Status != STATUS_NOT_FOUND) {
1861  do {
1862  traverse_ptr next_tp;
1863 
1864  if (tp.item->key.obj_type == TYPE_EXTENT_CSUM) {
1865  if (tp.item->key.offset >= dr->address + dr->size)
1866  break;
1867  else if (tp.item->size >= sizeof(UINT32) && tp.item->key.offset + (tp.item->size * Vcb->superblock.sector_size / sizeof(UINT32)) >= dr->address) {
1868  UINT64 cs = max(dr->address, tp.item->key.offset);
1869  UINT64 ce = min(dr->address + dr->size, tp.item->key.offset + (tp.item->size * Vcb->superblock.sector_size / sizeof(UINT32)));
1870 
1871  RtlCopyMemory(csum + ((cs - dr->address) / Vcb->superblock.sector_size),
1872  tp.item->data + ((cs - tp.item->key.offset) * sizeof(UINT32) / Vcb->superblock.sector_size),
1873  (ULONG)((ce - cs) * sizeof(UINT32) / Vcb->superblock.sector_size));
1874 
1875  RtlClearBits(&bmp, (ULONG)((cs - dr->address) / Vcb->superblock.sector_size), (ULONG)((ce - cs) / Vcb->superblock.sector_size));
1876 
1877  if (ce == dr->address + dr->size)
1878  break;
1879  }
1880  }
1881 
1882  if (find_next_item(Vcb, &tp, &next_tp, FALSE, NULL))
1883  tp = next_tp;
1884  else
1885  break;
1886  } while (TRUE);
1887  }
1888 
1889  lastoff = 0;
1890  runlength = RtlFindFirstRunClear(&bmp, &index);
1891 
1892  while (runlength != 0) {
1893  if (index > lastoff) {
1894  ULONG off = lastoff;
1895  ULONG size = index - lastoff;
1896 
1897  // handle no csum run
1898  do {
1899  ULONG rl;
1900 
1901  if (size * Vcb->superblock.sector_size > BALANCE_UNIT)
1902  rl = BALANCE_UNIT / Vcb->superblock.sector_size;
1903  else
1904  rl = size;
1905 
1906  Status = read_data(Vcb, dr->address + (off * Vcb->superblock.sector_size), rl * Vcb->superblock.sector_size, NULL, FALSE, data,
1908  if (!NT_SUCCESS(Status)) {
1909  ERR("read_data returned %08x\n", Status);
1910  ExFreePool(csum);
1911  ExFreePool(bmparr);
1912  goto end;
1913  }
1914 
1915  Status = write_data_complete(Vcb, dr->new_address + (off * Vcb->superblock.sector_size), data, rl * Vcb->superblock.sector_size,
1916  NULL, newchunk, FALSE, 0, NormalPagePriority);
1917  if (!NT_SUCCESS(Status)) {
1918  ERR("write_data_complete returned %08x\n", Status);
1919  ExFreePool(csum);
1920  ExFreePool(bmparr);
1921  goto end;
1922  }
1923 
1924  size -= rl;
1925  off += rl;
1926  } while (size > 0);
1927  }
1928 
1929  add_checksum_entry(Vcb, dr->new_address + (index * Vcb->superblock.sector_size), runlength, &csum[index], NULL);
1930  add_checksum_entry(Vcb, dr->address + (index * Vcb->superblock.sector_size), runlength, NULL, NULL);
1931 
1932  // handle csum run
1933  do {
1934  ULONG rl;
1935 
1936  if (runlength * Vcb->superblock.sector_size > BALANCE_UNIT)
1937  rl = BALANCE_UNIT / Vcb->superblock.sector_size;
1938  else
1939  rl = runlength;
1940 
1941  Status = read_data(Vcb, dr->address + (index * Vcb->superblock.sector_size), rl * Vcb->superblock.sector_size, &csum[index], FALSE, data,
1943  if (!NT_SUCCESS(Status)) {
1944  ERR("read_data returned %08x\n", Status);
1945  ExFreePool(csum);
1946  ExFreePool(bmparr);
1947  goto end;
1948  }
1949 
1950  Status = write_data_complete(Vcb, dr->new_address + (index * Vcb->superblock.sector_size), data, rl * Vcb->superblock.sector_size,
1951  NULL, newchunk, FALSE, 0, NormalPagePriority);
1952  if (!NT_SUCCESS(Status)) {
1953  ERR("write_data_complete returned %08x\n", Status);
1954  ExFreePool(csum);
1955  ExFreePool(bmparr);
1956  goto end;
1957  }
1958 
1959  runlength -= rl;
1960  index += rl;
1961  } while (runlength > 0);
1962 
1963  lastoff = index;
1964  runlength = RtlFindNextForwardRunClear(&bmp, index, &index);
1965  }
1966 
1967  ExFreePool(csum);
1968  ExFreePool(bmparr);
1969 
1970  // handle final nocsum run
1971  if (lastoff < dr->size / Vcb->superblock.sector_size) {
1972  ULONG off = lastoff;
1973  ULONG size = (ULONG)((dr->size / Vcb->superblock.sector_size) - lastoff);
1974 
1975  do {
1976  ULONG rl;
1977 
1978  if (size * Vcb->superblock.sector_size > BALANCE_UNIT)
1979  rl = BALANCE_UNIT / Vcb->superblock.sector_size;
1980  else
1981  rl = size;
1982 
1983  Status = read_data(Vcb, dr->address + (off * Vcb->superblock.sector_size), rl * Vcb->superblock.sector_size, NULL, FALSE, data,
1985  if (!NT_SUCCESS(Status)) {
1986  ERR("read_data returned %08x\n", Status);
1987  goto end;
1988  }
1989 
1990  Status = write_data_complete(Vcb, dr->new_address + (off * Vcb->superblock.sector_size), data, rl * Vcb->superblock.sector_size,
1991  NULL, newchunk, FALSE, 0, NormalPagePriority);
1992  if (!NT_SUCCESS(Status)) {
1993  ERR("write_data_complete returned %08x\n", Status);
1994  goto end;
1995  }
1996 
1997  size -= rl;
1998  off += rl;
1999  } while (size > 0);
2000  }
2001 
2002  le = le->Flink;
2003  }
2004 
2005  ExFreePool(data);
2006  data = NULL;
2007 
2008  Status = write_metadata_items(Vcb, &metadata_items, &items, NULL, &rollback);
2009  if (!NT_SUCCESS(Status)) {
2010  ERR("write_metadata_items returned %08x\n", Status);
2011  goto end;
2012  }
2013 
2014  le = items.Flink;
2015  while (le != &items) {
2017 
2019  if (!NT_SUCCESS(Status)) {
2020  ERR("add_data_reloc_extent_item returned %08x\n", Status);
2021  goto end;
2022  }
2023 
2024  le = le->Flink;
2025  }
2026 
2027  le = c->changed_extents.Flink;
2028  while (le != &c->changed_extents) {
2029  LIST_ENTRY *le2, *le3;
2031 
2032  le3 = le->Flink;
2033 
2034  le2 = items.Flink;
2035  while (le2 != &items) {
2037 
2038  if (ce->address == dr->address) {
2039  ce->address = dr->new_address;
2042  break;
2043  }
2044 
2045  le2 = le2->Flink;
2046  }
2047 
2048  le = le3;
2049  }
2050 
2052 
2053  Vcb->need_write = TRUE;
2054 
2055 end:
2056  if (NT_SUCCESS(Status)) {
2057  // update extents in cache inodes before we flush
2058  le = Vcb->chunks.Flink;
2059  while (le != &Vcb->chunks) {
2061 
2062  if (c2->cache) {
2063  LIST_ENTRY* le2;
2064 
2066 
2067  le2 = c2->cache->extents.Flink;
2068  while (le2 != &c2->cache->extents) {
2070 
2071  if (!ext->ignore) {
2072  if (ext->extent_data.type == EXTENT_TYPE_REGULAR || ext->extent_data.type == EXTENT_TYPE_PREALLOC) {
2073  EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ext->extent_data.data;
2074 
2075  if (ed2->size > 0 && ed2->address >= c->offset && ed2->address < c->offset + c->chunk_item->size) {
2076  LIST_ENTRY* le3 = items.Flink;
2077  while (le3 != &items) {
2079 
2080  if (ed2->address == dr->address) {
2081  ed2->address = dr->new_address;
2082  break;
2083  }
2084 
2085  le3 = le3->Flink;
2086  }
2087  }
2088  }
2089  }
2090 
2091  le2 = le2->Flink;
2092  }
2093 
2094  ExReleaseResourceLite(c2->cache->Header.Resource);
2095  }
2096 
2097  le = le->Flink;
2098  }
2099 
2100  Status = do_write(Vcb, NULL);
2101  if (!NT_SUCCESS(Status))
2102  ERR("do_write returned %08x\n", Status);
2103  }
2104 
2105  if (NT_SUCCESS(Status)) {
2107 
2108  // update open FCBs
2109  // FIXME - speed this up(?)
2110 
2111  acquire_fcb_lock_shared(Vcb);
2112 
2113  le = Vcb->all_fcbs.Flink;
2114  while (le != &Vcb->all_fcbs) {
2115  struct _fcb* fcb = CONTAINING_RECORD(le, struct _fcb, list_entry_all);
2116  LIST_ENTRY* le2;
2117 
2119 
2120  le2 = fcb->extents.Flink;
2121  while (le2 != &fcb->extents) {
2123 
2124  if (!ext->ignore) {
2125  if (ext->extent_data.type == EXTENT_TYPE_REGULAR || ext->extent_data.type == EXTENT_TYPE_PREALLOC) {
2126  EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ext->extent_data.data;
2127 
2128  if (ed2->size > 0 && ed2->address >= c->offset && ed2->address < c->offset + c->chunk_item->size) {
2129  LIST_ENTRY* le3 = items.Flink;
2130  while (le3 != &items) {
2132 
2133  if (ed2->address == dr->address) {
2134  ed2->address = dr->new_address;
2135  break;
2136  }
2137 
2138  le3 = le3->Flink;
2139  }
2140  }
2141  }
2142  }
2143 
2144  le2 = le2->Flink;
2145  }
2146 
2147  ExReleaseResourceLite(fcb->Header.Resource);
2148 
2149  le = le->Flink;
2150  }
2151 
2152  release_fcb_lock(Vcb);
2153  } else
2155 
2156  free_trees(Vcb);
2157 
2158  ExReleaseResourceLite(&Vcb->tree_lock);
2159 
2160  if (data)
2161  ExFreePool(data);
2162 
2163  while (!IsListEmpty(&items)) {
2165 
2166  while (!IsListEmpty(&dr->refs)) {
2168 
2169  ExFreePool(ref);
2170  }
2171 
2172  ExFreePool(dr);
2173  }
2174 
2175  while (!IsListEmpty(&metadata_items)) {
2177 
2178  while (!IsListEmpty(&mr->refs)) {
2180 
2181  ExFreePool(ref);
2182  }
2183 
2184  ExFreePool(mr);
2185  }
2186 
2187  return Status;
2188 }
2189 
2190 static __inline UINT64 get_chunk_dup_type(chunk* c) {
2191  if (c->chunk_item->type & BLOCK_FLAG_RAID0)
2192  return BLOCK_FLAG_RAID0;
2193  else if (c->chunk_item->type & BLOCK_FLAG_RAID1)
2194  return BLOCK_FLAG_RAID1;
2195  else if (c->chunk_item->type & BLOCK_FLAG_DUPLICATE)
2196  return BLOCK_FLAG_DUPLICATE;
2197  else if (c->chunk_item->type & BLOCK_FLAG_RAID10)
2198  return BLOCK_FLAG_RAID10;
2199  else if (c->chunk_item->type & BLOCK_FLAG_RAID5)
2200  return BLOCK_FLAG_RAID5;
2201  else if (c->chunk_item->type & BLOCK_FLAG_RAID6)
2202  return BLOCK_FLAG_RAID6;
2203  else
2204  return BLOCK_FLAG_SINGLE;
2205 }
2206 
2208  btrfs_balance_opts* opts;
2209 
2210  opts = &Vcb->balance.opts[sort];
2211 
2212  if (!(opts->flags & BTRFS_BALANCE_OPTS_ENABLED))
2213  return FALSE;
2214 
2215  if (opts->flags & BTRFS_BALANCE_OPTS_PROFILES) {
2217 
2218  if (!(type & opts->profiles))
2219  return FALSE;
2220  }
2221 
2222  if (opts->flags & BTRFS_BALANCE_OPTS_DEVID) {
2223  UINT16 i;
2224  CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&c->chunk_item[1];
2225  BOOL b = FALSE;
2226 
2227  for (i = 0; i < c->chunk_item->num_stripes; i++) {
2228  if (cis[i].dev_id == opts->devid) {
2229  b = TRUE;
2230  break;
2231  }
2232  }
2233 
2234  if (!b)
2235  return FALSE;
2236  }
2237 
2238  if (opts->flags & BTRFS_BALANCE_OPTS_DRANGE) {
2239  UINT16 i, factor;
2240  UINT64 physsize;
2241  CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&c->chunk_item[1];
2242  BOOL b = FALSE;
2243 
2244  if (c->chunk_item->type & BLOCK_FLAG_RAID0)
2245  factor = c->chunk_item->num_stripes;
2246  else if (c->chunk_item->type & BLOCK_FLAG_RAID10)
2247  factor = c->chunk_item->num_stripes / c->chunk_item->sub_stripes;
2248  else if (c->chunk_item->type & BLOCK_FLAG_RAID5)
2249  factor = c->chunk_item->num_stripes - 1;
2250  else if (c->chunk_item->type & BLOCK_FLAG_RAID6)
2251  factor = c->chunk_item->num_stripes - 2;
2252  else // SINGLE, DUPLICATE, RAID1
2253  factor = 1;
2254 
2255  physsize = c->chunk_item->size / factor;
2256 
2257  for (i = 0; i < c->chunk_item->num_stripes; i++) {
2258  if (cis[i].offset < opts->drange_end && cis[i].offset + physsize >= opts->drange_start &&
2259  (!(opts->flags & BTRFS_BALANCE_OPTS_DEVID) || cis[i].dev_id == opts->devid)) {
2260  b = TRUE;
2261  break;
2262  }
2263  }
2264 
2265  if (!b)
2266  return FALSE;
2267  }
2268 
2269  if (opts->flags & BTRFS_BALANCE_OPTS_VRANGE) {
2270  if (c->offset + c->chunk_item->size <= opts->vrange_start || c->offset > opts->vrange_end)
2271  return FALSE;
2272  }
2273 
2274  if (opts->flags & BTRFS_BALANCE_OPTS_STRIPES) {
2275  if (c->chunk_item->num_stripes < opts->stripes_start || c->chunk_item->num_stripes < opts->stripes_end)
2276  return FALSE;
2277  }
2278 
2279  if (opts->flags & BTRFS_BALANCE_OPTS_USAGE) {
2280  UINT64 usage = c->used * 100 / c->chunk_item->size;
2281 
2282  // usage == 0 should mean completely empty, not just that usage rounds to 0%
2283  if (c->used > 0 && usage == 0)
2284  usage = 1;
2285 
2286  if (usage < opts->usage_start || usage > opts->usage_end)
2287  return FALSE;
2288  }
2289 
2292 
2293  if (type == opts->convert)
2294  return FALSE;
2295  }
2296 
2297  return TRUE;
2298 }
2299 
2301  if (opts->flags & BTRFS_BALANCE_OPTS_PROFILES) {
2302  args->profiles = opts->profiles;
2304  }
2305 
2306  if (opts->flags & BTRFS_BALANCE_OPTS_USAGE) {
2307  if (args->usage_start == 0) {
2309  args->usage_start = opts->usage_start;
2310  args->usage_end = opts->usage_end;
2311  } else {
2312  args->flags |= BALANCE_ARGS_FLAGS_USAGE;
2313  args->usage = opts->usage_end;
2314  }
2315  }
2316 
2317  if (opts->flags & BTRFS_BALANCE_OPTS_DEVID) {
2318  args->devid = opts->devid;
2319  args->flags |= BALANCE_ARGS_FLAGS_DEVID;
2320  }
2321 
2322  if (opts->flags & BTRFS_BALANCE_OPTS_DRANGE) {
2323  args->drange_start = opts->drange_start;
2324  args->drange_end = opts->drange_end;
2325  args->flags |= BALANCE_ARGS_FLAGS_DRANGE;
2326  }
2327 
2328  if (opts->flags & BTRFS_BALANCE_OPTS_VRANGE) {
2329  args->vrange_start = opts->vrange_start;
2330  args->vrange_end = opts->vrange_end;
2331  args->flags |= BALANCE_ARGS_FLAGS_VRANGE;
2332  }
2333 
2334  if (opts->flags & BTRFS_BALANCE_OPTS_CONVERT) {
2335  args->convert = opts->convert;
2336  args->flags |= BALANCE_ARGS_FLAGS_CONVERT;
2337 
2338  if (opts->flags & BTRFS_BALANCE_OPTS_SOFT)
2339  args->flags |= BALANCE_ARGS_FLAGS_SOFT;
2340  }
2341 
2342  if (opts->flags & BTRFS_BALANCE_OPTS_LIMIT) {
2343  if (args->limit_start == 0) {
2345  args->limit_start = (UINT32)opts->limit_start;
2346  args->limit_end = (UINT32)opts->limit_end;
2347  } else {
2348  args->flags |= BALANCE_ARGS_FLAGS_LIMIT;
2349  args->limit = opts->limit_end;
2350  }
2351  }
2352 
2353  if (opts->flags & BTRFS_BALANCE_OPTS_STRIPES) {
2354  args->stripes_start = opts->stripes_start;
2355  args->stripes_end = opts->stripes_end;
2357  }
2358 }
2359 
2361  KEY searchkey;
2362  traverse_ptr tp;
2363  NTSTATUS Status;
2364  BALANCE_ITEM* bi;
2365 
2366  searchkey.obj_id = BALANCE_ITEM_ID;
2367  searchkey.obj_type = TYPE_TEMP_ITEM;
2368  searchkey.offset = 0;
2369 
2370  ExAcquireResourceExclusiveLite(&Vcb->tree_lock, TRUE);
2371 
2372  Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE, NULL);
2373  if (!NT_SUCCESS(Status)) {
2374  ERR("find_item returned %08x\n", Status);
2375  goto end;
2376  }
2377 
2378  if (!keycmp(tp.item->key, searchkey)) {
2380  if (!NT_SUCCESS(Status)) {
2381  ERR("delete_tree_item returned %08x\n", Status);
2382  goto end;
2383  }
2384  }
2385 
2387  if (!bi) {
2388  ERR("out of memory\n");
2390  goto end;
2391  }
2392 
2393  RtlZeroMemory(bi, sizeof(BALANCE_ITEM));
2394 
2395  if (Vcb->balance.opts[BALANCE_OPTS_DATA].flags & BTRFS_BALANCE_OPTS_ENABLED) {
2396  bi->flags |= BALANCE_FLAGS_DATA;
2397  copy_balance_args(&Vcb->balance.opts[BALANCE_OPTS_DATA], &bi->data);
2398  }
2399 
2400  if (Vcb->balance.opts[BALANCE_OPTS_METADATA].flags & BTRFS_BALANCE_OPTS_ENABLED) {
2402  copy_balance_args(&Vcb->balance.opts[BALANCE_OPTS_METADATA], &bi->metadata);
2403  }
2404 
2405  if (Vcb->balance.opts[BALANCE_OPTS_SYSTEM].flags & BTRFS_BALANCE_OPTS_ENABLED) {
2406  bi->flags |= BALANCE_FLAGS_SYSTEM;
2407  copy_balance_args(&Vcb->balance.opts[BALANCE_OPTS_SYSTEM], &bi->system);
2408  }
2409 
2410  Status = insert_tree_item(Vcb, Vcb->root_root, BALANCE_ITEM_ID, TYPE_TEMP_ITEM, 0, bi, sizeof(BALANCE_ITEM), NULL, NULL);
2411  if (!NT_SUCCESS(Status)) {
2412  ERR("insert_tree_item returned %08x\n", Status);
2413  ExFreePool(bi);
2414  goto end;
2415  }
2416 
2418 
2419 end:
2420  if (NT_SUCCESS(Status)) {
2421  Status = do_write(Vcb, NULL);
2422  if (!NT_SUCCESS(Status))
2423  ERR("do_write returned %08x\n", Status);
2424  }
2425 
2426  free_trees(Vcb);
2427 
2428  ExReleaseResourceLite(&Vcb->tree_lock);
2429 
2430  return Status;
2431 }
2432 
2434  KEY searchkey;
2435  traverse_ptr tp;
2436  NTSTATUS Status;
2437 
2438  searchkey.obj_id = BALANCE_ITEM_ID;
2439  searchkey.obj_type = TYPE_TEMP_ITEM;
2440  searchkey.offset = 0;
2441 
2442  ExAcquireResourceExclusiveLite(&Vcb->tree_lock, TRUE);
2443 
2444  Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE, NULL);
2445  if (!NT_SUCCESS(Status)) {
2446  ERR("find_item returned %08x\n", Status);
2447  goto end;
2448  }
2449 
2450  if (!keycmp(tp.item->key, searchkey)) {
2452  if (!NT_SUCCESS(Status)) {
2453  ERR("delete_tree_item returned %08x\n", Status);
2454  goto end;
2455  }
2456 
2457  Status = do_write(Vcb, NULL);
2458  if (!NT_SUCCESS(Status)) {
2459  ERR("do_write returned %08x\n", Status);
2460  goto end;
2461  }
2462 
2463  free_trees(Vcb);
2464  }
2465 
2467 
2468 end:
2469  ExReleaseResourceLite(&Vcb->tree_lock);
2470 
2471  return Status;
2472 }
2473 
2476 
2477  if (args->flags & BALANCE_ARGS_FLAGS_PROFILES) {
2479  opts->profiles = args->profiles;
2480  }
2481 
2482  if (args->flags & BALANCE_ARGS_FLAGS_USAGE) {
2484 
2485  opts->usage_start = 0;
2486  opts->usage_end = (UINT8)args->usage;
2487  } else if (args->flags & BALANCE_ARGS_FLAGS_USAGE_RANGE) {
2489 
2490  opts->usage_start = (UINT8)args->usage_start;
2491  opts->usage_end = (UINT8)args->usage_end;
2492  }
2493 
2494  if (args->flags & BALANCE_ARGS_FLAGS_DEVID) {
2496  opts->devid = args->devid;
2497  }
2498 
2499  if (args->flags & BALANCE_ARGS_FLAGS_DRANGE) {
2501  opts->drange_start = args->drange_start;
2502  opts->drange_end = args->drange_end;
2503  }
2504 
2505  if (args->flags & BALANCE_ARGS_FLAGS_VRANGE) {
2507  opts->vrange_start = args->vrange_start;
2508  opts->vrange_end = args->vrange_end;
2509  }
2510 
2511  if (args->flags & BALANCE_ARGS_FLAGS_LIMIT) {
2513 
2514  opts->limit_start = 0;
2515  opts->limit_end = args->limit;
2516  } else if (args->flags & BALANCE_ARGS_FLAGS_LIMIT_RANGE) {
2518 
2519  opts->limit_start = args->limit_start;
2520  opts->limit_end = args->limit_end;
2521  }
2522 
2523  if (args->flags & BALANCE_ARGS_FLAGS_STRIPES_RANGE) {
2525 
2526  opts->stripes_start = (UINT16)args->stripes_start;
2527  opts->stripes_end = (UINT16)args->stripes_end;
2528  }
2529 
2530  if (args->flags & BALANCE_ARGS_FLAGS_CONVERT) {
2532  opts->convert = args->convert;
2533 
2534  if (args->flags & BALANCE_ARGS_FLAGS_SOFT)
2535  opts->flags |= BTRFS_BALANCE_OPTS_SOFT;
2536  }
2537 }
2538 
2540  NTSTATUS Status;
2541  superblock* sb;
2542  int i = 0;
2543 
2545  if (!sb) {
2546  ERR("out of memory\n");
2548  }
2549 
2550  RtlZeroMemory(sb, sizeof(superblock));
2551 
2552  while (superblock_addrs[i] > 0 && dev->devitem.num_bytes >= superblock_addrs[i] + sizeof(superblock)) {
2553  Status = write_data_phys(dev->devobj, superblock_addrs[i], sb, sizeof(superblock));
2554 
2555  if (!NT_SUCCESS(Status)) {
2556  ExFreePool(sb);
2557  return Status;
2558  }
2559 
2560  i++;
2561  }
2562 
2563  ExFreePool(sb);
2564 
2565  return STATUS_SUCCESS;
2566 }
2567 
2569  KEY searchkey;
2570  traverse_ptr tp;
2571  NTSTATUS Status;
2572  LIST_ENTRY* le;
2574 
2575  if (Vcb->need_write) {
2576  Status = do_write(Vcb, NULL);
2577 
2578  if (!NT_SUCCESS(Status))
2579  ERR("do_write returned %08x\n", Status);
2580  } else
2582 
2583  free_trees(Vcb);
2584 
2585  if (!NT_SUCCESS(Status))
2586  return Status;
2587 
2588  // remove entry in chunk tree
2589 
2590  searchkey.obj_id = 1;
2591  searchkey.obj_type = TYPE_DEV_ITEM;
2592  searchkey.offset = dev->devitem.dev_id;
2593 
2594  Status = find_item(Vcb, Vcb->chunk_root, &tp, &searchkey, FALSE, NULL);
2595  if (!NT_SUCCESS(Status)) {
2596  ERR("find_item returned %08x\n", Status);
2597  return Status;
2598  }
2599 
2600  if (!keycmp(searchkey, tp.item->key)) {
2602 
2603  if (!NT_SUCCESS(Status)) {
2604  ERR("delete_tree_item returned %08x\n", Status);
2605  return Status;
2606  }
2607  }
2608 
2609  // remove stats entry in device tree
2610 
2611  searchkey.obj_id = 0;
2612  searchkey.obj_type = TYPE_DEV_STATS;
2613  searchkey.offset = dev->devitem.dev_id;
2614 
2615  Status = find_item(Vcb, Vcb->dev_root, &tp, &searchkey, FALSE, NULL);
2616  if (!NT_SUCCESS(Status)) {
2617  ERR("find_item returned %08x\n", Status);
2618  return Status;
2619  }
2620 
2621  if (!keycmp(searchkey, tp.item->key)) {
2623 
2624  if (!NT_SUCCESS(Status)) {
2625  ERR("delete_tree_item returned %08x\n", Status);
2626  return Status;
2627  }
2628  }
2629 
2630  // update superblock
2631 
2632  Vcb->superblock.num_devices--;
2633  Vcb->superblock.total_bytes -= dev->devitem.num_bytes;
2634  Vcb->devices_loaded--;
2635 
2636  RemoveEntryList(&dev->list_entry);
2637 
2638  // flush
2639 
2640  Status = do_write(Vcb, NULL);
2641  if (!NT_SUCCESS(Status))
2642  ERR("do_write returned %08x\n", Status);
2643 
2644  free_trees(Vcb);
2645 
2646  if (!NT_SUCCESS(Status))
2647  return Status;
2648 
2649  if (!dev->readonly && dev->devobj) {
2651  if (!NT_SUCCESS(Status))
2652  WARN("remove_superblocks returned %08x\n", Status);
2653  }
2654 
2655  // remove entry in volume list
2656 
2657  vde = Vcb->vde;
2658 
2659  if (dev->devobj) {
2660  pdo_device_extension* pdode = vde->pdode;
2661 
2663 
2664  le = pdode->children.Flink;
2665  while (le != &pdode->children) {
2667 
2668  if (RtlCompareMemory(&dev->devitem.device_uuid, &vc->uuid, sizeof(BTRFS_UUID)) == sizeof(BTRFS_UUID)) {
2670  PDEVICE_OBJECT mountmgr;
2671  UNICODE_STRING mmdevpath;
2672 
2673  pdode->children_loaded--;
2674 
2675  if (vc->had_drive_letter) { // re-add entry to mountmgr
2677  Status = IoGetDeviceObjectPointer(&mmdevpath, FILE_READ_ATTRIBUTES, &FileObject, &mountmgr);
2678  if (!NT_SUCCESS(Status))
2679  ERR("IoGetDeviceObjectPointer returned %08x\n", Status);
2680  else {
2681  MOUNTDEV_NAME mdn;
2682 
2685  ERR("IOCTL_MOUNTDEV_QUERY_DEVICE_NAME returned %08x\n", Status);
2686  else {
2687  MOUNTDEV_NAME* mdn2;
2688  ULONG mdnsize = (ULONG)offsetof(MOUNTDEV_NAME, Name[0]) + mdn.NameLength;
2689 
2690  mdn2 = ExAllocatePoolWithTag(PagedPool, mdnsize, ALLOC_TAG);
2691  if (!mdn2)
2692  ERR("out of memory\n");
2693  else {
2694  Status = dev_ioctl(dev->devobj, IOCTL_MOUNTDEV_QUERY_DEVICE_NAME, NULL, 0, mdn2, mdnsize, TRUE, NULL);
2695  if (!NT_SUCCESS(Status))
2696  ERR("IOCTL_MOUNTDEV_QUERY_DEVICE_NAME returned %08x\n", Status);
2697  else {
2699 
2700  name.Buffer = mdn2->Name;
2701  name.Length = name.MaximumLength = mdn2->NameLength;
2702 
2703  Status = mountmgr_add_drive_letter(mountmgr, &name);
2704  if (!NT_SUCCESS(Status))
2705  WARN("mountmgr_add_drive_letter returned %08x\n", Status);
2706  }
2707 
2708  ExFreePool(mdn2);
2709  }
2710  }
2711 
2713  }
2714  }
2715 
2716  ExFreePool(vc->pnp_name.Buffer);
2718  ExFreePool(vc);
2719 
2721 
2722  break;
2723  }
2724 
2725  le = le->Flink;
2726  }
2727 
2728  if (pdode->children_loaded > 0 && vde->device->Characteristics & FILE_REMOVABLE_MEDIA) {
2729  vde->device->Characteristics &= ~FILE_REMOVABLE_MEDIA;
2730 
2731  le = pdode->children.Flink;
2732  while (le != &pdode->children) {
2734 
2735  if (vc->devobj->Characteristics & FILE_REMOVABLE_MEDIA) {
2736  vde->device->Characteristics |= FILE_REMOVABLE_MEDIA;
2737  break;
2738  }
2739 
2740  le = le->Flink;
2741  }
2742  }
2743 
2744  pdode->num_children = Vcb->superblock.num_devices;
2745 
2747 
2748  // free dev
2749 
2750  if (dev->trim && !dev->readonly && !Vcb->options.no_trim)
2752  }
2753 
2754  while (!IsListEmpty(&dev->space)) {
2755  LIST_ENTRY* le2 = RemoveHeadList(&dev->space);
2757 
2758  ExFreePool(s);
2759  }
2760 
2761  ExFreePool(dev);
2762 
2763  if (Vcb->trim) {
2764  Vcb->trim = FALSE;
2765 
2766  le = Vcb->devices.Flink;
2767  while (le != &Vcb->devices) {
2768  device* dev2 = CONTAINING_RECORD(le, device, list_entry);
2769 
2770  if (dev2->trim) {
2771  Vcb->trim = TRUE;
2772  break;
2773  }
2774 
2775  le = le->Flink;
2776  }
2777  }
2778 
2780 
2781  return STATUS_SUCCESS;
2782 }
2783 
2786  DEVICE_DATA_SET_RANGE* ranges;
2787  ULONG datalen, i;
2788  KEY searchkey;
2789  traverse_ptr tp;
2790  NTSTATUS Status;
2791  BOOL b;
2792  UINT64 lastoff = 0x100000; // don't TRIM the first megabyte, in case someone has been daft enough to install GRUB there
2793  LIST_ENTRY* le;
2794 
2795  dev->num_trim_entries = 0;
2796 
2797  searchkey.obj_id = dev->devitem.dev_id;
2798  searchkey.obj_type = TYPE_DEV_EXTENT;
2799  searchkey.offset = 0;
2800 
2801  Status = find_item(Vcb, Vcb->dev_root, &tp, &searchkey, FALSE, NULL);
2802  if (!NT_SUCCESS(Status)) {
2803  ERR("find_item returned %08x\n", Status);
2804  return;
2805  }
2806 
2807  do {
2808  traverse_ptr next_tp;
2809 
2810  if (tp.item->key.obj_id == dev->devitem.dev_id && tp.item->key.obj_type == TYPE_DEV_EXTENT) {
2811  if (tp.item->size >= sizeof(DEV_EXTENT)) {
2812  DEV_EXTENT* de = (DEV_EXTENT*)tp.item->data;
2813 
2814  if (tp.item->key.offset > lastoff)
2815  add_trim_entry_avoid_sb(Vcb, dev, lastoff, tp.item->key.offset - lastoff);
2816 
2817  lastoff = tp.item->key.offset + de->length;
2818  } else {
2819  ERR("(%llx,%x,%llx) was %u bytes, expected %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(DEV_EXTENT));
2820  return;
2821  }
2822  }
2823 
2824  b = find_next_item(Vcb, &tp, &next_tp, FALSE, NULL);
2825 
2826  if (b) {
2827  tp = next_tp;
2828  if (tp.item->key.obj_id > searchkey.obj_id || (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type > searchkey.obj_type))
2829  break;
2830  }
2831  } while (b);
2832 
2833  if (lastoff < dev->devitem.num_bytes)
2834  add_trim_entry_avoid_sb(Vcb, dev, lastoff, dev->devitem.num_bytes - lastoff);
2835 
2836  if (dev->num_trim_entries == 0)
2837  return;
2838 
2839  datalen = (ULONG)sector_align(sizeof(DEVICE_MANAGE_DATA_SET_ATTRIBUTES), sizeof(UINT64)) + (dev->num_trim_entries * sizeof(DEVICE_DATA_SET_RANGE));
2840 
2842  if (!dmdsa) {
2843  ERR("out of memory\n");
2844  goto end;
2845  }
2846 
2847  dmdsa->Size = sizeof(DEVICE_MANAGE_DATA_SET_ATTRIBUTES);
2848  dmdsa->Action = DeviceDsmAction_Trim;
2850  dmdsa->ParameterBlockOffset = 0;
2851  dmdsa->ParameterBlockLength = 0;
2853  dmdsa->DataSetRangesLength = dev->num_trim_entries * sizeof(DEVICE_DATA_SET_RANGE);
2854 
2855  ranges = (DEVICE_DATA_SET_RANGE*)((UINT8*)dmdsa + dmdsa->DataSetRangesOffset);
2856 
2857  i = 0;
2858  le = dev->trim_list.Flink;
2859  while (le != &dev->trim_list) {
2861 
2862  ranges[i].StartingOffset = s->address;
2863  ranges[i].LengthInBytes = s->size;
2864  i++;
2865 
2866  le = le->Flink;
2867  }
2868 
2870  if (!NT_SUCCESS(Status))
2871  WARN("IOCTL_STORAGE_MANAGE_DATA_SET_ATTRIBUTES returned %08x\n", Status);
2872 
2873  ExFreePool(dmdsa);
2874 
2875 end:
2876  while (!IsListEmpty(&dev->trim_list)) {
2878  ExFreePool(s);
2879  }
2880 
2881  dev->num_trim_entries = 0;
2882 }
2883 
2885  NTSTATUS Status;
2886  BOOL changed;
2887  LIST_ENTRY* le;
2888  chunk* rc;
2889 
2890  // FIXME - allow with metadata chunks?
2891 
2892  while (TRUE) {
2893  rc = NULL;
2894 
2895  ExAcquireResourceSharedLite(&Vcb->tree_lock, TRUE);
2896 
2897  ExAcquireResourceSharedLite(&Vcb->chunk_lock, TRUE);
2898 
2899  // choose the least-used chunk we haven't looked at yet
2900  le = Vcb->chunks.Flink;
2901  while (le != &Vcb->chunks) {
2903 
2904  // FIXME - skip full-size chunks over e.g. 90% full?
2905  if (c->chunk_item->type & BLOCK_FLAG_DATA && !c->readonly && c->balance_num != Vcb->balance.balance_num && (!rc || c->used < rc->used))
2906  rc = c;
2907 
2908  le = le->Flink;
2909  }
2910 
2911  ExReleaseResourceLite(&Vcb->chunk_lock);
2912 
2913  if (!rc) {
2914  ExReleaseResourceLite(&Vcb->tree_lock);
2915  break;
2916  }
2917 
2918  if (rc->list_entry_balance.Flink) {
2920  Vcb->balance.chunks_left--;
2921  }
2922 
2923  rc->list_entry_balance.Flink = (LIST_ENTRY*)1; // so it doesn't get dropped
2924  rc->reloc = TRUE;
2925 
2926  ExReleaseResourceLite(&Vcb->tree_lock);
2927 
2928  do {
2929  changed = FALSE;
2930 
2931  Status = balance_data_chunk(Vcb, rc, &changed);
2932  if (!NT_SUCCESS(Status)) {
2933  ERR("balance_data_chunk returned %08x\n", Status);
2934  Vcb->balance.status = Status;
2936  rc->reloc = FALSE;
2937  return Status;
2938  }
2939 
2940  KeWaitForSingleObject(&Vcb->balance.event, Executive, KernelMode, FALSE, NULL);
2941 
2942  if (Vcb->readonly)
2943  Vcb->balance.stopping = TRUE;
2944 
2945  if (Vcb->balance.stopping)
2946  return STATUS_SUCCESS;
2947  } while (changed);
2948 
2950 
2951  rc->changed = TRUE;
2952  rc->space_changed = TRUE;
2953  rc->balance_num = Vcb->balance.balance_num;
2954 
2955  Status = do_write(Vcb, NULL);
2956  if (!NT_SUCCESS(Status)) {
2957  ERR("do_write returned %08x\n", Status);
2958  return Status;
2959  }
2960 
2961  free_trees(Vcb);
2962  }
2963 
2964  ExAcquireResourceExclusiveLite(&Vcb->chunk_lock, TRUE);
2965 
2966  Status = alloc_chunk(Vcb, flags, &rc, TRUE);
2967 
2968  ExReleaseResourceLite(&Vcb->chunk_lock);
2969 
2970  if (NT_SUCCESS(Status)) {
2971  *newchunk = rc;
2972  return Status;
2973  } else {
2974  ERR("alloc_chunk returned %08x\n", Status);
2975  return Status;
2976  }
2977 }
2978 
2980  LIST_ENTRY* le;
2981 
2982  while (!IsListEmpty(&dev->space)) {
2984 
2985  ExFreePool(s);
2986  }
2987 
2988  // The Linux driver doesn't like to allocate chunks within the first megabyte of a device.
2989 
2990  space_list_add2(&dev->space, NULL, 0x100000, dev->devitem.num_bytes - 0x100000, NULL, NULL);
2991 
2992  le = Vcb->chunks.Flink;
2993  while (le != &Vcb->chunks) {
2994  UINT16 n;
2996  CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&c->chunk_item[1];
2997 
2998  for (n = 0; n < c->chunk_item->num_stripes; n++) {
2999  UINT64 stripe_size = 0;
3000 
3001  if (cis[n].dev_id == dev->devitem.dev_id) {
3002  if (stripe_size == 0) {
3003  UINT16 factor;
3004 
3005  if (c->chunk_item->type & BLOCK_FLAG_RAID0)
3006  factor = c->chunk_item->num_stripes;
3007  else if (c->chunk_item->type & BLOCK_FLAG_RAID10)
3008  factor = c->chunk_item->num_stripes / c->chunk_item->sub_stripes;
3009  else if (c->chunk_item->type & BLOCK_FLAG_RAID5)
3010  factor = c->chunk_item->num_stripes - 1;
3011  else if (c->chunk_item->type & BLOCK_FLAG_RAID6)
3012  factor = c->chunk_item->num_stripes - 2;
3013  else // SINGLE, DUP, RAID1
3014  factor = 1;
3015 
3016  stripe_size = c->chunk_item->size / factor;
3017  }
3018 
3019  space_list_subtract2(&dev->space, NULL, cis[n].offset, stripe_size, NULL, NULL);
3020  }
3021  }
3022 
3023  le = le->Flink;
3024  }
3025 
3026  return STATUS_SUCCESS;
3027 }
3028 
3029 _Function_class_(KSTART_ROUTINE)
3030 #ifndef __REACTOS__
3031 void balance_thread(void* context) {
3032 #else
3033 void NTAPI balance_thread(void* context) {
3034 #endif
3036  LIST_ENTRY chunks;
3037  LIST_ENTRY* le;
3038  UINT64 num_chunks[3], okay_metadata_chunks = 0, okay_data_chunks = 0, okay_system_chunks = 0;
3039  UINT64 old_data_flags = 0, old_metadata_flags = 0, old_system_flags = 0;
3040  NTSTATUS Status;
3041 
3042  Vcb->balance.balance_num++;
3043 
3044  Vcb->balance.stopping = FALSE;
3045  KeInitializeEvent(&Vcb->balance.finished, NotificationEvent, FALSE);
3046 
3047  if (Vcb->balance.opts[BALANCE_OPTS_DATA].flags & BTRFS_BALANCE_OPTS_ENABLED && Vcb->balance.opts[BALANCE_OPTS_DATA].flags & BTRFS_BALANCE_OPTS_CONVERT) {
3048  old_data_flags = Vcb->data_flags;
3049  Vcb->data_flags = BLOCK_FLAG_DATA | (Vcb->balance.opts[BALANCE_OPTS_DATA].convert == BLOCK_FLAG_SINGLE ? 0 : Vcb->balance.opts[BALANCE_OPTS_DATA].convert);
3050 
3052  }
3053 
3054  if (Vcb->balance.opts[BALANCE_OPTS_METADATA].flags & BTRFS_BALANCE_OPTS_ENABLED && Vcb->balance.opts[BALANCE_OPTS_METADATA].flags & BTRFS_BALANCE_OPTS_CONVERT) {
3055  old_metadata_flags = Vcb->metadata_flags;
3056  Vcb->metadata_flags = BLOCK_FLAG_METADATA | (Vcb->balance.opts[BALANCE_OPTS_METADATA].convert == BLOCK_FLAG_SINGLE ? 0 : Vcb->balance.opts[BALANCE_OPTS_METADATA].convert);
3057  }
3058 
3059  if (Vcb->balance.opts[BALANCE_OPTS_SYSTEM].flags & BTRFS_BALANCE_OPTS_ENABLED && Vcb->balance.opts[BALANCE_OPTS_SYSTEM].flags & BTRFS_BALANCE_OPTS_CONVERT) {
3060  old_system_flags = Vcb->system_flags;
3061  Vcb->system_flags = BLOCK_FLAG_SYSTEM | (Vcb->balance.opts[BALANCE_OPTS_SYSTEM].convert == BLOCK_FLAG_SINGLE ? 0 : Vcb->balance.opts[BALANCE_OPTS_SYSTEM].convert);
3062  }
3063 
3064  if (Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_MIXED_GROUPS) {
3065  if (Vcb->balance.opts[BALANCE_OPTS_DATA].flags & BTRFS_BALANCE_OPTS_ENABLED)
3066  RtlCopyMemory(&Vcb->balance.opts[BALANCE_OPTS_METADATA], &Vcb->balance.opts[BALANCE_OPTS_DATA], sizeof(btrfs_balance_opts));
3067  else if (Vcb->balance.opts[BALANCE_OPTS_METADATA].flags & BTRFS_BALANCE_OPTS_ENABLED)
3068  RtlCopyMemory(&Vcb->balance.opts[BALANCE_OPTS_DATA], &Vcb->balance.opts[BALANCE_OPTS_METADATA], sizeof(btrfs_balance_opts));
3069  }
3070 
3071  num_chunks[0] = num_chunks[1] = num_chunks[2] = 0;
3072  Vcb->balance.total_chunks = Vcb->balance.chunks_left = 0;
3073 
3074  InitializeListHead(&chunks);
3075 
3076  // FIXME - what are we supposed to do with limit_start?
3077 
3078  if (!Vcb->readonly) {
3079  if (!Vcb->balance.removing && !Vcb->balance.shrinking) {
3081  if (!NT_SUCCESS(Status)) {
3082  ERR("add_balance_item returned %08x\n", Status);
3083  Vcb->balance.status = Status;
3084  goto end;
3085  }
3086  } else {
3087  if (Vcb->need_write) {
3088  Status = do_write(Vcb, NULL);
3089 
3090  free_trees(Vcb);
3091 
3092  if (!NT_SUCCESS(Status)) {
3093  ERR("do_write returned %08x\n", Status);
3094  Vcb->balance.status = Status;
3095  goto end;
3096  }
3097  }
3098  }
3099  }
3100 
3101  KeWaitForSingleObject(&Vcb->balance.event, Executive, KernelMode, FALSE, NULL);
3102 
3103  if (Vcb->balance.stopping)
3104  goto end;
3105 
3106  ExAcquireResourceSharedLite(&Vcb->chunk_lock, TRUE);
3107 
3108  le = Vcb->chunks.Flink;
3109  while (le != &Vcb->chunks) {
3111  UINT8 sort;
3112 
3114 
3115  if (c->chunk_item->type & BLOCK_FLAG_DATA)
3117  else if (c->chunk_item->type & BLOCK_FLAG_METADATA)
3119  else if (c->chunk_item->type & BLOCK_FLAG_SYSTEM)
3121  else {
3122  ERR("unexpected chunk type %llx\n", c->chunk_item->type);
3124  break;
3125  }
3126 
3127  if ((!(Vcb->balance.opts[sort].flags & BTRFS_BALANCE_OPTS_LIMIT) || num_chunks[sort] < Vcb->balance.opts[sort].limit_end) &&
3129  InsertTailList(&chunks, &c->list_entry_balance);
3130 
3131  num_chunks[sort]++;
3132  Vcb->balance.total_chunks++;
3133  Vcb->balance.chunks_left++;
3134  } else if (sort == BALANCE_OPTS_METADATA)
3135  okay_metadata_chunks++;
3136  else if (sort == BALANCE_OPTS_DATA)
3137  okay_data_chunks++;
3138  else if (sort == BALANCE_OPTS_SYSTEM)
3139  okay_system_chunks++;
3140 
3141  if (!c->cache_loaded) {
3143 
3144  if (!NT_SUCCESS(Status)) {
3145  ERR("load_cache_chunk returned %08x\n", Status);
3146  Vcb->balance.status = Status;
3148  ExReleaseResourceLite(&Vcb->chunk_lock);
3149  goto end;
3150  }
3151  }
3152 
3154 
3155  le = le->Flink;
3156  }
3157 
3158  ExReleaseResourceLite(&Vcb->chunk_lock);
3159 
3160  // If we're doing a full balance, try and allocate a new chunk now, before we mess things up
3161  if (okay_metadata_chunks == 0 || okay_data_chunks == 0 || okay_system_chunks == 0) {
3162  BOOL consolidated = FALSE;
3163  chunk* c;
3164 
3165  if (okay_metadata_chunks == 0) {
3166  ExAcquireResourceExclusiveLite(&Vcb->chunk_lock, TRUE);
3167 
3168  Status = alloc_chunk(Vcb, Vcb->metadata_flags, &c, TRUE);
3169  if (NT_SUCCESS(Status))
3170  c->balance_num = Vcb->balance.balance_num;
3171  else if (Status != STATUS_DISK_FULL || consolidated) {
3172  ERR("alloc_chunk returned %08x\n", Status);
3173  ExReleaseResourceLite(&Vcb->chunk_lock);
3174  Vcb->balance.status = Status;
3175  goto end;
3176  }
3177 
3178  ExReleaseResourceLite(&Vcb->chunk_lock);
3179 
3180  if (Status == STATUS_DISK_FULL) {
3181  Status = try_consolidation(Vcb, Vcb->metadata_flags, &c);
3182  if (!NT_SUCCESS(Status)) {
3183  ERR("try_consolidation returned %08x\n", Status);
3184  Vcb->balance.status = Status;
3185  goto end;
3186  } else
3187  c->balance_num = Vcb->balance.balance_num;
3188 
3189  consolidated = TRUE;
3190 
3191  if (Vcb->balance.stopping)
3192  goto end;
3193  }
3194  }
3195 
3196  if (okay_data_chunks == 0) {
3197  ExAcquireResourceExclusiveLite(&Vcb->chunk_lock, TRUE);
3198 
3199  Status = alloc_chunk(Vcb, Vcb->data_flags, &c, TRUE);
3200  if (NT_SUCCESS(Status))
3201  c->balance_num = Vcb->balance.balance_num;
3202  else if (Status != STATUS_DISK_FULL || consolidated) {
3203  ERR("alloc_chunk returned %08x\n", Status);
3204  ExReleaseResourceLite(&Vcb->chunk_lock);
3205  Vcb->balance.status = Status;
3206  goto end;
3207  }
3208 
3209  ExReleaseResourceLite(&Vcb->chunk_lock);
3210 
3211  if (Status == STATUS_DISK_FULL) {
3212  Status = try_consolidation(Vcb, Vcb->data_flags, &c);
3213  if (!NT_SUCCESS(Status)) {
3214  ERR("try_consolidation returned %08x\n", Status);
3215  Vcb->balance.status = Status;
3216  goto end;
3217  } else
3218  c->balance_num = Vcb->balance.balance_num;
3219 
3220  consolidated = TRUE;
3221 
3222  if (Vcb->balance.stopping)
3223  goto end;
3224  }
3225  }
3226 
3227  if (okay_system_chunks == 0) {
3228  ExAcquireResourceExclusiveLite(&Vcb->chunk_lock, TRUE);
3229 
3230  Status = alloc_chunk(Vcb, Vcb->system_flags, &c, TRUE);
3231  if (NT_SUCCESS(Status))
3232  c->balance_num = Vcb->balance.balance_num;
3233  else if (Status != STATUS_DISK_FULL || consolidated) {
3234  ERR("alloc_chunk returned %08x\n", Status);
3235  ExReleaseResourceLite(&Vcb->chunk_lock);
3236  Vcb->balance.status = Status;
3237  goto end;
3238  }
3239 
3240  ExReleaseResourceLite(&Vcb->chunk_lock);
3241 
3242  if (Status == STATUS_DISK_FULL) {
3243  Status = try_consolidation(Vcb, Vcb->system_flags, &c);
3244  if (!NT_SUCCESS(Status)) {
3245  ERR("try_consolidation returned %08x\n", Status);
3246  Vcb->balance.status = Status;
3247  goto end;
3248  } else
3249  c->balance_num = Vcb->balance.balance_num;
3250 
3251  consolidated = TRUE;
3252 
3253  if (Vcb->balance.stopping)
3254  goto end;
3255  }
3256  }
3257  }
3258 
3259  ExAcquireResourceSharedLite(&Vcb->chunk_lock, TRUE);
3260 
3261  le = chunks.Flink;
3262  while (le != &chunks) {
3263  chunk* c = CONTAINING_RECORD(le, chunk, list_entry_balance);
3264 
3265  c->reloc = TRUE;
3266 
3267  le = le->Flink;
3268  }
3269 
3270  ExReleaseResourceLite(&Vcb->chunk_lock);
3271 
3272  // do data chunks before metadata
3273  le = chunks.Flink;
3274  while (le != &chunks) {
3275  chunk* c = CONTAINING_RECORD(le, chunk, list_entry_balance);
3276  LIST_ENTRY* le2 = le->Flink;
3277 
3278  if (c->chunk_item->type & BLOCK_FLAG_DATA) {
3279  BOOL changed;
3280 
3281  do {
3282  changed = FALSE;
3283 
3284  Status = balance_data_chunk(Vcb, c, &changed);
3285  if (!NT_SUCCESS(Status)) {
3286  ERR("balance_data_chunk returned %08x\n", Status);
3287  Vcb->balance.status = Status;
3288  goto end;
3289  }
3290 
3291  KeWaitForSingleObject(&Vcb->balance.event, Executive, KernelMode, FALSE, NULL);
3292 
3293  if (Vcb->readonly)
3294  Vcb->balance.stopping = TRUE;
3295 
3296  if (Vcb->balance.stopping)
3297  break;
3298  } while (changed);
3299 
3300  c->changed = TRUE;
3301  c->space_changed = TRUE;
3302  }
3303 
3304  if (Vcb->balance.stopping)
3305  goto end;
3306 
3307  if (c->chunk_item->type & BLOCK_FLAG_DATA &&
3308  (!(Vcb->balance.opts[BALANCE_OPTS_METADATA].flags & BTRFS_BALANCE_OPTS_ENABLED) || !(c->chunk_item->type & BLOCK_FLAG_METADATA))) {
3309  RemoveEntryList(&c->list_entry_balance);
3310  c->list_entry_balance.Flink = NULL;
3311 
3312  Vcb->balance.chunks_left--;
3313  }
3314 
3315  le = le2;
3316  }
3317 
3318  // do metadata chunks
3319  while (!IsListEmpty(&chunks)) {
3320  chunk* c;
3321  BOOL changed;
3322 
3323  le = RemoveHeadList(&chunks);
3324  c = CONTAINING_RECORD(le, chunk, list_entry_balance);
3325 
3326  if (c->chunk_item->type & BLOCK_FLAG_METADATA || c->chunk_item->type & BLOCK_FLAG_SYSTEM) {
3327  do {
3328  Status = balance_metadata_chunk(Vcb, c, &changed);
3329  if (!NT_SUCCESS(Status)) {
3330  ERR("balance_metadata_chunk returned %08x\n", Status);
3331  Vcb->balance.status = Status;
3332  goto end;
3333  }
3334 
3335  KeWaitForSingleObject(&Vcb->balance.event, Executive, KernelMode, FALSE, NULL);
3336 
3337  if (Vcb->readonly)
3338  Vcb->balance.stopping = TRUE;
3339 
3340  if (Vcb->balance.stopping)
3341  break;
3342  } while (changed);
3343 
3344  c->changed = TRUE;
3345  c->space_changed = TRUE;
3346  }
3347 
3348  if (Vcb->balance.stopping)
3349  break;
3350 
3351  c->list_entry_balance.Flink = NULL;
3352 
3353  Vcb->balance.chunks_left--;
3354  }
3355 
3356 end:
3357  if (!Vcb->readonly) {
3358  if (Vcb->balance.stopping || !NT_SUCCESS(Vcb->balance.status)) {
3359  le = chunks.Flink;
3360  while (le != &chunks) {
3361  chunk* c = CONTAINING_RECORD(le, chunk, list_entry_balance);
3362  c->reloc = FALSE;
3363 
3364  le = le->Flink;
3365  c->list_entry_balance.Flink = NULL;
3366  }
3367 
3368  if (old_data_flags != 0)
3369  Vcb->data_flags = old_data_flags;
3370 
3371  if (old_metadata_flags != 0)
3372  Vcb->metadata_flags = old_metadata_flags;
3373 
3374  if (old_system_flags != 0)
3375  Vcb->system_flags = old_system_flags;
3376  }
3377 
3378  if (Vcb->balance.removing) {
3379  device* dev = NULL;
3380 
3381  ExAcquireResourceExclusiveLite(&Vcb->tree_lock, TRUE);
3382 
3383  le = Vcb->devices.Flink;
3384  while (le != &Vcb->devices) {
3385  device* dev2 = CONTAINING_RECORD(le, device, list_entry);
3386 
3387  if (dev2->devitem.dev_id == Vcb->balance.opts[0].devid) {
3388  dev = dev2;
3389  break;
3390  }
3391 
3392  le = le->Flink;
3393  }
3394 
3395  if (dev) {
3396  if (Vcb->balance.chunks_left == 0) {
3398 
3399  if (!NT_SUCCESS(Status)) {
3400  ERR("finish_removing_device returned %08x\n", Status);
3401  dev->reloc = FALSE;
3402  }
3403  } else
3404  dev->reloc = FALSE;
3405  }
3406 
3407  ExReleaseResourceLite(&Vcb->tree_lock);
3408  } else if (Vcb->balance.shrinking) {
3409  device* dev = NULL;
3410 
3411  ExAcquireResourceExclusiveLite(&Vcb->tree_lock, TRUE);
3412 
3413  le = Vcb->devices.Flink;
3414  while (le != &Vcb->devices) {
3415  device* dev2 = CONTAINING_RECORD(le, device, list_entry);
3416 
3417  if (dev2->devitem.dev_id == Vcb->balance.opts[0].devid) {
3418  dev = dev2;
3419  break;
3420  }
3421 
3422  le = le->Flink;
3423  }
3424 
3425  if (!dev) {
3426  ERR("could not find device %llx\n", Vcb->balance.opts[0].devid);
3427  Vcb->balance.status = STATUS_INTERNAL_ERROR;
3428  }
3429 
3430  if (Vcb->balance.stopping || !NT_SUCCESS(Vcb->balance.status)) {
3431  if (dev) {
3433  if (!NT_SUCCESS(Status))
3434  WARN("regenerate_space_list returned %08x\n", Status);
3435  }
3436  } else {
3437  UINT64 old_size;
3438 
3439  old_size = dev->devitem.num_bytes;
3440  dev->devitem.num_bytes = Vcb->balance.opts[0].drange_start;
3441 
3443  if (!NT_SUCCESS(Status)) {
3444  ERR("update_dev_item returned %08x\n", Status);
3445  dev->devitem.num_bytes = old_size;
3446  Vcb->balance.status = Status;
3447 
3449  if (!NT_SUCCESS(Status))
3450  WARN("regenerate_space_list returned %08x\n", Status);
3451  } else {
3452  Vcb->superblock.total_bytes -= old_size - dev->devitem.num_bytes;
3453 
3454  Status = do_write(Vcb, NULL);
3455  if (!NT_SUCCESS(Status))
3456  ERR("do_write returned %08x\n", Status);
3457 
3458  free_trees(Vcb);
3459  }
3460  }
3461 
3462  ExReleaseResourceLite(&Vcb->tree_lock);
3463 
3464  if (!Vcb->balance.stopping && NT_SUCCESS(Vcb->balance.status))
3466  } else {
3468  if (!NT_SUCCESS(Status)) {
3469  ERR("remove_balance_item returned %08x\n", Status);
3470  goto end;
3471  }
3472  }
3473 
3474  if (Vcb->trim && !Vcb->options.no_trim) {
3475  ExAcquireResourceExclusiveLite(&Vcb->tree_lock, TRUE);
3476 
3477  le = Vcb->devices.Flink;
3478  while (le != &Vcb->devices) {
3479  device* dev2 = CONTAINING_RECORD(le, device, list_entry);
3480 
3481  if (dev2->devobj && !dev2->readonly && dev2->trim)
3482  trim_unalloc_space(Vcb, dev2);
3483 
3484  le = le->Flink;
3485  }
3486 
3487  ExReleaseResourceLite(&Vcb->tree_lock);
3488  }
3489  }
3490 
3491  ZwClose(Vcb->balance.thread);
3492  Vcb->balance.thread = NULL;
3493 
3494  KeSetEvent(&Vcb->balance.finished, 0, FALSE);
3495 }
3496 
3498  NTSTATUS Status;
3500  UINT8 i;
3501 
3502  if (length < sizeof(btrfs_start_balance) || !data)
3503  return STATUS_INVALID_PARAMETER;
3504 
3505  if (!SeSinglePrivilegeCheck(RtlConvertLongToLuid(SE_MANAGE_VOLUME_PRIVILEGE), processor_mode))
3507 
3508  if (Vcb->locked) {
3509  WARN("cannot start balance while locked\n");
3510  return STATUS_DEVICE_NOT_READY;
3511  }
3512 
3513  if (Vcb->scrub.thread) {
3514  WARN("cannot start balance while scrub running\n");
3515  return STATUS_DEVICE_NOT_READY;
3516  }
3517 
3518  if (Vcb->balance.thread) {
3519  WARN("balance already running\n");
3520  return STATUS_DEVICE_NOT_READY;
3521  }
3522 
3523  if (Vcb->readonly)
3525 
3529  return STATUS_SUCCESS;
3530 
3531  for (i = 0; i < 3; i++) {
3532  if (bsb->opts[i].flags & BTRFS_BALANCE_OPTS_ENABLED) {
3533  if (bsb->opts[i].flags & BTRFS_BALANCE_OPTS_PROFILES) {
3536 
3537  if (bsb->opts[i].profiles == 0)
3538  return STATUS_INVALID_PARAMETER;
3539  }
3540 
3541  if (bsb->opts[i].flags & BTRFS_BALANCE_OPTS_DEVID) {
3542  if (bsb->opts[i].devid == 0)
3543  return STATUS_INVALID_PARAMETER;
3544  }
3545 
3546  if (bsb->opts[i].flags & BTRFS_BALANCE_OPTS_DRANGE) {
3547  if (bsb->opts[i].drange_start > bsb->opts[i].drange_end)
3548  return STATUS_INVALID_PARAMETER;
3549  }
3550 
3551  if (bsb->opts[i].flags & BTRFS_BALANCE_OPTS_VRANGE) {
3552  if (bsb->opts[i].vrange_start > bsb->opts[i].vrange_end)
3553  return STATUS_INVALID_PARAMETER;
3554  }
3555 
3556  if (bsb->opts[i].flags & BTRFS_BALANCE_OPTS_LIMIT) {
3557  bsb->opts[i].limit_start = max(1, bsb->opts[i].limit_start);
3558  bsb->opts[i].limit_end = max(1, bsb->opts[i].limit_end);
3559 
3560  if (bsb->opts[i].limit_start > bsb->opts[i].limit_end)
3561  return STATUS_INVALID_PARAMETER;
3562  }
3563 
3564  if (bsb->opts[i].flags & BTRFS_BALANCE_OPTS_STRIPES) {
3565  bsb->opts[i].stripes_start = max(1, bsb->opts[i].stripes_start);
3566  bsb->opts[i].stripes_end = max(1, bsb->opts[i].stripes_end);
3567 
3568  if (bsb->opts[i].stripes_start > bsb->opts[i].stripes_end)
3569  return STATUS_INVALID_PARAMETER;
3570  }
3571 
3572  if (bsb->opts[i].flags & BTRFS_BALANCE_OPTS_USAGE) {
3573  bsb->opts[i].usage_start = min(100, bsb->opts[i].stripes_start);
3574  bsb->opts[i].usage_end = min(100, bsb->opts[i].stripes_end);
3575 
3576  if (bsb->opts[i].stripes_start > bsb->opts[i].stripes_end)
3577  return STATUS_INVALID_PARAMETER;
3578  }
3579 
3580  if (bsb->opts[i].flags & BTRFS_BALANCE_OPTS_CONVERT) {
3581  if (bsb->opts[i].convert != BLOCK_FLAG_RAID0 && bsb->opts[i].convert != BLOCK_FLAG_RAID1 &&
3583  bsb->opts[i].convert != BLOCK_FLAG_RAID5 && bsb->opts[i].convert != BLOCK_FLAG_RAID6 &&
3584  bsb->opts[i].convert != BLOCK_FLAG_SINGLE)
3585  return STATUS_INVALID_PARAMETER;
3586  }
3587  }
3588  }
3589 
3590  RtlCopyMemory(&Vcb->balance.opts[BALANCE_OPTS_DATA], &bsb->opts[BALANCE_OPTS_DATA], sizeof(btrfs_balance_opts));
3593 
3594  Vcb->balance.paused = FALSE;
3595  Vcb->balance.removing = FALSE;
3596  Vcb->balance.shrinking = FALSE;
3597  Vcb->balance.status = STATUS_SUCCESS;
3598  KeInitializeEvent(&Vcb->balance.event, NotificationEvent, !Vcb->balance.paused);
3599 
3600  Status = PsCreateSystemThread(&Vcb->balance.thread, 0, NULL, NULL, NULL, balance_thread, Vcb);
3601  if (!NT_SUCCESS(Status)) {
3602  ERR("PsCreateSystemThread returned %08x\n", Status);
3603  return Status;
3604  }
3605 
3606  return STATUS_SUCCESS;
3607 }
3608 
3610  KEY searchkey;
3611  traverse_ptr tp;
3612  NTSTATUS Status;
3613  BALANCE_ITEM* bi;
3614  int i;
3615 
3616  searchkey.obj_id = BALANCE_ITEM_ID;
3617  searchkey.obj_type = TYPE_TEMP_ITEM;
3618  searchkey.offset = 0;
3619 
3620  Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE, NULL);
3621  if (!NT_SUCCESS(Status)) {
3622  ERR("find_item returned %08x\n", Status);
3623  return Status;
3624  }
3625 
3626  if (keycmp(tp.item->key, searchkey)) {
3627  TRACE("no balance item found\n");
3628  return STATUS_NOT_FOUND;
3629  }
3630 
3631  if (tp.item->size < sizeof(BALANCE_ITEM)) {
3632  WARN("(%llx,%x,%llx) was %u bytes, expected %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset,
3633  tp.item->size, sizeof(BALANCE_ITEM));
3634  return STATUS_INTERNAL_ERROR;
3635  }
3636 
3637  bi = (BALANCE_ITEM*)tp.item->data;
3638 
3639  if (bi->flags & BALANCE_FLAGS_DATA)
3640  load_balance_args(&Vcb->balance.opts[BALANCE_OPTS_DATA], &bi->data);
3641 
3642  if (bi->flags & BALANCE_FLAGS_METADATA)
3643  load_balance_args(&Vcb->balance.opts[BALANCE_OPTS_METADATA], &bi->metadata);
3644 
3645  if (bi->flags & BALANCE_FLAGS_SYSTEM)
3646  load_balance_args(&Vcb->balance.opts[BALANCE_OPTS_SYSTEM], &bi->system);
3647 
3648  // do the heuristics that Linux driver does
3649 
3650  for (i = 0; i < 3; i++) {
3651  if (Vcb->balance.opts[i].flags & BTRFS_BALANCE_OPTS_ENABLED) {
3652  // if converting, don't redo chunks already done
3653 
3654  if (Vcb->balance.opts[i].flags & BTRFS_BALANCE_OPTS_CONVERT)
3655  Vcb->balance.opts[i].flags |= BTRFS_BALANCE_OPTS_SOFT;
3656 
3657  // don't balance chunks more than 90% filled - presumably these
3658  // have already been done
3659 
3660  if (!(Vcb->balance.opts[i].flags & BTRFS_BALANCE_OPTS_USAGE) &&
3661  !(Vcb->balance.opts[i].flags & BTRFS_BALANCE_OPTS_CONVERT)
3662  ) {
3663  Vcb->balance.opts[i].flags |= BTRFS_BALANCE_OPTS_USAGE;
3664  Vcb->balance.opts[i].usage_start = 0;
3665  Vcb->balance.opts[i].usage_end = 90;
3666  }
3667  }
3668  }
3669 
3670  if (Vcb->readonly || Vcb->options.skip_balance)
3671  Vcb->balance.paused = TRUE;
3672  else
3673  Vcb->balance.paused = FALSE;
3674 
3675  Vcb->balance.removing = FALSE;
3676  Vcb->balance.shrinking = FALSE;
3677  Vcb->balance.status = STATUS_SUCCESS;
3678  KeInitializeEvent(&Vcb->balance.event, NotificationEvent, !Vcb->balance.paused);
3679 
3680  Status = PsCreateSystemThread(&Vcb->balance.thread, 0, NULL, NULL, NULL, balance_thread, Vcb);
3681  if (!NT_SUCCESS(Status)) {
3682  ERR("PsCreateSystemThread returned %08x\n", Status);
3683  return Status;
3684  }
3685 
3686  return STATUS_SUCCESS;
3687 }
3688 
3691 
3692  if (length < sizeof(btrfs_query_balance) || !data)
3693  return STATUS_INVALID_PARAMETER;
3694 
3695  if (!Vcb->balance.thread) {
3697 
3698  if (!NT_SUCCESS(Vcb->balance.status)) {
3699  bqb->status |= BTRFS_BALANCE_ERROR;
3700  bqb->error = Vcb->balance.status;
3701  }
3702 
3703  return STATUS_SUCCESS;
3704  }
3705 
3706  bqb->status = Vcb->balance.paused ? BTRFS_BALANCE_PAUSED : BTRFS_BALANCE_RUNNING;
3707 
3708  if (Vcb->balance.removing)
3709  bqb->status |= BTRFS_BALANCE_REMOVAL;
3710 
3711  if (Vcb->balance.shrinking)
3713 
3714  if (!NT_SUCCESS(Vcb->balance.status))
3715  bqb->status |= BTRFS_BALANCE_ERROR;
3716 
3717  bqb->chunks_left = Vcb->balance.chunks_left;
3718  bqb->total_chunks = Vcb->balance.total_chunks;
3719  bqb->error = Vcb->balance.status;
3720  RtlCopyMemory(&bqb->data_opts, &Vcb->balance.opts[BALANCE_OPTS_DATA], sizeof(btrfs_balance_opts));
3721  RtlCopyMemory(&bqb->metadata_opts, &Vcb->balance.opts[BALANCE_OPTS_METADATA], sizeof(btrfs_balance_opts));
3722  RtlCopyMemory(&bqb->system_opts, &Vcb->balance.opts[BALANCE_OPTS_SYSTEM], sizeof(btrfs_balance_opts));
3723 
3724  return STATUS_SUCCESS;
3725 }
3726 
3728  if (!SeSinglePrivilegeCheck(RtlConvertLongToLuid(SE_MANAGE_VOLUME_PRIVILEGE), processor_mode))
3730 
3731  if (!Vcb->balance.thread)
3732  return STATUS_DEVICE_NOT_READY;
3733 
3734  if (Vcb->balance.paused)
3735  return STATUS_DEVICE_NOT_READY;
3736 
3737  Vcb->balance.paused = TRUE;
3738  KeClearEvent(&Vcb->balance.event);
3739 
3740  return STATUS_SUCCESS;
3741 }
3742 
3744  if (!SeSinglePrivilegeCheck(RtlConvertLongToLuid(SE_MANAGE_VOLUME_PRIVILEGE), processor_mode))
3746 
3747  if (!Vcb->balance.thread)
3748  return STATUS_DEVICE_NOT_READY;
3749 
3750  if (!Vcb->balance.paused)
3751  return STATUS_DEVICE_NOT_READY;
3752 
3753  if (Vcb->readonly)
3755 
3756  Vcb->balance.paused = FALSE;
3757  KeSetEvent(&Vcb->balance.event, 0, FALSE);
3758 
3759  return STATUS_SUCCESS;
3760 }
3761 
3763  if (!SeSinglePrivilegeCheck(RtlConvertLongToLuid(SE_MANAGE_VOLUME_PRIVILEGE), processor_mode))
3765 
3766  if (!Vcb->balance.thread)
3767  return STATUS_DEVICE_NOT_READY;
3768 
3769  Vcb->balance.paused = FALSE;
3770  Vcb->balance.stopping = TRUE;
3771  Vcb->balance.status = STATUS_SUCCESS;
3772  KeSetEvent(&Vcb->balance.event, 0, FALSE);
3773 
3774  return STATUS_SUCCESS;
3775 }
3776 
3778  UINT64 devid;
3779  LIST_ENTRY* le;
3780  device* dev = NULL;
3781  NTSTATUS Status;
3782  int i;
3783  UINT64 num_rw_devices;
3784 
3785  TRACE("(%p, %p, %x)\n", Vcb, data, length);
3786 
3787  if (!SeSinglePrivilegeCheck(RtlConvertLongToLuid(SE_MANAGE_VOLUME_PRIVILEGE), processor_mode))
3789 
3790  if (length < sizeof(UINT64))
3791  return STATUS_INVALID_PARAMETER;
3792 
3793  devid = *(UINT64*)data;
3794 
3795  ExAcquireResourceSharedLite(&Vcb->tree_lock, TRUE);
3796 
3797  if (Vcb->readonly) {
3798  ExReleaseResourceLite(&Vcb->tree_lock);
3799  return