[mlpack] 08/20: Potential fix for OS X RectangleTree bug.

Barak A. Pearlmutter barak+git at pearlmutter.net
Thu May 25 20:44:08 UTC 2017


This is an automated email from the git hooks/post-receive script.

bap pushed a commit to branch master
in repository mlpack.

commit b95b2f9e2f2a43d76a0f3d558e247fe49e09e275
Author: Ryan Curtin <ryan at ratml.org>
Date:   Tue May 2 15:00:50 2017 -0400

    Potential fix for OS X RectangleTree bug.
---
 .../core/tree/rectangle_tree/r_star_tree_split.hpp |   6 +-
 .../tree/rectangle_tree/r_star_tree_split_impl.hpp | 412 +++++++++++----
 .../tree/rectangle_tree/rectangle_tree_impl.hpp    |  19 +-
 .../core/tree/rectangle_tree/x_tree_split.hpp      |   6 +-
 .../core/tree/rectangle_tree/x_tree_split_impl.hpp | 584 +++++++++++++--------
 5 files changed, 672 insertions(+), 355 deletions(-)

diff --git a/src/mlpack/core/tree/rectangle_tree/r_star_tree_split.hpp b/src/mlpack/core/tree/rectangle_tree/r_star_tree_split.hpp
index 6370120..7f1c036 100644
--- a/src/mlpack/core/tree/rectangle_tree/r_star_tree_split.hpp
+++ b/src/mlpack/core/tree/rectangle_tree/r_star_tree_split.hpp
@@ -52,9 +52,9 @@ class RStarTreeSplit
    * Comparator for sorting with std::pair. This comparator works a little bit
    * faster then the default comparator.
    */
-  template<typename ElemType>
-  static bool PairComp(const std::pair<ElemType, size_t>& p1,
-                       const std::pair<ElemType, size_t>& p2)
+  template<typename ElemType, typename TreeType>
+  static bool PairComp(const std::pair<ElemType, TreeType>& p1,
+                       const std::pair<ElemType, TreeType>& p2)
   {
     return p1.first < p2.first;
   }
diff --git a/src/mlpack/core/tree/rectangle_tree/r_star_tree_split_impl.hpp b/src/mlpack/core/tree/rectangle_tree/r_star_tree_split_impl.hpp
index d5669a6..51ddfea 100644
--- a/src/mlpack/core/tree/rectangle_tree/r_star_tree_split_impl.hpp
+++ b/src/mlpack/core/tree/rectangle_tree/r_star_tree_split_impl.hpp
@@ -35,29 +35,31 @@ void RStarTreeSplit::SplitLeafNode(TreeType *tree,std::vector<bool>& relevels)
 
   if (tree->Count() <= tree->MaxLeafSize())
     return;
+//  std::cout << "split leaf node " << tree << " with parent " << tree->Parent()
+//<< "\n";
 
   // If we are splitting the root node, we need will do things differently so
   // that the constructor and other methods don't confuse the end user by giving
   // an address of another node.
-  if (tree->Parent() == NULL)
-  {
+//  if (tree->Parent() == NULL)
+//  {
     // We actually want to copy this way.  Pointers and everything.
-    TreeType* copy = new TreeType(*tree, false);
-    copy->Parent() = tree;
-    tree->Count() = 0;
-    tree->NullifyData();
+//    TreeType* copy = new TreeType(*tree, false);
+//    copy->Parent() = tree;
+//    tree->Count() = 0;
+//    tree->NullifyData();
     // Because this was a leaf node, numChildren must be 0.
-    tree->children[(tree->NumChildren())++] = copy;
-    assert(tree->NumChildren() == 1);
+//    tree->children[(tree->NumChildren())++] = copy;
+//    assert(tree->NumChildren() == 1);
 
-    RStarTreeSplit::SplitLeafNode(copy,relevels);
-    return;
-  }
+//    RStarTreeSplit::SplitLeafNode(copy,relevels);
+//    return;
+//  }
 
   // If we haven't yet reinserted on this level, we try doing so now.
-  if (relevels[tree->TreeDepth()])
+  if (relevels[tree->TreeDepth() - 1])
   {
-    relevels[tree->TreeDepth()] = false;
+    relevels[tree->TreeDepth() - 1] = false;
 
     // We sort the points by decreasing distance to the centroid of the bound.
     // We then remove the first p entries and reinsert them at the root.
@@ -81,8 +83,8 @@ void RStarTreeSplit::SplitLeafNode(TreeType *tree,std::vector<bool>& relevels)
       sorted[i].second = i;
     }
 
-    std::sort(sorted.begin(), sorted.end(), PairComp<ElemType>);
-    std::vector<size_t> pointIndices(p);
+    std::sort(sorted.begin(), sorted.end(), PairComp<ElemType, size_t>);
+    std::vector<size_t> pointIndices(sorted.size());
 
     for (size_t i = 0; i < p; i++)
     {
@@ -119,7 +121,7 @@ void RStarTreeSplit::SplitLeafNode(TreeType *tree,std::vector<bool>& relevels)
       sorted[i].second = i;
     }
 
-    std::sort(sorted.begin(), sorted.end(), PairComp<ElemType>);
+    std::sort(sorted.begin(), sorted.end(), PairComp<ElemType, size_t>);
 
     // We'll store each of the three scores for each distribution.
     std::vector<ElemType> areas(tree->MaxLeafSize() - 2 * tree->MinLeafSize() +
@@ -195,56 +197,134 @@ void RStarTreeSplit::SplitLeafNode(TreeType *tree,std::vector<bool>& relevels)
   for (size_t i = 0; i < sorted.size(); i++)
   {
     sorted[i].first = tree->Dataset().col(tree->Point(i))[bestAxis];
-    sorted[i].second = i;
+    sorted[i].second = tree->Point(i);
   }
 
-  std::sort(sorted.begin(), sorted.end(), PairComp<ElemType>);
-
-  TreeType* treeOne = new TreeType(tree->Parent());
-  TreeType* treeTwo = new TreeType(tree->Parent());
+  std::sort(sorted.begin(), sorted.end(), PairComp<ElemType, size_t>);
 
-  if (tiedOnOverlap)
+  if (tree->Parent())
   {
-    for (size_t i = 0; i < tree->Count(); i++)
+//    std::cout << "first check parent " << tree->Parent() << " numDescendants "
+//<< tree->Parent()->NumDescendants() <<
+//" with " << tree->Parent()->NumChildren() << " children\n";
+//    size_t manualCount = 0;
+//    for (size_t i = 0; i < tree->Parent()->NumChildren(); ++i)
+//    {
+//      std::cout << "(" << &(tree->Parent()->Child(i)) << ") ";
+//      manualCount += tree->Parent()->Child(i).NumDescendants();
+//      std::cout << tree->Parent()->Child(i).NumDescendants() << " ";
+//    }
+//    std::cout << "\n";
+  //  TreeType* treeOne = new TreeType(tree->Parent());
+    // Now clean the node, and we will re-use this.
+    const size_t oldDescendants = tree->numDescendants;
+    const size_t numPoints = tree->count;
+    tree->numChildren = 0;
+    tree->numDescendants = 0;
+    tree->bound.Clear();
+    tree->count = 0;
+    tree->begin = 0;
+
+    TreeType* treeTwo = new TreeType(tree->Parent());
+
+    if (tiedOnOverlap)
     {
-      if (i < bestAreaIndexOnBestAxis + tree->MinLeafSize())
-        treeOne->InsertPoint(tree->Point(sorted[i].second));
-      else
-        treeTwo->InsertPoint(tree->Point(sorted[i].second));
+      for (size_t i = 0; i < numPoints; i++)
+      {
+        if (i < bestAreaIndexOnBestAxis + tree->MinLeafSize())
+          tree->InsertPoint(sorted[i].second);
+        else
+          treeTwo->InsertPoint(sorted[i].second);
+      }
     }
+    else
+    {
+      for (size_t i = 0; i < numPoints; i++)
+      {
+        if (i < bestOverlapIndexOnBestAxis + tree->MinLeafSize())
+          tree->InsertPoint(sorted[i].second);
+        else
+          treeTwo->InsertPoint(sorted[i].second);
+      }
+    }
+
+    // Insert the new tree node.
+    TreeType* par = tree->Parent();
+    par->children[par->NumChildren()++] = treeTwo;
+
+    // We only add one at a time, so we should only need to test for equality
+    // just in case, we use an assert.
+//    std::cout << "x: " << oldDescendants << " == " << tree->NumDescendants() <<
+//" + " << treeTwo->NumDescendants() << " (" << tree->NumDescendants() +
+//treeTwo->NumDescendants() << ")\n";
+    assert(oldDescendants == tree->NumDescendants() + treeTwo->NumDescendants());
+//    std::cout << "check parent " << par << " numDescendants " << par->NumDescendants() <<
+//" with " << par->NumChildren() << " children\n";
+//    manualCount = 0;
+//    for (size_t i = 0; i < par->NumChildren(); ++i)
+//    {
+//      std::cout << "(" << &(par->Child(i)) << ") ";
+//      manualCount += par->Child(i).NumDescendants();
+//      std::cout << par->Child(i).NumDescendants() << " ";
+//    }
+//    std::cout << "\n";
+//    assert(par->NumDescendants() == manualCount);
+    assert(par->NumChildren() <= par->MaxNumChildren() + 1);
+    if (par->NumChildren() == par->MaxNumChildren() + 1)
+      RStarTreeSplit::SplitNonLeafNode(par, relevels);
+
+    assert(tree->Parent()->NumChildren() <= tree->MaxNumChildren());
+    assert(tree->Parent()->NumChildren() >= tree->MinNumChildren());
+    assert(treeTwo->Parent()->NumChildren() <= treeTwo->MaxNumChildren());
+    assert(treeTwo->Parent()->NumChildren() >= treeTwo->MinNumChildren());
   }
   else
   {
-    for (size_t i = 0; i < tree->Count(); i++)
+    TreeType* treeOne = new TreeType(tree);
+    TreeType* treeTwo = new TreeType(tree);
+
+    const size_t oldNumDescendants = tree->NumDescendants();
+    const size_t numPoints = tree->Count();
+    tree->numChildren = 0;
+    tree->bound.Clear();
+    tree->count = 0;
+    tree->begin = 0;
+    tree->numDescendants = 0;
+
+    if (tiedOnOverlap)
     {
-      if (i < bestOverlapIndexOnBestAxis + tree->MinLeafSize())
-        treeOne->InsertPoint(tree->Point(sorted[i].second));
-      else
-        treeTwo->InsertPoint(tree->Point(sorted[i].second));
+      for (size_t i = 0; i < numPoints; ++i)
+      {
+        if (i < bestAreaIndexOnBestAxis + tree->MinLeafSize())
+          treeOne->InsertPoint(sorted[i].second);
+        else
+          treeTwo->InsertPoint(sorted[i].second);
+      }
+    }
+    else
+    {
+      for (size_t i = 0; i < numPoints; ++i)
+      {
+        if (i < bestOverlapIndexOnBestAxis + tree->MinLeafSize())
+          treeOne->InsertPoint(sorted[i].second);
+        else
+          treeTwo->InsertPoint(sorted[i].second);
+      }
     }
-  }
-
-  // Remove this node and insert treeOne and treeTwo.
-  TreeType* par = tree->Parent();
-  size_t index = 0;
-  while (par->children[index] != tree) { index++; }
-
-  assert(index != par->NumChildren());
-  par->children[index] = treeOne;
-  par->children[par->NumChildren()++] = treeTwo;
-
-  // We only add one at a time, so we should only need to test for equality
-  // just in case, we use an assert.
-  assert(par->NumChildren() <= par->MaxNumChildren() + 1);
-  if (par->NumChildren() == par->MaxNumChildren() + 1)
-    RStarTreeSplit::SplitNonLeafNode(par,relevels);
-
-  assert(treeOne->Parent()->NumChildren() <= treeOne->MaxNumChildren());
-  assert(treeOne->Parent()->NumChildren() >= treeOne->MinNumChildren());
-  assert(treeTwo->Parent()->NumChildren() <= treeTwo->MaxNumChildren());
-  assert(treeTwo->Parent()->NumChildren() >= treeTwo->MinNumChildren());
 
-  tree->SoftDelete();
+    InsertNodeIntoTree(tree, treeOne);
+    InsertNodeIntoTree(tree, treeTwo);
+
+//    std::cout << "y: " << oldNumDescendants << ": " << treeOne->NumDescendants()
+//<< " + " << treeTwo->NumDescendants() << " (" << tree->NumDescendants() <<
+//")\n";
+//    std::cout << "tree left " << treeOne->NumDescendants() << ", right " <<
+//treeTwo->NumDescendants() << ", parent " << tree->NumDescendants() << "\n";
+//    for (size_t i = 0; i < tree->NumChildren(); ++i)
+//      std::cout << tree->Child(i).NumDescendants() << " ";
+//    std::cout << " (that's the children of the parent)\n";
+    assert(oldNumDescendants == tree->NumDescendants());
+  }
 }
 
 /**
@@ -257,6 +337,8 @@ void RStarTreeSplit::SplitLeafNode(TreeType *tree,std::vector<bool>& relevels)
 template<typename TreeType>
 bool RStarTreeSplit::SplitNonLeafNode(TreeType *tree,std::vector<bool>& relevels)
 {
+//  std::cout << "split nonleaf node " << tree << " with parent " <<
+//tree->Parent() << "\n";
   // Convenience typedef.
   typedef typename TreeType::ElemType ElemType;
   typedef bound::HRectBound<metric::EuclideanDistance, ElemType> BoundType;
@@ -264,19 +346,19 @@ bool RStarTreeSplit::SplitNonLeafNode(TreeType *tree,std::vector<bool>& relevels
   // If we are splitting the root node, we need will do things differently so
   // that the constructor and other methods don't confuse the end user by giving
   // an address of another node.
-  if (tree->Parent() == NULL)
-  {
+//  if (tree->Parent() == NULL)
+//  {
     // We actually want to copy this way.  Pointers and everything.
-    TreeType* copy = new TreeType(*tree, false);
+//    TreeType* copy = new TreeType(*tree, false);
 
-    copy->Parent() = tree;
-    tree->NumChildren() = 0;
-    tree->NullifyData();
-    tree->children[(tree->NumChildren())++] = copy;
+//    copy->Parent() = tree;
+//    tree->NumChildren() = 0;
+//    tree->NullifyData();
+//    tree->children[(tree->NumChildren())++] = copy;
 
-    RStarTreeSplit::SplitNonLeafNode(copy,relevels);
-    return true;
-  }
+//    RStarTreeSplit::SplitNonLeafNode(copy,relevels);
+//    return true;
+//  }
 
  /*
   // If we haven't yet reinserted on this level, we try doing so now.
@@ -358,7 +440,7 @@ bool RStarTreeSplit::SplitNonLeafNode(TreeType *tree,std::vector<bool>& relevels
       sorted[i].second = i;
     }
 
-    std::sort(sorted.begin(), sorted.end(), PairComp<ElemType>);
+    std::sort(sorted.begin(), sorted.end(), PairComp<ElemType, size_t>);
 
     // We'll store each of the three scores for each distribution.
     std::vector<ElemType> areas(tree->MaxNumChildren() -
@@ -441,7 +523,7 @@ bool RStarTreeSplit::SplitNonLeafNode(TreeType *tree,std::vector<bool>& relevels
       sorted[i].second = i;
     }
 
-    std::sort(sorted.begin(), sorted.end(), PairComp<ElemType>);
+    std::sort(sorted.begin(), sorted.end(), PairComp<ElemType, size_t>);
 
     // We'll store each of the three scores for each distribution.
     std::vector<ElemType> areas(tree->MaxNumChildren() -
@@ -514,13 +596,13 @@ bool RStarTreeSplit::SplitNonLeafNode(TreeType *tree,std::vector<bool>& relevels
     }
   }
 
-  std::vector<std::pair<ElemType, size_t>> sorted(tree->NumChildren());
+  std::vector<std::pair<ElemType, TreeType*>> sorted(tree->NumChildren());
   if (lowIsBest)
   {
     for (size_t i = 0; i < sorted.size(); i++)
     {
       sorted[i].first = tree->Child(i).Bound()[bestAxis].Lo();
-      sorted[i].second = i;
+      sorted[i].second = &tree->Child(i);
     }
   }
   else
@@ -528,67 +610,161 @@ bool RStarTreeSplit::SplitNonLeafNode(TreeType *tree,std::vector<bool>& relevels
     for (size_t i = 0; i < sorted.size(); i++)
     {
       sorted[i].first = tree->Child(i).Bound()[bestAxis].Hi();
-      sorted[i].second = i;
+      sorted[i].second = &tree->Child(i);
     }
   }
 
-  std::sort(sorted.begin(), sorted.end(), PairComp<ElemType>);
-
-  TreeType* treeOne = new TreeType(tree->Parent());
-  TreeType* treeTwo = new TreeType(tree->Parent());
+  std::sort(sorted.begin(), sorted.end(), PairComp<ElemType, TreeType*>);
 
-  if (tiedOnOverlap)
+  if (tree->Parent() != NULL)
   {
-    for (size_t i = 0; i < tree->NumChildren(); i++)
+    const size_t oldNumDescendants = tree->NumDescendants();
+//    for (size_t i = 0; i < tree->NumChildren(); ++i)
+//      std::cout << tree->Child(i).NumDescendants() << " ";
+//    std::cout << " (total " << tree->NumDescendants() << ", count " <<
+//tree->count << "\n";
+    const size_t oldNumChildren = tree->NumChildren();
+    tree->numChildren = 0;
+    tree->bound.Clear();
+    tree->count = 0;
+    tree->begin = 0;
+    tree->numDescendants = 0;
+    TreeType* treeTwo = new TreeType(tree->Parent());
+
+    if (tiedOnOverlap)
     {
-      if (i < bestAreaIndexOnBestAxis + tree->MinNumChildren())
-        InsertNodeIntoTree(treeOne, &(tree->Child(sorted[i].second)));
-      else
-        InsertNodeIntoTree(treeTwo, &(tree->Child(sorted[i].second)));
+      for (size_t i = 0; i < oldNumChildren; i++)
+      {
+        if (i < bestAreaIndexOnBestAxis + tree->MinNumChildren())
+        {
+//          std::cout << "insert " << sorted[i].second->NumDescendants() << " into tree"
+//              << "\n";
+          InsertNodeIntoTree(tree, sorted[i].second);
+        }
+        else
+        {
+//          std::cout << "insert " << sorted[i].second->NumDescendants() << " into treeTwo\n";
+          InsertNodeIntoTree(treeTwo, sorted[i].second);
+        }
+      }
     }
-  }
-  else
-  {
-    for (size_t i = 0; i < tree->NumChildren(); i++)
+    else
     {
-      if (i < bestOverlapIndexOnBestAxis + tree->MinNumChildren())
-        InsertNodeIntoTree(treeOne, &(tree->Child(sorted[i].second)));
-      else
-        InsertNodeIntoTree(treeTwo, &(tree->Child(sorted[i].second)));
+      for (size_t i = 0; i < oldNumChildren; i++)
+      {
+        if (i < bestOverlapIndexOnBestAxis + tree->MinNumChildren())
+        {
+//          std::cout << "insert " << sorted[i].second->NumDescendants() << " into tree\n";
+          InsertNodeIntoTree(tree, sorted[i].second);
+        }
+        else
+        {
+//          std::cout << "insert " << sorted[i].second->NumDescendants() << " into treeTwo\n";
+          InsertNodeIntoTree(treeTwo, sorted[i].second);
+        }
+      }
     }
-  }
 
-  // Remove this node and insert treeOne and treeTwo
-  TreeType* par = tree->Parent();
-  size_t index = 0;
-  while (par->children[index] != tree) { index++; }
+    // Insert the new node into the tree.
+    TreeType* par = tree->Parent();
+    par->children[par->NumChildren()++] = treeTwo;
+
+    // We only add one at a time, so we should only need to test for equality
+    // just in case, we use an assert.
+    assert(par->NumChildren() <= par->MaxNumChildren() + 1);
+    if (par->NumChildren() == par->MaxNumChildren() + 1)
+      RStarTreeSplit::SplitNonLeafNode(par,relevels);
+
+    // We have to update the children of each of these new nodes so that they
+    // record the correct parent.
+    for (size_t i = 0; i < tree->NumChildren(); i++)
+      tree->children[i]->Parent() = tree;
+
+    for (size_t i = 0; i < treeTwo->NumChildren(); i++)
+      treeTwo->children[i]->Parent() = treeTwo;
 
-  par->children[index] = treeOne;
-  par->children[par->NumChildren()++] = treeTwo;
+//    std::cout << "tree left " << tree->NumDescendants() << ", right " <<
+//treeTwo->NumDescendants() << ", parent " << par->NumDescendants() << "\n";
+//    for (size_t i = 0; i < par->NumChildren(); ++i)
+//      std::cout << par->Child(i).NumDescendants() << " ";
+//    std::cout << " (that's the children of the parent)\n";
+    assert(oldNumDescendants == (tree->NumDescendants() +
+treeTwo->NumDescendants()));
 
-  // We only add one at a time, so we should only need to test for equality
-  // just in case, we use an assert.
-  assert(par->NumChildren() <= par->MaxNumChildren() + 1);
-  if (par->NumChildren() == par->MaxNumChildren() + 1)
-    RStarTreeSplit::SplitNonLeafNode(par,relevels);
 
-  // We have to update the children of each of these new nodes so that they
-  // record the correct parent.
-  for (size_t i = 0; i < treeOne->NumChildren(); i++)
-    treeOne->children[i]->Parent() = treeOne;
+    assert(tree->Parent()->NumChildren() <= tree->MaxNumChildren());
+    assert(tree->Parent()->NumChildren() >= tree->MinNumChildren());
+    assert(treeTwo->Parent()->NumChildren() <= treeTwo->MaxNumChildren());
+    assert(treeTwo->Parent()->NumChildren() >= treeTwo->MinNumChildren());
+
+    assert(tree->MaxNumChildren() < 7);
+    assert(treeTwo->MaxNumChildren() < 7);
+  }
+  else
+  {
+    const size_t oldDescendants = tree->NumDescendants();
+//    for (size_t i = 0; i < tree->NumChildren(); ++i)
+//      std::cout << tree->Child(i).NumDescendants() << " ";
+//    std::cout << " (total " << tree->NumDescendants() << ", count " <<
+//tree->count << "\n";
+    TreeType* treeOne = new TreeType(tree);
+    TreeType* treeTwo = new TreeType(tree);
+
+    const size_t oldNumChildren = tree->NumChildren();
+    tree->count = 0;
+    tree->numChildren = 0;
+    tree->bound.Clear();
+    tree->numDescendants = 0;
+
+    if (tiedOnOverlap)
+    {
+      for (size_t i = 0; i < oldNumChildren; i++)
+      {
+        if (i < bestAreaIndexOnBestAxis + tree->MinNumChildren())
+        {
+//          std::cout << "insert " << sorted[i].second->NumDescendants() << " into treeOne\n";
+          InsertNodeIntoTree(treeOne, sorted[i].second);
+        }
+        else
+        {
+//          std::cout << "insert " << sorted[i].second->NumDescendants() << " into treeTwo\n";
+          InsertNodeIntoTree(treeTwo, sorted[i].second);
+        }
+      }
+    }
+    else
+    {
+      for (size_t i = 0; i < oldNumChildren; i++)
+      {
+        if (i < bestOverlapIndexOnBestAxis + tree->MinNumChildren())
+        {
+//          std::cout << "insert " << sorted[i].second->NumDescendants() << " into treeOne\n";
+          InsertNodeIntoTree(treeOne, sorted[i].second);
+        }
+        else
+        {
+//          std::cout << "insert " << sorted[i].second->NumDescendants() << " into treeTwo\n";
+          InsertNodeIntoTree(treeTwo, sorted[i].second);
+        }
+      }
+    }
 
-  for (size_t i = 0; i < treeTwo->NumChildren(); i++)
-    treeTwo->children[i]->Parent() = treeTwo;
+    InsertNodeIntoTree(tree, treeOne);
+    InsertNodeIntoTree(tree, treeTwo);
 
-  assert(treeOne->Parent()->NumChildren() <= treeOne->MaxNumChildren());
-  assert(treeOne->Parent()->NumChildren() >= treeOne->MinNumChildren());
-  assert(treeTwo->Parent()->NumChildren() <= treeTwo->MaxNumChildren());
-  assert(treeTwo->Parent()->NumChildren() >= treeTwo->MinNumChildren());
+//    std::cout << oldDescendants << "; " << treeOne->numDescendants << ", " <<
+//treeTwo->numDescendants << " --> " << tree->numDescendants << "\n";
+    assert(oldDescendants == (treeOne->numDescendants +
+        treeTwo->numDescendants));
 
-  assert(treeOne->MaxNumChildren() < 7);
-  assert(treeTwo->MaxNumChildren() < 7);
+    // We have to update the children of each of these new nodes so that they
+    // record the correct parent.
+    for (size_t i = 0; i < treeOne->NumChildren(); i++)
+      treeOne->children[i]->Parent() = treeOne;
 
-  tree->SoftDelete();
+    for (size_t i = 0; i < treeTwo->NumChildren(); i++)
+      treeTwo->children[i]->Parent() = treeTwo;
+  }
 
   return false;
 }
@@ -600,9 +776,21 @@ bool RStarTreeSplit::SplitNonLeafNode(TreeType *tree,std::vector<bool>& relevels
 template<typename TreeType>
 void RStarTreeSplit::InsertNodeIntoTree(TreeType* destTree, TreeType* srcNode)
 {
+//  std::cout << "insert " << srcNode << " into " << destTree << "\n";
+
   destTree->Bound() |= srcNode->Bound();
   destTree->numDescendants += srcNode->numDescendants;
   destTree->children[destTree->NumChildren()++] = srcNode;
+
+//  std::cout << "dest now has " << destTree->NumDescendants() << "\n";
+//  size_t manualCount = 0;
+//  for (size_t i = 0; i < destTree->NumChildren(); ++i)
+//  {
+//    manualCount += destTree->Child(i).NumDescendants();
+//    std::cout << destTree->Child(i).NumDescendants() << " ";
+//  }
+//  std::cout << "\n";
+//  assert(manualCount == destTree->NumDescendants());
 }
 
 } // namespace tree
diff --git a/src/mlpack/core/tree/rectangle_tree/rectangle_tree_impl.hpp b/src/mlpack/core/tree/rectangle_tree/rectangle_tree_impl.hpp
index 5b4295b..d5f8858 100644
--- a/src/mlpack/core/tree/rectangle_tree/rectangle_tree_impl.hpp
+++ b/src/mlpack/core/tree/rectangle_tree/rectangle_tree_impl.hpp
@@ -275,7 +275,6 @@ RectangleTree<MetricType, StatisticType, MatType, SplitType, DescentType,
 
   if (ownsDataset)
     delete dataset;
-
 }
 
 /**
@@ -336,9 +335,7 @@ void RectangleTree<MetricType, StatisticType, MatType, SplitType, DescentType,
 
   numDescendants++;
 
-  std::vector<bool> lvls(TreeDepth());
-  for (size_t i = 0; i < lvls.size(); i++)
-    lvls[i] = true;
+  std::vector<bool> lvls(TreeDepth(), true);
 
   // If this is a leaf node, we stop here and add the point.
   if (numChildren == 0)
@@ -452,9 +449,7 @@ bool RectangleTree<MetricType, StatisticType, MatType, SplitType, DescentType,
   while (root->Parent() != NULL)
     root = root->Parent();
 
-  std::vector<bool> lvls(root->TreeDepth());
-  for (size_t i = 0; i < lvls.size(); i++)
-    lvls[i] = true;
+  std::vector<bool> lvls(root->TreeDepth(), true);
 
   if (numChildren == 0)
   {
@@ -991,6 +986,7 @@ void RectangleTree<MetricType, StatisticType, MatType, SplitType, DescentType,
         // This will check the minFill of the parent.
         parent->CondenseTree(point, relevels, usePoint);
         // Now it should be safe to delete this node.
+        std::cout << "soft delete " << this << "\n";
         SoftDelete();
 
         return;
@@ -1052,6 +1048,7 @@ void RectangleTree<MetricType, StatisticType, MatType, SplitType, DescentType,
           // This will check the minFill of the point.
           parent->CondenseTree(point, relevels, usePoint);
           // Now it should be safe to delete this node.
+          std::cout << "soft delete " << this << " 2\n";
           SoftDelete();
 
           return;
@@ -1067,15 +1064,17 @@ void RectangleTree<MetricType, StatisticType, MatType, SplitType, DescentType,
       if (child->NumChildren() > maxNumChildren)
       {
         maxNumChildren = child->MaxNumChildren();
-        children.resize(maxNumChildren+1);
+        children.resize(maxNumChildren + 1);
       }
 
       for (size_t i = 0; i < child->NumChildren(); i++) {
         children[i] = child->children[i];
         children[i]->Parent() = this;
+        child->children[i] = NULL;
       }
 
       numChildren = child->NumChildren();
+      child->NumChildren() = 0;
 
       for (size_t i = 0; i < child->Count(); i++)
       {
@@ -1086,7 +1085,9 @@ void RectangleTree<MetricType, StatisticType, MatType, SplitType, DescentType,
       auxiliaryInfo = child->AuxiliaryInfo();
 
       count = child->Count();
-      child->SoftDelete();
+      child->Count() = 0;
+
+      delete child;
       return;
     }
   }
diff --git a/src/mlpack/core/tree/rectangle_tree/x_tree_split.hpp b/src/mlpack/core/tree/rectangle_tree/x_tree_split.hpp
index a3d6cef..2c6ce57 100644
--- a/src/mlpack/core/tree/rectangle_tree/x_tree_split.hpp
+++ b/src/mlpack/core/tree/rectangle_tree/x_tree_split.hpp
@@ -62,9 +62,9 @@ class XTreeSplit
    * Comparator for sorting with std::pair. This comparator works a little bit
    * faster then the default comparator.
    */
-  template<typename ElemType>
-  static bool PairComp(const std::pair<ElemType, size_t>& p1,
-                       const std::pair<ElemType, size_t>& p2)
+  template<typename ElemType, typename SecondType>
+  static bool PairComp(const std::pair<ElemType, SecondType>& p1,
+                       const std::pair<ElemType, SecondType>& p2)
   {
     return p1.first < p2.first;
   }
diff --git a/src/mlpack/core/tree/rectangle_tree/x_tree_split_impl.hpp b/src/mlpack/core/tree/rectangle_tree/x_tree_split_impl.hpp
index a8f6dc8..6f72673 100644
--- a/src/mlpack/core/tree/rectangle_tree/x_tree_split_impl.hpp
+++ b/src/mlpack/core/tree/rectangle_tree/x_tree_split_impl.hpp
@@ -38,24 +38,24 @@ void XTreeSplit::SplitLeafNode(TreeType *tree,std::vector<bool>& relevels)
   // If we are splitting the root node, we need will do things differently so
   // that the constructor and other methods don't confuse the end user by giving
   // an address of another node.
-  if (tree->Parent() == NULL)
-  {
+  //if (tree->Parent() == NULL)
+  //{
     // We actually want to copy this way.  Pointers and everything.
-    TreeType* copy = new TreeType(*tree, false);
-    copy->Parent() = tree;
-    tree->Count() = 0;
-    tree->NullifyData();
+  //  TreeType* copy = new TreeType(*tree, false);
+  //  copy->Parent() = tree;
+  //  tree->Count() = 0;
+  //  tree->NullifyData();
     // Because this was a leaf node, numChildren must be 0.
-    tree->children[(tree->NumChildren())++] = copy;
-    assert(tree->NumChildren() == 1);
-    XTreeSplit::SplitLeafNode(copy,relevels);
-    return;
-  }
+  //  tree->children[(tree->NumChildren())++] = copy;
+  //  assert(tree->NumChildren() == 1);
+  //  XTreeSplit::SplitLeafNode(copy,relevels);
+  //  return;
+  //}
 
   // If we haven't yet reinserted on this level, we try doing so now.
-  if (relevels[tree->TreeDepth()])
+  if (relevels[tree->TreeDepth() - 1])
   {
-    relevels[tree->TreeDepth()] = false;
+    relevels[tree->TreeDepth() - 1] = false;
     // We sort the points by decreasing distance to the centroid of the bound.
     // We then remove the first p entries and reinsert them at the root.
     TreeType* root = tree;
@@ -77,10 +77,10 @@ void XTreeSplit::SplitLeafNode(TreeType *tree,std::vector<bool>& relevels)
     {
       sorted[i].first = tree->Metric().Evaluate(center,
           tree->Dataset().col(tree->Point(i)));
-       sorted[i].second = i;
+      sorted[i].second = i;
     }
 
-    std::sort(sorted.begin(), sorted.end(), PairComp<ElemType>);
+    std::sort(sorted.begin(), sorted.end(), PairComp<ElemType, size_t>);
     std::vector<size_t> pointIndices(p);
 
     for (size_t i = 0; i < p; i++)
@@ -129,7 +129,7 @@ void XTreeSplit::SplitLeafNode(TreeType *tree,std::vector<bool>& relevels)
       sorted[i].second = i;
     }
 
-    std::sort(sorted.begin(), sorted.end(), PairComp<ElemType>);
+    std::sort(sorted.begin(), sorted.end(), PairComp<ElemType, size_t>);
 
     // We'll store each of the three scores for each distribution.
     std::vector<ElemType> areas(tree->MaxLeafSize() -
@@ -202,77 +202,104 @@ void XTreeSplit::SplitLeafNode(TreeType *tree,std::vector<bool>& relevels)
   for (size_t i = 0; i < sorted.size(); i++)
   {
     sorted[i].first = tree->Dataset().col(tree->Point(i))[bestAxis];
-    sorted[i].second = i;
+    sorted[i].second = tree->Point(i);
   }
 
-  std::sort(sorted.begin(), sorted.end(), PairComp<ElemType>);
-
-  TreeType* treeOne = new TreeType(tree->Parent(),
-                            tree->AuxiliaryInfo().NormalNodeMaxNumChildren());
-  TreeType* treeTwo = new TreeType(tree->Parent(),
-                            tree->AuxiliaryInfo().NormalNodeMaxNumChildren());
+  std::sort(sorted.begin(), sorted.end(), PairComp<ElemType, size_t>);
 
-  // The leaf nodes should never have any overlap introduced by the above method
-  // since a split axis is chosen and then points are assigned based on their
-  // value along that axis.
-  if (tiedOnOverlap)
+  if (tree->Parent() != NULL)
   {
-    for (size_t i = 0; i < tree->Count(); i++)
+    // We can reuse 'tree' as one of the two children.
+  //TreeType* treeOne = new TreeType(tree->Parent(),
+  //                          tree->AuxiliaryInfo().NormalNodeMaxNumChildren());
+    TreeType* treeTwo = new TreeType(tree->Parent(),
+                              tree->AuxiliaryInfo().NormalNodeMaxNumChildren());
+
+    const size_t oldCount = tree->Count();
+    tree->Count() = 0;
+
+    // The leaf nodes should never have any overlap introduced by the above method
+    // since a split axis is chosen and then points are assigned based on their
+    // value along that axis.
+    if (tiedOnOverlap)
     {
-      if (i < bestAreaIndexOnBestAxis + tree->MinLeafSize())
-        treeOne->InsertPoint(tree->Point(sorted[i].second));
-      else
-        treeTwo->InsertPoint(tree->Point(sorted[i].second));
+      for (size_t i = 0; i < oldCount; i++)
+      {
+        if (i < bestAreaIndexOnBestAxis + tree->MinLeafSize())
+          tree->InsertPoint(sorted[i].second);
+        else
+          treeTwo->InsertPoint(sorted[i].second);
+      }
+    }
+    else
+    {
+      for (size_t i = 0; i < oldCount; i++)
+      {
+        if (i < bestOverlapIndexOnBestAxis + tree->MinLeafSize())
+          tree->InsertPoint(tree->Point(sorted[i].second));
+        else
+          treeTwo->InsertPoint(tree->Point(sorted[i].second));
+      }
     }
+
+    // 'tree' stays in place in its parent; only treeTwo has to be added.
+    TreeType* par = tree->Parent();
+    par->children[par->NumChildren()++] = treeTwo;
+
+    // We now update the split history of each new node.
+    tree->AuxiliaryInfo().SplitHistory().history[bestAxis] = true;
+    tree->AuxiliaryInfo().SplitHistory().lastDimension = bestAxis;
+    treeTwo->AuxiliaryInfo().SplitHistory().history[bestAxis] = true;
+    treeTwo->AuxiliaryInfo().SplitHistory().lastDimension = bestAxis;
+
+    // We only add one at a time, so we should only need to test for equality;
+    // just in case, we use an assert.
+    assert(par->NumChildren() <= par->MaxNumChildren() + 1);
+    if (par->NumChildren() == par->MaxNumChildren() + 1)
+      XTreeSplit::SplitNonLeafNode(par,relevels);
   }
   else
   {
-    for (size_t i = 0; i < tree->Count(); i++)
+    // We have to insert two nodes, and this node moves "up", since it is the
+    // root.
+    TreeType* treeOne = new TreeType(tree,
+        tree->AuxiliaryInfo().NormalNodeMaxNumChildren());
+    TreeType* treeTwo = new TreeType(tree,
+        tree->AuxiliaryInfo().NormalNodeMaxNumChildren());
+
+    const size_t oldCount = tree->Count();
+    tree->Count() = 0;
+
+    if (tiedOnOverlap)
     {
-      if (i < bestOverlapIndexOnBestAxis + tree->MinLeafSize())
-        treeOne->InsertPoint(tree->Point(sorted[i].second));
-      else
-        treeTwo->InsertPoint(tree->Point(sorted[i].second));
+      for (size_t i = 0; i < oldCount; i++)
+      {
+        if (i < bestAreaIndexOnBestAxis + tree->MinLeafSize())
+          treeOne->InsertPoint(sorted[i].second);
+        else
+          treeTwo->InsertPoint(sorted[i].second);
+      }
     }
-  }
-
-  // Remove this node and insert treeOne and treeTwo.
-  TreeType* par = tree->Parent();
-  size_t index = par->NumChildren();
-  for (size_t i = 0; i < par->NumChildren(); i++)
-  {
-    if (par->children[i] == tree)
+    else
     {
-      index = i;
-      break;
+      for (size_t i = 0; i < oldCount; i++)
+      {
+        if (i < bestOverlapIndexOnBestAxis + tree->MinLeafSize())
+          treeOne->InsertPoint(sorted[i].second);
+        else
+          treeTwo->InsertPoint(sorted[i].second);
+      }
     }
+
+    tree->children[0] = treeOne;
+    tree->children[1] = treeTwo;
+    tree->numChildren = 2;
+
+    treeOne->AuxiliaryInfo().SplitHistory().history[bestAxis] = true;
+    treeOne->AuxiliaryInfo().SplitHistory().lastDimension = bestAxis;
+    treeTwo->AuxiliaryInfo().SplitHistory().history[bestAxis] = true;
+    treeTwo->AuxiliaryInfo().SplitHistory().lastDimension = bestAxis;
   }
-  assert(index != par->NumChildren());
-  par->children[index] = treeOne;
-  par->children[par->NumChildren()++] = treeTwo;
-
-  // We now update the split history of each new node.
-  treeOne->AuxiliaryInfo().SplitHistory().history[bestAxis] = true;
-  treeOne->AuxiliaryInfo().SplitHistory().lastDimension = bestAxis;
-  treeTwo->AuxiliaryInfo().SplitHistory().history[bestAxis] = true;
-  treeTwo->AuxiliaryInfo().SplitHistory().lastDimension = bestAxis;
-
-  // We only add one at a time, so we should only need to test for equality just
-  // in case, we use an assert.
-  assert(par->NumChildren() <= par->MaxNumChildren() + 1);
-  if (par->NumChildren() == par->MaxNumChildren() + 1)
-    XTreeSplit::SplitNonLeafNode(par,relevels);
-
-  assert(treeOne->Parent()->NumChildren() <=
-      treeOne->Parent()->MaxNumChildren());
-  assert(treeOne->Parent()->NumChildren() >=
-      treeOne->Parent()->MinNumChildren());
-  assert(treeTwo->Parent()->NumChildren() <=
-      treeTwo->Parent()->MaxNumChildren());
-  assert(treeTwo->Parent()->NumChildren() >=
-      treeTwo->Parent()->MinNumChildren());
-
-  tree->SoftDelete();
 }
 
 /**
@@ -292,18 +319,18 @@ bool XTreeSplit::SplitNonLeafNode(TreeType *tree,std::vector<bool>& relevels)
   // If we are splitting the root node, we need will do things differently so
   // that the constructor and other methods don't confuse the end user by giving
   // an address of another node.
-  if (tree->Parent() == NULL)
-  {
+//  if (tree->Parent() == NULL)
+//  {
     // We actually want to copy this way.  Pointers and everything.
-    TreeType* copy = new TreeType(*tree, false);
-
-    copy->Parent() = tree;
-    tree->NumChildren() = 0;
-    tree->NullifyData();
-    tree->children[(tree->NumChildren())++] = copy;
-    XTreeSplit::SplitNonLeafNode(copy,relevels);
-    return true;
-  }
+//    TreeType* copy = new TreeType(*tree, false);
+
+//    copy->Parent() = tree;
+//    tree->NumChildren() = 0;
+//    tree->NullifyData();
+//    tree->children[(tree->NumChildren())++] = copy;
+//    XTreeSplit::SplitNonLeafNode(copy,relevels);
+//    return true;
+//  }
 
   // The X tree paper doesn't explain how to handle the split history when
   // reinserting nodes and reinserting nodes seems to hurt the performance, so
@@ -373,14 +400,14 @@ bool XTreeSplit::SplitNonLeafNode(TreeType *tree,std::vector<bool>& relevels)
     ElemType axisScore = 0.0;
 
     // We'll do Bound().Lo() now and use Bound().Hi() later.
-    std::vector<std::pair<ElemType, size_t>> sorted(tree->NumChildren());
+    std::vector<std::pair<ElemType, TreeType*>> sorted(tree->NumChildren());
     for (size_t i = 0; i < sorted.size(); i++)
     {
       sorted[i].first = tree->Child(i).Bound()[j].Lo();
-      sorted[i].second = i;
+      sorted[i].second = &tree->Child(i);
     }
 
-    std::sort(sorted.begin(), sorted.end(), PairComp<ElemType>);
+    std::sort(sorted.begin(), sorted.end(), PairComp<ElemType, TreeType*>);
 
     // We'll store each of the three scores for each distribution.
     std::vector<ElemType> areas(tree->MaxNumChildren() -
@@ -408,10 +435,10 @@ bool XTreeSplit::SplitNonLeafNode(TreeType *tree,std::vector<bool>& relevels)
       BoundType bound2(tree->Bound().Dim());
 
       for (size_t l = 0; l < cutOff; l++)
-        bound1 |= tree->Child(sorted[l].second).Bound();
+        bound1 |= sorted[l].second->Bound();
 
       for (size_t l = cutOff; l < tree->NumChildren(); l++)
-        bound2 |= tree->Child(sorted[l].second).Bound();
+        bound2 |= sorted[l].second->Bound();
 
       ElemType area1 = bound1.Volume();
       ElemType area2 = bound2.Volume();
@@ -478,14 +505,14 @@ bool XTreeSplit::SplitNonLeafNode(TreeType *tree,std::vector<bool>& relevels)
   {
     ElemType axisScore = 0.0;
 
-    std::vector<std::pair<ElemType, size_t>> sorted(tree->NumChildren());
+    std::vector<std::pair<ElemType, TreeType*>> sorted(tree->NumChildren());
     for (size_t i = 0; i < sorted.size(); i++)
     {
       sorted[i].first = tree->Child(i).Bound()[j].Hi();
-      sorted[i].second = i;
+      sorted[i].second = &tree->Child(i);
     }
 
-    std::sort(sorted.begin(), sorted.end(), PairComp<ElemType>);
+    std::sort(sorted.begin(), sorted.end(), PairComp<ElemType, TreeType*>);
 
     // We'll store each of the three scores for each distribution.
     std::vector<ElemType> areas(tree->MaxNumChildren() -
@@ -513,10 +540,10 @@ bool XTreeSplit::SplitNonLeafNode(TreeType *tree,std::vector<bool>& relevels)
       BoundType bound2(tree->Bound().Dim());
 
       for (size_t l = 0; l < cutOff; l++)
-        bound1 |= tree->Child(sorted[l].second).Bound();
+        bound1 |= sorted[l].second->Bound();
 
       for (size_t l = cutOff; l < tree->NumChildren(); l++)
-        bound2 |= tree->Child(sorted[l].second).Bound();
+        bound2 |= sorted[l].second->Bound();
 
       ElemType area1 = bound1.Volume();
       ElemType area2 = bound2.Volume();
@@ -581,13 +608,13 @@ bool XTreeSplit::SplitNonLeafNode(TreeType *tree,std::vector<bool>& relevels)
     }
   }
 
-  std::vector<std::pair<ElemType, size_t>> sorted(tree->NumChildren());
+  std::vector<std::pair<ElemType, TreeType*>> sorted(tree->NumChildren());
   if (lowIsBest)
   {
     for (size_t i = 0; i < sorted.size(); i++)
     {
       sorted[i].first = tree->Child(i).Bound()[bestAxis].Lo();
-      sorted[i].second = i;
+      sorted[i].second = &tree->Child(i);
     }
   }
   else
@@ -595,181 +622,282 @@ bool XTreeSplit::SplitNonLeafNode(TreeType *tree,std::vector<bool>& relevels)
     for (size_t i = 0; i < sorted.size(); i++)
     {
       sorted[i].first = tree->Child(i).Bound()[bestAxis].Hi();
-      sorted[i].second = i;
+      sorted[i].second = &tree->Child(i);
     }
   }
 
-  std::sort(sorted.begin(), sorted.end(), PairComp<ElemType>);
-
-  TreeType* treeOne = new TreeType(tree->Parent(), tree->MaxNumChildren());
-  TreeType* treeTwo = new TreeType(tree->Parent(), tree->MaxNumChildren());
+  std::sort(sorted.begin(), sorted.end(), PairComp<ElemType, TreeType*>);
 
-  // Now as per the X-tree paper, we ensure that this split was good enough.
-  bool useMinOverlapSplit = false;
-  if (tiedOnOverlap)
+  if (tree->Parent() != NULL)
   {
-    if (overlapBestAreaAxis/areaBestAreaAxis < MAX_OVERLAP)
+    // Reuse tree as the new child.
+    TreeType* treeTwo = new TreeType(tree->Parent(), tree->MaxNumChildren());
+    const size_t numChildren = tree->NumChildren();
+    tree->numChildren = 0;
+
+    // Now as per the X-tree paper, we ensure that this split was good enough.
+    bool useMinOverlapSplit = false;
+    if (tiedOnOverlap)
     {
-      for (size_t i = 0; i < tree->NumChildren(); i++)
+      if (overlapBestAreaAxis/areaBestAreaAxis < MAX_OVERLAP)
       {
-        if (i < bestAreaIndexOnBestAxis + tree->MinNumChildren())
-          InsertNodeIntoTree(treeOne, tree->children[sorted[i].second]);
-        else
-          InsertNodeIntoTree(treeTwo, tree->children[sorted[i].second]);
+        for (size_t i = 0; i < numChildren; i++)
+        {
+          if (i < bestAreaIndexOnBestAxis + tree->MinNumChildren())
+            InsertNodeIntoTree(tree, sorted[i].second);
+          else
+            InsertNodeIntoTree(treeTwo, sorted[i].second);
+        }
       }
+      else
+        useMinOverlapSplit = true;
     }
     else
-      useMinOverlapSplit = true;
-  }
-  else
-  {
-    if (overlapBestOverlapAxis/areaBestOverlapAxis < MAX_OVERLAP)
     {
-      for (size_t i = 0; i < tree->NumChildren(); i++)
+      if (overlapBestOverlapAxis/areaBestOverlapAxis < MAX_OVERLAP)
       {
-        if (i < bestOverlapIndexOnBestAxis + tree->MinNumChildren())
-          InsertNodeIntoTree(treeOne, tree->children[sorted[i].second]);
-        else
-          InsertNodeIntoTree(treeTwo, tree->children[sorted[i].second]);
+        for (size_t i = 0; i < numChildren; i++)
+        {
+          if (i < bestOverlapIndexOnBestAxis + tree->MinNumChildren())
+            InsertNodeIntoTree(tree, sorted[i].second);
+          else
+            InsertNodeIntoTree(treeTwo, sorted[i].second);
+        }
       }
+      else
+        useMinOverlapSplit = true;
     }
-    else
-      useMinOverlapSplit = true;
-  }
 
-  // If the split was not good enough, then we try the minimal overlap split.
-  // If that fails, we create a "super node" (more accurately we resize this one
-  // to make it a super node).
-  if (useMinOverlapSplit)
-  {
-    // If there is a dimension that might work, try that.
-    if ((minOverlapSplitDimension != tree->Bound().Dim()) &&
-        (bestScoreMinOverlapSplit / areaOfBestMinOverlapSplit < MAX_OVERLAP))
+    // If the split was not good enough, then we try the minimal overlap split.
+    // If that fails, we create a "super node" (more accurately we resize this one
+    // to make it a super node).
+    if (useMinOverlapSplit)
     {
-      std::vector<std::pair<ElemType, size_t>> sorted2(tree->NumChildren());
-      if (minOverlapSplitUsesHi)
+      // If there is a dimension that might work, try that.
+      if ((minOverlapSplitDimension != tree->Bound().Dim()) &&
+          (bestScoreMinOverlapSplit / areaOfBestMinOverlapSplit < MAX_OVERLAP))
       {
-        for (size_t i = 0; i < sorted2.size(); i++)
+        std::vector<std::pair<ElemType, TreeType*>> sorted2(numChildren);
+        if (minOverlapSplitUsesHi)
         {
-          sorted2[i].first = tree->Child(i).Bound()[bestAxis].Hi();
-          sorted2[i].second = i;
+          for (size_t i = 0; i < sorted2.size(); i++)
+          {
+            sorted2[i].first = sorted[i].second->Bound()[bestAxis].Hi();
+            sorted2[i].second = sorted[i].second;
+          }
+        }
+        else
+        {
+          for (size_t i = 0; i < sorted2.size(); i++)
+          {
+            sorted2[i].first = sorted[i].second->Bound()[bestAxis].Lo();
+            sorted2[i].second = sorted[i].second;
+          }
+        }
+        std::sort(sorted2.begin(), sorted2.end(), PairComp<ElemType, TreeType*>);
+
+        for (size_t i = 0; i < numChildren; i++)
+        {
+          if (i < bestIndexMinOverlapSplit + tree->MinNumChildren())
+            InsertNodeIntoTree(tree, sorted2[i].second);
+          else
+            InsertNodeIntoTree(treeTwo, sorted2[i].second);
         }
       }
       else
       {
-        for (size_t i = 0; i < sorted2.size(); i++)
+        // We don't create a supernode that would be the only child of the root.
+        // (Note that if you did try to do so you would need to update the parent
+        // field on each child of this new node as creating a supernode causes the
+        // function to return before that is done.)
+
+        // I thought commenting out the below would make the tree less efficient
+        // but would still work.  It doesn't.  I should look into that to see if
+        // there is another bug.
+
+        if ((tree->Parent()->Parent() == NULL) &&
+            (tree->Parent()->NumChildren() == 1))
         {
-          sorted2[i].first = tree->Child(i).Bound()[bestAxis].Lo();
-          sorted2[i].second = i;
+          // We make the root a supernode instead.
+          tree->Parent()->MaxNumChildren() = tree->MaxNumChildren() +
+                                tree->AuxiliaryInfo().NormalNodeMaxNumChildren();
+          tree->Parent()->children.resize(tree->Parent()->MaxNumChildren() + 1);
+          tree->Parent()->NumChildren() = tree->NumChildren();
+          for (size_t i = 0; i < numChildren; ++i)
+          {
+            tree->Parent()->children[i] = sorted[i].second;
+            tree->Parent()->children[i]->Parent() = tree->Parent();
+            tree->children[i] = NULL;
+          }
+
+          delete tree;
+          delete treeTwo;
+
+          return false;
         }
+
+        // If we don't have to worry about the root, we just enlarge this node.
+        tree->MaxNumChildren() +=
+                                tree->AuxiliaryInfo().NormalNodeMaxNumChildren();
+        tree->children.resize(tree->MaxNumChildren() + 1);
+        tree->numChildren = numChildren;
+        for (size_t i = 0; i < numChildren; i++)
+          tree->Child(i).Parent() = tree;
+
+        delete treeTwo;
+        return false;
       }
-      std::sort(sorted2.begin(), sorted2.end(), PairComp<ElemType>);
+    }
 
-      for (size_t i = 0; i < tree->NumChildren(); i++)
+    // Update the split history of each child.
+    tree->AuxiliaryInfo().SplitHistory().history[bestAxis] = true;
+    tree->AuxiliaryInfo().SplitHistory().lastDimension = bestAxis;
+    treeTwo->AuxiliaryInfo().SplitHistory().history[bestAxis] = true;
+    treeTwo->AuxiliaryInfo().SplitHistory().lastDimension = bestAxis;
+
+    // 'tree' stays in place in its parent; only treeTwo has to be added.
+    TreeType* par = tree->Parent();
+    par->children[par->NumChildren()++] = treeTwo;
+
+    // We only add one at a time, so we should only need to test for equality;
+    // just in case, we use an assert.
+    if (!(par->NumChildren() <= par->MaxNumChildren() + 1))
+      Log::Debug << "error " << par->NumChildren() << ", "
+          << par->MaxNumChildren() + 1 << std::endl;
+    assert(par->NumChildren() <= par->MaxNumChildren() + 1);
+
+    if (par->NumChildren() == par->MaxNumChildren() + 1)
+      XTreeSplit::SplitNonLeafNode(par,relevels);
+
+    // We have to update the children of each of these new nodes so that they
+    // record the correct parent.
+    for (size_t i = 0; i < treeTwo->NumChildren(); i++)
+      treeTwo->Child(i).Parent() = treeTwo;
+
+    assert(tree->Parent()->NumChildren() <=
+        tree->Parent()->MaxNumChildren());
+    assert(tree->Parent()->NumChildren() >=
+        tree->Parent()->MinNumChildren());
+    assert(treeTwo->Parent()->NumChildren() <=
+        treeTwo->Parent()->MaxNumChildren());
+    assert(treeTwo->Parent()->NumChildren() >=
+        treeTwo->Parent()->MinNumChildren());
+
+    return false;
+  }
+  else
+  {
+    // We are the root of the tree, so we need to create two children to add.
+    TreeType* treeOne = new TreeType(tree, tree->MaxNumChildren());
+    TreeType* treeTwo = new TreeType(tree, tree->MaxNumChildren());
+    const size_t numChildren = tree->NumChildren();
+    tree->numChildren = 0;
+
+    // Now as per the X-tree paper, we ensure that this split was good enough.
+    bool useMinOverlapSplit = false;
+    if (tiedOnOverlap)
+    {
+      if (overlapBestAreaAxis/areaBestAreaAxis < MAX_OVERLAP)
       {
-        if (i < bestIndexMinOverlapSplit + tree->MinNumChildren())
-          InsertNodeIntoTree(treeOne, tree->children[sorted2[i].second]);
-        else
-          InsertNodeIntoTree(treeTwo, tree->children[sorted2[i].second]);
+        for (size_t i = 0; i < numChildren; i++)
+        {
+          if (i < bestAreaIndexOnBestAxis + tree->MinNumChildren())
+            InsertNodeIntoTree(treeOne, sorted[i].second);
+          else
+            InsertNodeIntoTree(treeTwo, sorted[i].second);
+        }
       }
+      else
+        useMinOverlapSplit = true;
     }
     else
     {
-      // We don't create a supernode that would be the only child of the root.
-      // (Note that if you did try to do so you would need to update the parent
-      // field on each child of this new node as creating a supernode causes the
-      // function to return before that is done.
-
-      // I thought commenting out the bellow would make the tree less efficient
-      // but would still work.  It doesn't.  I should look into that to see if
-      // there is another bug.
+      if (overlapBestOverlapAxis/areaBestOverlapAxis < MAX_OVERLAP)
+      {
+        for (size_t i = 0; i < numChildren; i++)
+        {
+          if (i < bestOverlapIndexOnBestAxis + tree->MinNumChildren())
+            InsertNodeIntoTree(treeOne, sorted[i].second);
+          else
+            InsertNodeIntoTree(treeTwo, sorted[i].second);
+        }
+      }
+      else
+        useMinOverlapSplit = true;
+    }
 
-      if ((tree->Parent()->Parent() == NULL) &&
-          (tree->Parent()->NumChildren() == 1))
+    // If the split was not good enough, then we try the minimal overlap split.
+    // If that fails, we create a "super node" (more accurately we resize this one
+    // to make it a super node).
+    if (useMinOverlapSplit)
+    {
+      // If there is a dimension that might work, try that.
+      if ((minOverlapSplitDimension != tree->Bound().Dim()) &&
+          (bestScoreMinOverlapSplit / areaOfBestMinOverlapSplit < MAX_OVERLAP))
       {
-        // We make the root a supernode instead.
-        tree->Parent()->MaxNumChildren() = tree->MaxNumChildren() + 
-                              tree->AuxiliaryInfo().NormalNodeMaxNumChildren();
-        tree->Parent()->children.resize(tree->Parent()->MaxNumChildren() + 1);
-        tree->Parent()->NumChildren() = tree->NumChildren();
-        for (size_t i = 0; i < tree->NumChildren(); i++)
+        std::vector<std::pair<ElemType, TreeType*>> sorted2(numChildren);
+        if (minOverlapSplitUsesHi)
+        {
+          for (size_t i = 0; i < sorted2.size(); i++)
+          {
+            sorted2[i].first = sorted[i].second->Bound()[bestAxis].Hi();
+            sorted2[i].second = sorted[i].second;
+          }
+        }
+        else
+        {
+          for (size_t i = 0; i < sorted2.size(); i++)
+          {
+            sorted2[i].first = sorted[i].second->Bound()[bestAxis].Lo();
+            sorted2[i].second = sorted[i].second;
+          }
+        }
+        std::sort(sorted2.begin(), sorted2.end(), PairComp<ElemType, TreeType*>);
+
+        for (size_t i = 0; i < numChildren; i++)
         {
-          tree->Parent()->children[i] = tree->children[i];
-          tree->Child(i).Parent() = tree->Parent();
+          if (i < bestIndexMinOverlapSplit + tree->MinNumChildren())
+            InsertNodeIntoTree(treeOne, sorted2[i].second);
+          else
+            InsertNodeIntoTree(treeTwo, sorted2[i].second);
         }
+      }
+      else
+      {
+        // Make this node a supernode.
+        tree->MaxNumChildren() +=
+                                tree->AuxiliaryInfo().NormalNodeMaxNumChildren();
+        tree->children.resize(tree->MaxNumChildren() + 1);
+        tree->numChildren = numChildren;
+        for (size_t i = 0; i < numChildren; i++)
+          tree->Child(i).Parent() = tree;
 
         delete treeOne;
         delete treeTwo;
-        tree->NullifyData();
-        tree->SoftDelete();
         return false;
       }
-
-      // If we don't have to worry about the root, we just enlarge this node.
-      tree->MaxNumChildren() += 
-                              tree->AuxiliaryInfo().NormalNodeMaxNumChildren();
-      tree->children.resize(tree->MaxNumChildren() + 1);
-      for (size_t i = 0; i < tree->NumChildren(); i++)
-        tree->Child(i).Parent() = tree;
-
-      delete treeOne;
-      delete treeTwo;
-
-      return false;
     }
-  }
 
-  // Update the split history of each child.
-  treeOne->AuxiliaryInfo().SplitHistory().history[bestAxis] = true;
-  treeOne->AuxiliaryInfo().SplitHistory().lastDimension = bestAxis;
-  treeTwo->AuxiliaryInfo().SplitHistory().history[bestAxis] = true;
-  treeTwo->AuxiliaryInfo().SplitHistory().lastDimension = bestAxis;
-
-  // Remove this node and insert treeOne and treeTwo
-  TreeType* par = tree->Parent();
-  size_t index = 0;
-  for (size_t i = 0; i < par->NumChildren(); i++)
-  {
-    if (par->children[i] == tree)
-    {
-      index = i;
-      break;
-    }
+    // Update the split history of each child.
+    treeOne->AuxiliaryInfo().SplitHistory().history[bestAxis] = true;
+    treeOne->AuxiliaryInfo().SplitHistory().lastDimension = bestAxis;
+    treeTwo->AuxiliaryInfo().SplitHistory().history[bestAxis] = true;
+    treeTwo->AuxiliaryInfo().SplitHistory().lastDimension = bestAxis;
+
+    // The root stays in place; treeOne and treeTwo become its only children.
+    tree->children[0] = treeOne;
+    tree->children[1] = treeTwo;
+    tree->numChildren = 2;
+
+    // We have to update the children of each of these new nodes so that they
+    // record the correct parent.
+    for (size_t i = 0; i < treeOne->NumChildren(); ++i)
+      treeOne->Child(i).Parent() = treeOne;
+    for (size_t i = 0; i < treeTwo->NumChildren(); i++)
+      treeTwo->Child(i).Parent() = treeTwo;
+
+    return false;
   }
-
-  par->children[index] = treeOne;
-  par->children[par->NumChildren()++] = treeTwo;
-
-  // we only add one at a time, so we should only need to test for equality
-  // just in case, we use an assert.
-
-  if (!(par->NumChildren() <= par->MaxNumChildren() + 1))
-    Log::Debug << "error " << par->NumChildren() << ", "
-        << par->MaxNumChildren() + 1 << std::endl;
-  assert(par->NumChildren() <= par->MaxNumChildren() + 1);
-
-  if (par->NumChildren() == par->MaxNumChildren() + 1)
-    XTreeSplit::SplitNonLeafNode(par,relevels);
-
-  // We have to update the children of each of these new nodes so that they
-  // record the correct parent.
-  for (size_t i = 0; i < treeOne->NumChildren(); i++)
-    treeOne->Child(i).Parent() = treeOne;
-  for (size_t i = 0; i < treeTwo->NumChildren(); i++)
-    treeTwo->Child(i).Parent() = treeTwo;
-
-  assert(treeOne->Parent()->NumChildren() <=
-      treeOne->Parent()->MaxNumChildren());
-  assert(treeOne->Parent()->NumChildren() >=
-      treeOne->Parent()->MinNumChildren());
-  assert(treeTwo->Parent()->NumChildren() <=
-      treeTwo->Parent()->MaxNumChildren());
-  assert(treeTwo->Parent()->NumChildren() >=
-      treeTwo->Parent()->MinNumChildren());
-
-  tree->SoftDelete();
-
-  return false;
 }
 
 /**

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/mlpack.git



More information about the debian-science-commits mailing list