//===-- IntervalTree.h ------------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements an interval tree.
//
// Further information:
// https://en.wikipedia.org/wiki/Interval_tree
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_ADT_INTERVALTREE_H
#define LLVM_ADT_INTERVALTREE_H

#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <iterator>

// IntervalTree is a light tree data structure to hold intervals. It allows
// finding all intervals that overlap with any given point. At this time,
// it does not support any deletion or rebalancing operations.
//
// The IntervalTree is designed to be set up once, and then queried without
// any further additions.
//
// Synopsis:
//   Closed intervals delimited by PointT objects are mapped to ValueT objects.
//
// Restrictions:
//   PointT must be a fundamental type.
//   ValueT must be a fundamental or pointer type.
//
// template <typename PointT, typename ValueT, typename DataT>
// class IntervalTree {
// public:
//
//   explicit IntervalTree(Allocator &NodeAllocator);
//   ~IntervalTree();
//
//   using IntervalReferences = SmallVector<const DataT *, 4>;
//
//   void create();
//   void insert(PointT Left, PointT Right, ValueT Value);
//
//   IntervalReferences getContaining(PointT Point);
//   static void sortIntervals(IntervalReferences &Intervals, Sorting Sort);
//
//   find_iterator find(PointType Point) const;
//   find_iterator find_end() const;
//
//   bool empty() const;
//   void clear();
//
//   void print(raw_ostream &OS, bool HexFormat = true);
// };
//
//===----------------------------------------------------------------------===//
//
// In the below given dataset
//
//   [a, b] <- (x)
//
// 'a' and 'b' describe a range and 'x' the value for that interval.
//
// The following data are purely for illustrative purposes:
//
// [30, 35] <- (3035),   [39, 50] <- (3950),   [55, 61] <- (5561),
// [31, 56] <- (3156),   [12, 21] <- (1221),   [25, 41] <- (2541),
// [49, 65] <- (4965),   [71, 79] <- (7179),   [11, 16] <- (1116),
// [20, 30] <- (2030),   [36, 54] <- (3654),   [60, 70] <- (6070),
// [74, 80] <- (7480),   [15, 40] <- (1540),   [43, 43] <- (4343),
// [50, 75] <- (5075),   [10, 85] <- (1085)
//
// The data represents a set of overlapping intervals:
//
//                    30--35  39------------50  55----61
//                      31------------------------56
//     12--------21 25------------41      49-------------65   71-----79
//   11----16  20-----30    36----------------54    60------70  74---- 80
//       15---------------------40  43--43  50--------------------75
// 10----------------------------------------------------------------------85
//
// The items are stored in a binary tree with each node storing:
//
// MP: A middle point.
// IL: All intervals whose left value are completely to the left of the middle
//     point. They are sorted in ascending order by their beginning point.
// IR: All intervals whose right value are completely to the right of the
//     middle point. They are sorted in descending order by their ending point.
// LS: Left subtree.
// RS: Right subtree.
//
// As IL and IR will contain the same intervals, in order to optimize space,
// instead of storing intervals on each node, we use two vectors that will
// contain the intervals described by IL and IR. Each node will contain an
// index into that vector (global bucket), to indicate the beginning of the
// intervals assigned to the node.
//
// The following is the output from print():
//
// 0: MP:43 IR [10,85] [31,56] [36,54] [39,50] [43,43]
// 0: MP:43 IL [10,85] [31,56] [36,54] [39,50] [43,43]
// 1:   MP:25 IR [25,41] [15,40] [20,30]
// 1:   MP:25 IL [15,40] [20,30] [25,41]
// 2:     MP:15 IR [12,21] [11,16]
// 2:     MP:15 IL [11,16] [12,21]
// 2:     MP:36 IR []
// 2:     MP:36 IL []
// 3:       MP:31 IR [30,35]
// 3:       MP:31 IL [30,35]
// 1:   MP:61 IR [50,75] [60,70] [49,65] [55,61]
// 1:   MP:61 IL [49,65] [50,75] [55,61] [60,70]
// 2:     MP:74 IR [74,80] [71,79]
// 2:     MP:74 IL [71,79] [74,80]
//
// with:
//    0: Root Node.
//   MP: Middle point.
//   IL: Intervals to the left (in ascending order by beginning point).
//   IR: Intervals to the right (in descending order by ending point).
//
//                                    Root
//                                      |
//                                      V
//                       +------------MP:43------------+
//                       |            IL IR            |
//                       |       [10,85] [10,85]       |
//                    LS |       [31,56] [31,56]       | RS
//                       |       [36,54] [36,54]       |
//                       |       [39,50] [39,50]       |
//                       |       [43,43] [43,43]       |
//                       V                             V
//        +------------MP:25------------+            MP:61------------+
//        |            IL IR            |            IL IR            |
//        |       [15,40] [25,41]       |       [49,65] [50,75]       |
//     LS |       [20,30] [15,40]       | RS    [50,75] [60,70]       | RS
//        |       [25,41] [20,30]       |       [55,61] [49,65]       |
//        |                             |       [60,70] [55,61]       |
//        V                             V                             V
//      MP:15                 +-------MP:36                         MP:74
//      IL IR                 |       IL IR                         IL IR
// [11,16] [12,21]         LS |       [] []                    [71,79] [74,80]
// [12,21] [11,16]            |                                [74,80] [71,79]
//                            V
//                          MP:31
//                          IL IR
//                     [30,35] [30,35]
//
// The creation of an interval tree is done in 2 steps:
// 1) Insert the interval items by calling
//      void insert(PointT Left, PointT Right, ValueT Value);
//    Left, Right: the interval left and right limits.
//    Value: the data associated with that specific interval.
// // 2) Create the interval tree by calling // void create(); // // Once the tree is created, it is switched to query mode. // Query the tree by using iterators or container. // // a) Iterators over intervals overlapping the given point with very weak // ordering guarantees. // find_iterator begin(PointType Point) const; // find_iterator end() const; // Point: a target point to be tested for inclusion in any interval. // // b) Container: // IntervalReferences getContaining(PointT Point); // Point: a target point to be tested for inclusion in any interval. // Returns vector with all the intervals containing the target point. // // The returned intervals are in their natural tree location. They can // be sorted: // // static void sortIntervals(IntervalReferences &Intervals, Sorting Sort); // // Ability to print the constructed interval tree: // void print(raw_ostream &OS, bool HexFormat = true); // Display the associated data in hexadecimal format. namespace llvm { //===----------------------------------------------------------------------===// //--- IntervalData ----// //===----------------------------------------------------------------------===// /// An interval data composed by a \a Left and \a Right points and an /// associated \a Value. /// \a PointT corresponds to the interval endpoints type. /// \a ValueT corresponds to the interval value type. template class IntervalData { protected: using PointType = PointT; using ValueType = ValueT; private: PointType Left; PointType Right; ValueType Value; public: IntervalData() = delete; IntervalData(PointType Left, PointType Right, ValueType Value) : Left(Left), Right(Right), Value(Value) { assert(Left <= Right && "'Left' must be less or equal to 'Right'"); } virtual ~IntervalData() = default; PointType left() const { return Left; } PointType right() const { return Right; } ValueType value() const { return Value; } /// Return true if \a Point is inside the left bound of closed interval \a /// [Left;Right]. 
This is Left <= Point for closed intervals. bool left(const PointType &Point) const { return left() <= Point; } /// Return true if \a Point is inside the right bound of closed interval \a /// [Left;Right]. This is Point <= Right for closed intervals. bool right(const PointType &Point) const { return Point <= right(); } /// Return true when \a Point is contained in interval \a [Left;Right]. /// This is Left <= Point <= Right for closed intervals. bool contains(const PointType &Point) const { return left(Point) && right(Point); } }; //===----------------------------------------------------------------------===// //--- IntervalTree ----// //===----------------------------------------------------------------------===// // Helper class template that is used by the IntervalTree to ensure that one // does instantiate using only fundamental and/or pointer types. template using PointTypeIsValid = std::bool_constant::value>; template using ValueTypeIsValid = std::bool_constant::value || std::is_pointer::value>; template > class IntervalTree { static_assert(PointTypeIsValid::value, "PointT must be a fundamental type"); static_assert(ValueTypeIsValid::value, "ValueT must be a fundamental or pointer type"); public: using PointType = PointT; using ValueType = ValueT; using DataType = DataT; using Allocator = BumpPtrAllocator; enum class Sorting { Ascending, Descending }; using IntervalReferences = SmallVector; private: using IntervalVector = SmallVector; using PointsVector = SmallVector; class IntervalNode { PointType MiddlePoint; // MP - Middle point. IntervalNode *Left = nullptr; // LS - Left subtree. IntervalNode *Right = nullptr; // RS - Right subtree. unsigned BucketIntervalsStart = 0; // Starting index in global bucket. unsigned BucketIntervalsSize = 0; // Size of bucket. 
public: PointType middle() const { return MiddlePoint; } unsigned start() const { return BucketIntervalsStart; } unsigned size() const { return BucketIntervalsSize; } IntervalNode(PointType Point, unsigned Start) : MiddlePoint(Point), BucketIntervalsStart(Start) {} friend IntervalTree; }; Allocator &NodeAllocator; // Allocator used for creating interval nodes. IntervalNode *Root = nullptr; // Interval tree root. IntervalVector Intervals; // Storage for each interval and all of the fields // point back into it. PointsVector EndPoints; // Sorted left and right points of all the intervals. // These vectors provide storage that nodes carve buckets of overlapping // intervals out of. All intervals are recorded on each vector. // The bucket with the intervals associated to a node, is determined by // the fields 'BucketIntervalStart' and 'BucketIntervalSize' in the node. // The buckets in the first vector are sorted in ascending order using // the left value and the buckets in the second vector are sorted in // descending order using the right value. Every interval in a bucket // contains the middle point for the node. IntervalReferences IntervalsLeft; // Intervals to the left of middle point. IntervalReferences IntervalsRight; // Intervals to the right of middle point. // Working vector used during the tree creation to sort the intervals. It is // cleared once the tree is created. IntervalReferences References; /// Recursively delete the constructed tree. void deleteTree(IntervalNode *Node) { if (Node) { deleteTree(Node->Left); deleteTree(Node->Right); Node->~IntervalNode(); NodeAllocator.Deallocate(Node); } } /// Print the interval list (left and right) for a given \a Node. static void printList(raw_ostream &OS, IntervalReferences &IntervalSet, unsigned Start, unsigned Size, bool HexFormat = true) { assert(Start + Size <= IntervalSet.size() && "Start + Size must be in bounds of the IntervalSet"); const char *Format = HexFormat ? 
"[0x%08x,0x%08x] " : "[%2d,%2d] "; if (Size) { for (unsigned Position = Start; Position < Start + Size; ++Position) OS << format(Format, IntervalSet[Position]->left(), IntervalSet[Position]->right()); } else { OS << "[]"; } OS << "\n"; } /// Print an interval tree \a Node. void printNode(raw_ostream &OS, unsigned Level, IntervalNode *Node, bool HexFormat = true) { const char *Format = HexFormat ? "MP:0x%08x " : "MP:%2d "; auto PrintNodeData = [&](StringRef Text, IntervalReferences &IntervalSet) { OS << format("%5d: ", Level); OS.indent(Level * 2); OS << format(Format, Node->middle()) << Text << " "; printList(OS, IntervalSet, Node->start(), Node->size(), HexFormat); }; PrintNodeData("IR", IntervalsRight); PrintNodeData("IL", IntervalsLeft); } /// Recursively print all the interval nodes. void printTree(raw_ostream &OS, unsigned Level, IntervalNode *Node, bool HexFormat = true) { if (Node) { printNode(OS, Level, Node, HexFormat); ++Level; printTree(OS, Level, Node->Left, HexFormat); printTree(OS, Level, Node->Right, HexFormat); } } /// Recursively construct the interval tree. /// IntervalsSize: Number of intervals that have been processed and it will /// be used as the start for the intervals bucket for a node. /// PointsBeginIndex, PointsEndIndex: Determine the range into the EndPoints /// vector of end points to be processed. /// ReferencesBeginIndex, ReferencesSize: Determine the range into the /// intervals being processed. IntervalNode *createTree(unsigned &IntervalsSize, int PointsBeginIndex, int PointsEndIndex, int ReferencesBeginIndex, int ReferencesSize) { // We start by taking the entire range of all the intervals and dividing // it in half at x_middle (in practice, x_middle should be picked to keep // the tree relatively balanced). 
// This gives three sets of intervals, those completely to the left of // x_middle which we'll call S_left, those completely to the right of // x_middle which we'll call S_right, and those overlapping x_middle // which we'll call S_middle. // The intervals in S_left and S_right are recursively divided in the // same manner until there are no intervals remaining. if (PointsBeginIndex > PointsEndIndex || ReferencesBeginIndex >= ReferencesSize) return nullptr; int MiddleIndex = (PointsBeginIndex + PointsEndIndex) / 2; PointType MiddlePoint = EndPoints[MiddleIndex]; unsigned NewBucketStart = IntervalsSize; unsigned NewBucketSize = 0; int ReferencesRightIndex = ReferencesSize; IntervalNode *Root = new (NodeAllocator) IntervalNode(MiddlePoint, NewBucketStart); // A quicksort implementation where all the intervals that overlap // with the pivot are put into the "bucket", and "References" is the // partition space where we recursively sort the remaining intervals. for (int Index = ReferencesBeginIndex; Index < ReferencesRightIndex;) { // Current interval contains the middle point. if (References[Index]->contains(MiddlePoint)) { IntervalsLeft[IntervalsSize] = References[Index]; IntervalsRight[IntervalsSize] = References[Index]; ++IntervalsSize; Root->BucketIntervalsSize = ++NewBucketSize; if (Index < --ReferencesRightIndex) std::swap(References[Index], References[ReferencesRightIndex]); if (ReferencesRightIndex < --ReferencesSize) std::swap(References[ReferencesRightIndex], References[ReferencesSize]); continue; } if (References[Index]->left() > MiddlePoint) { if (Index < --ReferencesRightIndex) std::swap(References[Index], References[ReferencesRightIndex]); continue; } ++Index; } // Sort intervals on the left and right of the middle point. if (NewBucketSize > 1) { // Sort the intervals in ascending order by their beginning point. 
std::stable_sort(IntervalsLeft.begin() + NewBucketStart, IntervalsLeft.begin() + NewBucketStart + NewBucketSize, [](const DataType *LHS, const DataType *RHS) { return LHS->left() < RHS->left(); }); // Sort the intervals in descending order by their ending point. std::stable_sort(IntervalsRight.begin() + NewBucketStart, IntervalsRight.begin() + NewBucketStart + NewBucketSize, [](const DataType *LHS, const DataType *RHS) { return LHS->right() > RHS->right(); }); } if (PointsBeginIndex <= MiddleIndex - 1) { Root->Left = createTree(IntervalsSize, PointsBeginIndex, MiddleIndex - 1, ReferencesBeginIndex, ReferencesRightIndex); } if (MiddleIndex + 1 <= PointsEndIndex) { Root->Right = createTree(IntervalsSize, MiddleIndex + 1, PointsEndIndex, ReferencesRightIndex, ReferencesSize); } return Root; } public: class find_iterator { public: using iterator_category = std::forward_iterator_tag; using value_type = DataType; using difference_type = DataType; using pointer = DataType *; using reference = DataType &; private: const IntervalReferences *AscendingBuckets = nullptr; const IntervalReferences *DescendingBuckets = nullptr; // Current node and index while traversing the intervals that contain // the reference point. IntervalNode *Node = nullptr; PointType Point = {}; unsigned Index = 0; // For the current node, check if we have intervals that contain the // reference point. We return when the node does have intervals that // contain such point. Otherwise we keep descending on that branch. void initNode() { Index = 0; while (Node) { // Return if the reference point is the same as the middle point or // the current node doesn't have any intervals at all. if (Point == Node->middle()) { if (Node->size() == 0) { // No intervals that contain the reference point. Node = nullptr; } return; } if (Point < Node->middle()) { // The reference point can be at the left or right of the middle // point. 
Return if the current node has intervals that contain the // reference point; otherwise descend on the respective branch. if (Node->size() && (*AscendingBuckets)[Node->start()]->left(Point)) { return; } Node = Node->Left; } else { if (Node->size() && (*DescendingBuckets)[Node->start()]->right(Point)) { return; } Node = Node->Right; } } } // Given the current node (which was initialized by initNode), move to // the next interval in the list of intervals that contain the reference // point. Otherwise move to the next node, as the intervals contained // in that node, can contain the reference point. void nextInterval() { // If there are available intervals that contain the reference point, // traverse them; otherwise move to the left or right node, depending // on the middle point value. if (++Index < Node->size()) { if (Node->middle() == Point) return; if (Point < Node->middle()) { // Reference point is on the left. if (!(*AscendingBuckets)[Node->start() + Index]->left(Point)) { // The intervals don't contain the reference point. Move to the // next node, preserving the descending order. Node = Node->Left; initNode(); } } else { // Reference point is on the right. if (!(*DescendingBuckets)[Node->start() + Index]->right(Point)) { // The intervals don't contain the reference point. Move to the // next node, preserving the ascending order. Node = Node->Right; initNode(); } } } else { // We have traversed all the intervals in the current node. if (Point == Node->middle()) { Node = nullptr; Index = 0; return; } // Select a branch based on the middle point. Node = Point < Node->middle() ? Node->Left : Node->Right; initNode(); } } find_iterator() = default; explicit find_iterator(const IntervalReferences *Left, const IntervalReferences *Right, IntervalNode *Node, PointType Point) : AscendingBuckets(Left), DescendingBuckets(Right), Node(Node), Point(Point), Index(0) { initNode(); } const DataType *current() const { return (Point <= Node->middle()) ? 
(*AscendingBuckets)[Node->start() + Index] : (*DescendingBuckets)[Node->start() + Index]; } public: find_iterator &operator++() { nextInterval(); return *this; } find_iterator operator++(int) { find_iterator Iter(*this); nextInterval(); return Iter; } /// Dereference operators. const DataType *operator->() const { return current(); } const DataType &operator*() const { return *(current()); } /// Comparison operators. friend bool operator==(const find_iterator &LHS, const find_iterator &RHS) { return (!LHS.Node && !RHS.Node && !LHS.Index && !RHS.Index) || (LHS.Point == RHS.Point && LHS.Node == RHS.Node && LHS.Index == RHS.Index); } friend bool operator!=(const find_iterator &LHS, const find_iterator &RHS) { return !(LHS == RHS); } friend IntervalTree; }; private: find_iterator End; public: explicit IntervalTree(Allocator &NodeAllocator) : NodeAllocator(NodeAllocator) {} ~IntervalTree() { clear(); } /// Return true when no intervals are mapped. bool empty() const { return Root == nullptr; } /// Remove all entries. void clear() { deleteTree(Root); Root = nullptr; Intervals.clear(); IntervalsLeft.clear(); IntervalsRight.clear(); EndPoints.clear(); } /// Add a mapping of [Left;Right] to \a Value. void insert(PointType Left, PointType Right, ValueType Value) { assert(empty() && "Invalid insertion. Interval tree already constructed."); Intervals.emplace_back(Left, Right, Value); } /// Return all the intervals in their natural tree location, that /// contain the given point. IntervalReferences getContaining(PointType Point) const { assert(!empty() && "Interval tree it is not constructed."); IntervalReferences IntervalSet; for (find_iterator Iter = find(Point), E = find_end(); Iter != E; ++Iter) IntervalSet.push_back(const_cast(&(*Iter))); return IntervalSet; } /// Sort the given intervals using the following sort options: /// Ascending: return the intervals with the smallest at the front. /// Descending: return the intervals with the biggest at the front. 
static void sortIntervals(IntervalReferences &IntervalSet, Sorting Sort) { std::stable_sort(IntervalSet.begin(), IntervalSet.end(), [Sort](const DataType *RHS, const DataType *LHS) { return Sort == Sorting::Ascending ? (LHS->right() - LHS->left()) > (RHS->right() - RHS->left()) : (LHS->right() - LHS->left()) < (RHS->right() - RHS->left()); }); } /// Print the interval tree. /// When \a HexFormat is true, the interval tree interval ranges and /// associated values are printed in hexadecimal format. void print(raw_ostream &OS, bool HexFormat = true) { printTree(OS, 0, Root, HexFormat); } /// Create the interval tree. void create() { assert(empty() && "Interval tree already constructed."); // Sorted vector of unique end points values of all the intervals. // Records references to the collected intervals. SmallVector Points; for (const DataType &Data : Intervals) { Points.push_back(Data.left()); Points.push_back(Data.right()); References.push_back(std::addressof(Data)); } std::stable_sort(Points.begin(), Points.end()); auto Last = std::unique(Points.begin(), Points.end()); Points.erase(Last, Points.end()); EndPoints.assign(Points.begin(), Points.end()); IntervalsLeft.resize(Intervals.size()); IntervalsRight.resize(Intervals.size()); // Given a set of n intervals, construct a data structure so that // we can efficiently retrieve all intervals overlapping another // interval or point. unsigned IntervalsSize = 0; Root = createTree(IntervalsSize, /*PointsBeginIndex=*/0, EndPoints.size() - 1, /*ReferencesBeginIndex=*/0, References.size()); // Save to clear this storage, as it used only to sort the intervals. References.clear(); } /// Iterator to start a find operation; it returns find_end() if the /// tree has not been built. /// There is no support to iterate over all the elements of the tree. find_iterator find(PointType Point) const { return empty() ? find_end() : find_iterator(&IntervalsLeft, &IntervalsRight, Root, Point); } /// Iterator to end find operation. 
find_iterator find_end() const { return End; } }; } // namespace llvm #endif // LLVM_ADT_INTERVALTREE_H