C++ Reference

C++ Reference: Algorithms

dynamic_partition.h
Go to the documentation of this file.
1 // Copyright 2010-2018 Google LLC
2 // Licensed under the Apache License, Version 2.0 (the "License");
3 // you may not use this file except in compliance with the License.
4 // You may obtain a copy of the License at
5 //
6 // http://www.apache.org/licenses/LICENSE-2.0
7 //
8 // Unless required by applicable law or agreed to in writing, software
9 // distributed under the License is distributed on an "AS IS" BASIS,
10 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 // See the License for the specific language governing permissions and
12 // limitations under the License.
13 
14 // TODO(user,user): refine this toplevel comment when this file settles.
15 //
16 // Two dynamic partition classes: one that incrementally splits a partition
17 // into more and more parts; one that incrementally merges a partition into less
18 // and less parts.
19 //
20 // GLOSSARY:
21 // The partition classes maintain a partition of N integers 0..N-1
22 // (aka "elements") into disjoint equivalence classes (aka "parts").
23 //
24 // SAFETY:
25 // Like vector<int> crashes when used improperly, these classes are not "safe":
26 // most of their methods may crash if called with invalid arguments. The client
27 // code is responsible for using this class properly. A few DCHECKs() will help
28 // catch bugs, though.
29 
30 #ifndef OR_TOOLS_ALGORITHMS_DYNAMIC_PARTITION_H_
31 #define OR_TOOLS_ALGORITHMS_DYNAMIC_PARTITION_H_
32 
33 #include <string>
34 #include <vector>
35 
36 #include "ortools/base/logging.h"
37 
38 namespace operations_research {
39 
40 // Partition class that supports incremental splitting, with backtracking.
41 // See http://en.wikipedia.org/wiki/Partition_refinement .
42 // More precisely, the supported edit operations are:
43 // - Refine the partition so that a subset S (typically, |S| <<< N)
44 // of elements are all considered non-equivalent to any element in ¬S.
45 // Typically, this should be done in O(|S|).
46 // - Undo the above operations (backtracking).
47 //
48 // TODO(user): rename this to BacktrackableSplittingPartition.
50  public:
51  // Creates a DynamicPartition on n elements, numbered 0..n-1. Start with
52  // the trivial partition (only one subset containing all elements).
53  explicit DynamicPartition(int num_elements);
54 
55  // Ditto, but specify the initial part of each elements. Part indices must
56  // form a dense integer set starting at 0; eg. [2, 1, 0, 1, 1, 3, 0] is valid.
57  explicit DynamicPartition(const std::vector<int>& initial_part_of_element);
58 
59  // Accessors.
60  int NumElements() const { return element_.size(); }
61  const int NumParts() const { return part_.size(); }
62 
63  // To iterate over the elements in part #i:
64  // for (int element : partition.ElementsInPart(i)) { ... }
65  //
66  // ORDERING OF ELEMENTS INSIDE PARTS: the order of elements within a given
67  // part is volatile, and may change with Refine() or UndoRefine*() operations,
68  // even if the part itself doesn't change.
69  struct IterablePart;
70  IterablePart ElementsInPart(int i) const;
71 
72  int PartOf(int element) const;
73  int SizeOfPart(int part) const;
74  int ParentOfPart(int part) const;
75 
76  // A handy shortcut to ElementsInPart(PartOf(e)). The returned IterablePart
77  // will never be empty, since it contains at least i.
78  IterablePart ElementsInSamePartAs(int i) const;
79 
80  // Returns a fingerprint of the given part. While collisions are possible,
81  // their probability is quite low. Two parts that have the same size and the
82  // same fingerprint are most likely identical.
83  // Also, two parts that have the exact same set of elements will *always*
84  // have the same fingerprint.
85  uint64 FprintOfPart(int part) const;
86 
87  // Refines the partition such that elements that are in distinguished_subset
88  // never share the same part as elements that aren't in that subset.
89  // This might be a no-op: in that case, NumParts() won't change, but the
90  // order of elements inside each part may change.
91  //
92  // ORDERING OF PARTS:
93  // For each i such that Part #i has a non-trivial intersection with
94  // "distinguished_subset" (neither empty, nor the full Part); Part #i is
95  // stripped out of all elements that are in "distinguished_subset", and
96  // those elements are sent to a newly created part, whose parent_part = i.
97  // The parts newly created by a single Refine() operations are sorted
98  // by parent_part.
99  // Example: a Refine() on a partition with 6 parts causes parts #1, #3 and
100  // #4 to be split: the partition will now contain 3 new parts: part #6 (with
101  // parent_part = 1), part #7 (with parent_part = 3) and part #8 (with
102  // parent_part = 4).
103  //
104  // TODO(user): the graph symmetry finder could probably benefit a lot from
105  // keeping track of one additional bit of information for each part that
106  // remains unchanged by a Refine() operation: was that part entirely *in*
107  // the distinguished subset or entirely *out*?
108  void Refine(const std::vector<int>& distinguished_subset);
109 
110  // Undo one or several Refine() operations, until the number of parts
111  // becomes equal to "original_num_parts".
112  // Prerequisite: NumParts() >= original_num_parts.
113  void UndoRefineUntilNumPartsEqual(int original_num_parts);
114 
115  // Dump the partition to a string. There might be different conventions for
116  // sorting the parts and the elements inside them.
118  // Elements are sorted within parts, and parts are then sorted
119  // lexicographically.
121  // Elements are sorted within parts, and parts are kept in order.
123  };
124  std::string DebugString(DebugStringSorting sorting) const;
125 
126  // ADVANCED USAGE:
127  // All elements (0..n-1) of the partition, sorted in a way that's compatible
128  // with the hierarchical partitioning:
129  // - All the elements of any given part are contiguous.
130  // - Elements of a part P are always after elements of part Parent(P).
131  // - The order remains identical (and the above property holds) after any
132  // UndoRefine*() operation.
133  // Note that the order does get changed by Refine() operations.
134  // This is a reference, so it'll only remain valid and constant until the
135  // class is destroyed or until Refine() get called.
136  const std::vector<int>& ElementsInHierarchicalOrder() const {
137  return element_;
138  }
139 
140  private:
141  // A DynamicPartition instance maintains a list of all of its elements,
142  // 'sorted' by partitions: elements of the same subset are contiguous
143  // in that list.
144  std::vector<int> element_;
145 
146  // The reverse of elements_[]: element_[index_of_[i]] = i.
147  std::vector<int> index_of_;
148 
149  // part_of_[i] is the index of the part that contains element i.
150  std::vector<int> part_of_;
151 
152  struct Part {
153  // This part holds elements[start_index .. end_index-1].
154  // INVARIANT: end_index > start_index.
155  int start_index; // Inclusive
156  int end_index; // Exclusive
157 
158  // The Part that this part was split out of. See the comment at Refine().
159  // INVARIANT: part[i].parent_part <= i, and the equality holds iff part[i]
160  // has no parent.
161  int parent_part; // Index into the part[] array.
162 
163  // The part's fingerprint is the XOR of all fingerprints of its elements.
164  // See FprintOfInt32() in the .cc.
165  uint64 fprint;
166 
167  Part() : start_index(0), end_index(0), parent_part(0), fprint(0) {}
168  Part(int start_index, int end_index, int parent_part, uint64 fprint)
169  : start_index(start_index),
170  end_index(end_index),
171  parent_part(parent_part),
172  fprint(fprint) {}
173  };
174  std::vector<Part> part_; // The disjoint parts.
175 
176  // Used temporarily and exclusively by Refine(). This prevents Refine()
177  // from being thread-safe.
178  // INVARIANT: tmp_counter_of_part_ contains only 0s before and after Refine().
179  std::vector<int> tmp_counter_of_part_;
180  std::vector<int> tmp_affected_parts_;
181 };
182 
184  std::vector<int>::const_iterator begin() const { return begin_; }
185  std::vector<int>::const_iterator end() const { return end_; }
186  std::vector<int>::const_iterator begin_;
187  std::vector<int>::const_iterator end_;
188 
189  int size() const { return end_ - begin_; }
190 
192  IterablePart(const std::vector<int>::const_iterator& b,
193  const std::vector<int>::const_iterator& e)
194  : begin_(b), end_(e) {}
195 
196  // These typedefs allow this iterator to be used within testing::ElementsAre.
197  typedef int value_type;
198  typedef std::vector<int>::const_iterator const_iterator;
199 };
200 
201 // Partition class that supports incremental merging, using the union-find
202 // algorithm (see http://en.wikipedia.org/wiki/Disjoint-set_data_structure).
204  public:
205  // At first, all nodes are in their own singleton part.
207  explicit MergingPartition(int num_nodes) { Reset(num_nodes); }
208  void Reset(int num_nodes);
209 
210  int NumNodes() const { return parent_.size(); }
211 
212  // Complexity: amortized O(Ackermann⁻¹(N)) -- which is essentially O(1) --
213  // where N is the number of nodes.
214  //
215  // Return value: If this merge caused a representative node (of either node1
216  // or node2) to stop being a representative (because only one can remain);
217  // this method returns that removed representative. Otherwise it returns -1.
218  //
219  // Details: a smaller part will always be merged onto a larger one.
220  // Upons ties, the smaller representative becomes the overall representative.
221  int MergePartsOf(int node1, int node2); // The 'union' of the union-find.
222 
223  // Get the representative of "node" (a node in the same equivalence class,
224  // which will also be returned for any other "node" in the same class).
225  // The complexity if the same as MergePartsOf().
226  int GetRootAndCompressPath(int node);
227 
228  // Specialized reader API: prunes "nodes" to only keep at most one node per
229  // part: any node which is in the same part as an earlier node will be pruned.
230  void KeepOnlyOneNodePerPart(std::vector<int>* nodes);
231 
232  // Output the whole partition as node equivalence classes: if there are K
233  // parts and N nodes, node_equivalence_classes[i] will contain the part index
234  // (a number in 0..K-1) of node #i. Parts will be sorted by their first node
235  // (i.e. node 0 will always be in part 0; then the next node that isn't in
236  // part 0 will be in part 1, and so on).
237  // Returns the number K of classes.
238  int FillEquivalenceClasses(std::vector<int>* node_equivalence_classes);
239 
240  // Dump all components, with nodes sorted within each part and parts
241  // sorted lexicographically. Eg. "0 1 3 4 | 2 5 | 6 7 8".
242  std::string DebugString();
243 
244  // Advanced usage: sets 'node' to be in its original singleton. All nodes
245  // who may point to 'node' as a parent will remain in an inconsistent state.
246  // This can be used to reinitialize a MergingPartition that has been sparsely
247  // modified in O(|modifications|).
248  // CRASHES IF USED INCORRECTLY.
249  void ResetNode(int node);
250 
251  int NumNodesInSamePartAs(int node) {
252  return part_size_[GetRootAndCompressPath(node)];
253  }
254 
255  // FOR DEBUGGING OR SPECIAL "CONST" ACCESS ONLY:
256  // Find the root of the union-find tree with leaf 'node', i.e. its
257  // representative node, but don't use path compression.
258  // The amortized complexity can be as bad as log(N), as opposed to the
259  // version using path compression.
260  int GetRoot(int node) const;
261 
262  private:
263  // Along the upwards path from 'node' to its root, set the parent of all
264  // nodes (including the root) to 'parent'.
265  void SetParentAlongPathToRoot(int node, int parent);
266 
267  std::vector<int> parent_;
268  std::vector<int> part_size_;
269 
270  // Used transiently by KeepOnlyOneNodePerPart().
271  std::vector<bool> tmp_part_bit_;
272 };
273 
274 // *** Implementation of inline methods of the above classes. ***
275 
277  int i) const {
278  DCHECK_GE(i, 0);
279  DCHECK_LT(i, NumParts());
280  return IterablePart(element_.begin() + part_[i].start_index,
281  element_.begin() + part_[i].end_index);
282 }
283 
284 inline int DynamicPartition::PartOf(int element) const {
285  DCHECK_GE(element, 0);
286  DCHECK_LT(element, part_of_.size());
287  return part_of_[element];
288 }
289 
290 inline int DynamicPartition::SizeOfPart(int part) const {
291  DCHECK_GE(part, 0);
292  DCHECK_LT(part, part_.size());
293  const Part& p = part_[part];
294  return p.end_index - p.start_index;
295 }
296 
297 inline int DynamicPartition::ParentOfPart(int part) const {
298  DCHECK_GE(part, 0);
299  DCHECK_LT(part, part_.size());
300  return part_[part].parent_part;
301 }
302 
304  int i) const {
305  return ElementsInPart(PartOf(i));
306 }
307 
308 inline uint64 DynamicPartition::FprintOfPart(int part) const {
309  DCHECK_GE(part, 0);
310  DCHECK_LT(part, part_.size());
311  return part_[part].fprint;
312 }
313 
314 inline int MergingPartition::GetRoot(int node) const {
315  DCHECK_GE(node, 0);
316  DCHECK_LT(node, NumNodes());
317  int child = node;
318  while (true) {
319  const int parent = parent_[child];
320  if (parent == child) return child;
321  child = parent;
322  }
323 }
324 
325 inline void MergingPartition::SetParentAlongPathToRoot(int node, int parent) {
326  DCHECK_GE(node, 0);
327  DCHECK_LT(node, NumNodes());
328  DCHECK_GE(parent, 0);
329  DCHECK_LT(parent, NumNodes());
330  int child = node;
331  while (true) {
332  const int old_parent = parent_[child];
333  parent_[child] = parent;
334  if (old_parent == child) return;
335  child = old_parent;
336  }
337 }
338 
339 inline void MergingPartition::ResetNode(int node) {
340  DCHECK_GE(node, 0);
341  DCHECK_LT(node, NumNodes());
342  parent_[node] = node;
343  part_size_[node] = 1;
344 }
345 
346 } // namespace operations_research
347 
348 #endif // OR_TOOLS_ALGORITHMS_DYNAMIC_PARTITION_H_
int size() const
void ResetNode(int node)
DynamicPartition(int num_elements)
void UndoRefineUntilNumPartsEqual(int original_num_parts)
std::vector< int >::const_iterator begin() const
int value_type
IterablePart ElementsInPart(int i) const
std::vector< int >::const_iterator begin_
uint64 FprintOfPart(int part) const
IterablePart()
std::vector< int >::const_iterator const_iterator
IterablePart ElementsInSamePartAs(int i) const
int ParentOfPart(int part) const
std::string DebugString()
std::vector< int >::const_iterator end() const
int NumElements() const
MergingPartition()
std::vector< int >::const_iterator end_
MergingPartition(int num_nodes)
const int NumParts() const
int NumNodesInSamePartAs(int node)
void Refine(const std::vector< int > &distinguished_subset)
int PartOf(int element) const
DynamicPartition(const std::vector< int > &initial_part_of_element)
DebugStringSorting
IterablePart(const std::vector< int >::const_iterator &b, const std::vector< int >::const_iterator &e)
int GetRootAndCompressPath(int node)
void KeepOnlyOneNodePerPart(std::vector< int > *nodes)
void Reset(int num_nodes)
const std::vector< int > & ElementsInHierarchicalOrder() const
int GetRoot(int node) const
std::string DebugString(DebugStringSorting sorting) const
int FillEquivalenceClasses(std::vector< int > *node_equivalence_classes)
int NumNodes() const
@ SORT_LEXICOGRAPHICALLY
int MergePartsOf(int node1, int node2)
@ SORT_BY_PART
int SizeOfPart(int part) const