OR-Tools  8.1
connected_components.h
Go to the documentation of this file.
1 // Copyright 2010-2018 Google LLC
2 // Licensed under the Apache License, Version 2.0 (the "License");
3 // you may not use this file except in compliance with the License.
4 // You may obtain a copy of the License at
5 //
6 // http://www.apache.org/licenses/LICENSE-2.0
7 //
8 // Unless required by applicable law or agreed to in writing, software
9 // distributed under the License is distributed on an "AS IS" BASIS,
10 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 // See the License for the specific language governing permissions and
12 // limitations under the License.
13 
14 //
15 // Licensed under the Apache License, Version 2.0 (the "License");
16 // you may not use this file except in compliance with the License.
17 // You may obtain a copy of the License at
18 //
19 // http://www.apache.org/licenses/LICENSE-2.0
20 //
21 // Unless required by applicable law or agreed to in writing, software
22 // distributed under the License is distributed on an "AS IS" BASIS,
23 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
24 // See the License for the specific language governing permissions and
25 // limitations under the License.
26 
27 // Finds the connected components in an undirected graph:
28 // https://en.wikipedia.org/wiki/Connected_component_(graph_theory)
29 //
30 // If you have a fixed graph where the node are dense integers, use
31 // GetConnectedComponents(): it's very fast and uses little memory.
32 //
33 // If you have a more dynamic scenario where you want to incrementally
34 // add nodes or edges and query the connectivity between them, use the
35 // [Dense]ConnectedComponentsFinder class, which uses the union-find algorithm
36 // aka disjoint sets: https://en.wikipedia.org/wiki/Disjoint-set_data_structure.
37 
38 #ifndef UTIL_GRAPH_CONNECTED_COMPONENTS_H_
39 #define UTIL_GRAPH_CONNECTED_COMPONENTS_H_
40 
41 #include <functional>
42 #include <map>
43 #include <memory>
44 #include <set>
45 #include <type_traits>
46 #include <vector>
47 
48 #include "absl/container/flat_hash_map.h"
49 #include "absl/container/flat_hash_set.h"
50 #include "absl/hash/hash.h"
51 #include "absl/meta/type_traits.h"
52 #include "ortools/base/logging.h"
53 #include "ortools/base/map_util.h"
54 #include "ortools/base/ptr_util.h"
55 
56 namespace util {
57 // Finds the connected components of the graph, using BFS internally.
58 // Works on any *undirected* graph class whose nodes are dense integers and that
59 // supports the [] operator for adjacency lists: graph[x] must be an integer
60 // container listing the nodes that are adjacent to node #x.
61 // Example: std::vector<std::vector<int>>.
62 //
63 // "Undirected" means that for all y in graph[x], x is in graph[y].
64 //
65 // Returns the mapping from node to component index. The component indices are
66 // deterministic: Component #0 will be the one that has node #0, component #1
67 // the one that has the lowest-index node that isn't in component #0, and so on.
68 //
69 // Example on the following 6-node graph: 5--3--0--1 2--4
70 // vector<vector<int>> graph = {{1, 3}, {0}, {4}, {0, 5}, {2}, {3}};
71 // GetConnectedComponents(graph); // returns [0, 0, 1, 0, 1, 0].
72 template <class UndirectedGraph>
73 std::vector<int> GetConnectedComponents(int num_nodes,
74  const UndirectedGraph& graph);
75 } // namespace util
76 
77 // NOTE(user): The rest of the functions below should also be in namespace
78 // util, but for historical reasons it hasn't been done yet.
79 
80 // A connected components finder that only works on dense ints.
82  public:
84 
86  delete;
88  const DenseConnectedComponentsFinder&) = delete;
89 
90  // The main API is the same as ConnectedComponentsFinder (below): see the
91  // homonymous functions there.
92  void AddEdge(int node1, int node2);
93  bool Connected(int node1, int node2);
94  int GetSize(int node);
95  int GetNumberOfComponents() const { return num_components_; }
96  int GetNumberOfNodes() const { return parent_.size(); }
97 
98  // Gets the current set of root nodes in sorted order. Runs in amortized
99  // O(#components) time.
100  const std::vector<int>& GetComponentRoots();
101 
102  // Sets the number of nodes in the graph. The graph can only grow: this
103  // dies if "num_nodes" is lower or equal to any of the values ever given
104  // to AddEdge(), or lower than a previous value given to SetNumberOfNodes().
105  // You need this if there are nodes that don't have any edges.
106  void SetNumberOfNodes(int num_nodes);
107 
108  // Returns the root of the set for the given node. node must be in
109  // [0;GetNumberOfNodes()-1].
110  // Non-const because it does path compression internally.
111  int FindRoot(int node);
112 
113  // Returns the same as GetConnectedComponents().
114  std::vector<int> GetComponentIds();
115 
116  private:
117  // parent[i] is the id of an ancestor for node i. A node is a root iff
118  // parent[i] == i.
119  std::vector<int> parent_;
120  // If i is a root, component_size_[i] is the number of elements in the
121  // component. If i is not a root, component_size_[i] is meaningless.
122  std::vector<int> component_size_;
123  // rank[i] is the depth of the tree.
124  std::vector<int> rank_;
125  // Number of connected components.
126  int num_components_ = 0;
127  // The current roots. This is maintained lazily by GetComponentRoots().
128  std::vector<int> roots_;
129  // The number of nodes that existed the last time GetComponentRoots() was
130  // called.
131  int num_nodes_at_last_get_roots_call_ = 0;
132 };
133 
134 namespace internal {
135 // A helper to deduce the type of map to use depending on whether CompareOrHashT
136 // is a comparator or a hasher (prefer the latter).
137 template <typename T, typename CompareOrHashT>
139  // SFINAE trait to detect hash functors and select unordered containers if so,
140  // and ordered containers otherwise (= by default).
141  template <typename U, typename E = void>
143  using Set = std::set<T, CompareOrHashT>;
144  using Map = std::map<T, int, CompareOrHashT>;
145  };
146 
147  // The expression inside decltype is basically saying that "H(x)" is
148  // well-formed, where H is an instance of U and x is an instance of T, and is
149  // a value of integral type. That is, we are "duck-typing" on whether U looks
150  // like a hash functor.
151  template <typename U>
153  U, absl::enable_if_t<std::is_integral<decltype(std::declval<const U&>()(
154  std::declval<const T&>()))>::value>> {
155  using Set = absl::flat_hash_set<T, CompareOrHashT>;
156  using Map = absl::flat_hash_map<T, int, CompareOrHashT>;
157  };
158 
161 };
162 
163 } // namespace internal
164 
165 // Usage:
166 // ConnectedComponentsFinder<MyNodeType> cc;
167 // cc.AddNode(node1);
168 // cc.AddNode(node2);
169 // cc.AddEdge(node1, node2);
170 // ... repeating, adding nodes and edges as needed. Adding an edge
171 // will automatically also add the two nodes at its ends, if they
172 // haven't already been added.
173 // vector<set<MyNodeType> > components;
174 // cc.FindConnectedComponents(&components);
175 // Each entry in components now contains all the nodes in a single
176 // connected component.
177 //
178 // Usage with flat_hash_set:
179 // using ConnectedComponentType = flat_hash_set<MyNodeType>;
180 // ConnectedComponentsFinder<ConnectedComponentType::key_type,
181 // ConnectedComponentType::hasher>
182 // cc;
183 // ...
184 // vector<ConnectedComponentType> components;
185 // cc.FindConnectedComponents(&components);
186 //
187 // If you want to, you can continue adding nodes and edges after calling
188 // FindConnectedComponents, then call it again later.
189 //
190 // If your node type isn't STL-friendly, then you can use pointers to
191 // it instead:
192 // ConnectedComponentsFinder<MySTLUnfriendlyNodeType*> cc;
193 // cc.AddNode(&node1);
194 // ... and so on...
195 // Of course, in this usage, the connected components finder retains
196 // these pointers through its lifetime (though it doesn't dereference them).
197 template <typename T, typename CompareOrHashT = std::less<T>>
199  public:
200  // Constructs a connected components finder.
202 
205  delete;
206 
207  // Adds a node in the graph. It is OK to add the same node more than
208  // once; additions after the first have no effect.
209  void AddNode(T node) { LookupOrInsertNode<true>(node); }
210 
211  // Adds an edge in the graph. Also adds both endpoint nodes as necessary.
212  // It is not an error to add the same edge twice. Self-edges are OK too.
213  void AddEdge(T node1, T node2) {
214  delegate_.AddEdge(LookupOrInsertNode<false>(node1),
215  LookupOrInsertNode<false>(node2));
216  }
217 
218  // Returns true iff both nodes are in the same connected component.
219  // Returns false if either node has not been already added with AddNode.
220  bool Connected(T node1, T node2) {
221  return delegate_.Connected(gtl::FindWithDefault(index_, node1, -1),
222  gtl::FindWithDefault(index_, node2, -1));
223  }
224 
225  // Finds the connected component containing a node, and returns the
226  // total number of nodes in that component. Returns zero iff the
227  // node has not been already added with AddNode.
228  int GetSize(T node) {
229  return delegate_.GetSize(gtl::FindWithDefault(index_, node, -1));
230  }
231 
232  // Finds all the connected components and assigns them to components.
233  // Components are ordered in the same way nodes were added, i.e. if node 'b'
234  // was added before node 'c', then either:
235  // - 'c' belongs to the same component as a node 'a' added before 'b', or
236  // - the component for 'c' comes after the one for 'b'.
237  // There are two versions:
238  // - The first one returns the result, and stores each component in a vector.
239  // This is the preferred version.
240  // - The second one populates the result, and stores each component in a set.
241  std::vector<std::vector<T>> FindConnectedComponents() {
242  const auto component_ids = delegate_.GetComponentIds();
243  std::vector<std::vector<T>> components(delegate_.GetNumberOfComponents());
244  for (const auto& elem_id : index_) {
245  components[component_ids[elem_id.second]].push_back(elem_id.first);
246  }
247  return components;
248  }
250  std::vector<typename internal::ConnectedComponentsTypeHelper<
251  T, CompareOrHashT>::Set>* components) {
252  const auto component_ids = delegate_.GetComponentIds();
253  components->clear();
254  components->resize(delegate_.GetNumberOfComponents());
255  for (const auto& elem_id : index_) {
256  components->at(component_ids[elem_id.second]).insert(elem_id.first);
257  }
258  }
259 
260  // Returns the current number of connected components.
261  // This number can change as the new nodes or edges are added.
262  int GetNumberOfComponents() const {
263  return delegate_.GetNumberOfComponents();
264  }
265 
266  // Returns the current number of added distinct nodes.
267  // This includes nodes added explicitly via the calls to AddNode() method
268  // and implicitly via the calls to AddEdge() method.
269  // Nodes that were added several times only count once.
270  int GetNumberOfNodes() const { return delegate_.GetNumberOfNodes(); }
271 
272  private:
273  // Returns the index for the given node. If the node does not exist and
274  // update_delegate is true, explicitly add the node to the delegate.
275  template <bool update_delegate>
276  int LookupOrInsertNode(T node) {
277  const auto result = index_.emplace(node, index_.size());
278  const int node_id = result.first->second;
279  if (update_delegate && result.second) {
280  // A new index was created.
281  delegate_.SetNumberOfNodes(node_id + 1);
282  }
283  return node_id;
284  }
285 
288  index_;
289 };
290 
291 // =============================================================================
292 // Implementations of the method templates
293 // =============================================================================
294 namespace util {
295 template <class UndirectedGraph>
296 std::vector<int> GetConnectedComponents(int num_nodes,
297  const UndirectedGraph& graph) {
298  std::vector<int> component_of_node(num_nodes, -1);
299  std::vector<int> bfs_queue;
300  int num_components = 0;
301  for (int src = 0; src < num_nodes; ++src) {
302  if (component_of_node[src] >= 0) continue;
303  bfs_queue.push_back(src);
304  component_of_node[src] = num_components;
305  for (int num_visited = 0; num_visited < bfs_queue.size(); ++num_visited) {
306  const int node = bfs_queue[num_visited];
307  for (const int neighbor : graph[node]) {
308  if (component_of_node[neighbor] >= 0) continue;
309  component_of_node[neighbor] = num_components;
310  bfs_queue.push_back(neighbor);
311  }
312  }
313  ++num_components;
314  bfs_queue.clear();
315  }
316  return component_of_node;
317 }
318 } // namespace util
319 
320 #endif // UTIL_GRAPH_CONNECTED_COMPONENTS_H_
DenseConnectedComponentsFinder::GetComponentRoots
const std::vector< int > & GetComponentRoots()
Definition: connected_components.cc:73
ptr_util.h
map_util.h
DenseConnectedComponentsFinder::SetNumberOfNodes
void SetNumberOfNodes(int num_nodes)
Definition: connected_components.cc:36
internal::ConnectedComponentsTypeHelper::SelectContainer::Map
std::map< T, int, CompareOrHashT > Map
Definition: connected_components.h:144
DenseConnectedComponentsFinder::FindRoot
int FindRoot(int node)
Definition: connected_components.cc:54
ConnectedComponentsFinder::GetNumberOfComponents
int GetNumberOfComponents() const
Definition: connected_components.h:262
ConnectedComponentsFinder::ConnectedComponentsFinder
ConnectedComponentsFinder(const ConnectedComponentsFinder &)=delete
logging.h
internal::ConnectedComponentsTypeHelper< T, std::less< T > >::Map
typename SelectContainer< std::less< T > >::Map Map
Definition: connected_components.h:160
DenseConnectedComponentsFinder::GetNumberOfComponents
int GetNumberOfComponents() const
Definition: connected_components.h:95
gtl::FindWithDefault
const Collection::value_type::second_type & FindWithDefault(const Collection &collection, const typename Collection::value_type::first_type &key, const typename Collection::value_type::second_type &value)
Definition: map_util.h:26
ConnectedComponentsFinder
Definition: connected_components.h:198
internal::ConnectedComponentsTypeHelper::SelectContainer::Set
std::set< T, CompareOrHashT > Set
Definition: connected_components.h:143
DenseConnectedComponentsFinder::GetSize
int GetSize(int node)
Definition: connected_components.cc:141
ConnectedComponentsFinder::FindConnectedComponents
void FindConnectedComponents(std::vector< typename internal::ConnectedComponentsTypeHelper< T, CompareOrHashT >::Set > *components)
Definition: connected_components.h:249
ConnectedComponentsFinder::Connected
bool Connected(T node1, T node2)
Definition: connected_components.h:220
DenseConnectedComponentsFinder::operator=
DenseConnectedComponentsFinder & operator=(const DenseConnectedComponentsFinder &)=delete
DenseConnectedComponentsFinder::DenseConnectedComponentsFinder
DenseConnectedComponentsFinder(const DenseConnectedComponentsFinder &)=delete
internal::ConnectedComponentsTypeHelper::SelectContainer< U, absl::enable_if_t< std::is_integral< decltype(std::declval< const U & >()(std::declval< const T & >()))>::value > >::Map
absl::flat_hash_map< T, int, CompareOrHashT > Map
Definition: connected_components.h:156
internal::ConnectedComponentsTypeHelper< T, std::less< T > >::Set
typename SelectContainer< std::less< T > >::Set Set
Definition: connected_components.h:159
DenseConnectedComponentsFinder::AddEdge
void AddEdge(int node1, int node2)
Definition: connected_components.cc:96
ConnectedComponentsFinder::operator=
ConnectedComponentsFinder & operator=(const ConnectedComponentsFinder &)=delete
util::GetConnectedComponents
std::vector< int > GetConnectedComponents(int num_nodes, const UndirectedGraph &graph)
Definition: connected_components.h:296
internal::ConnectedComponentsTypeHelper::SelectContainer< U, absl::enable_if_t< std::is_integral< decltype(std::declval< const U & >()(std::declval< const T & >()))>::value > >::Set
absl::flat_hash_set< T, CompareOrHashT > Set
Definition: connected_components.h:155
ConnectedComponentsFinder::AddNode
void AddNode(T node)
Definition: connected_components.h:209
ConnectedComponentsFinder::FindConnectedComponents
std::vector< std::vector< T > > FindConnectedComponents()
Definition: connected_components.h:241
DenseConnectedComponentsFinder::GetNumberOfNodes
int GetNumberOfNodes() const
Definition: connected_components.h:96
DenseConnectedComponentsFinder::DenseConnectedComponentsFinder
DenseConnectedComponentsFinder()
Definition: connected_components.h:83
ConnectedComponentsFinder::ConnectedComponentsFinder
ConnectedComponentsFinder()
Definition: connected_components.h:201
internal::ConnectedComponentsTypeHelper
Definition: connected_components.h:138
DenseConnectedComponentsFinder::GetComponentIds
std::vector< int > GetComponentIds()
Definition: connected_components.cc:148
DenseConnectedComponentsFinder::Connected
bool Connected(int node1, int node2)
Definition: connected_components.cc:133
ConnectedComponentsFinder::GetSize
int GetSize(T node)
Definition: connected_components.h:228
internal
Definition: bop_parameters.pb.h:39
ConnectedComponentsFinder::GetNumberOfNodes
int GetNumberOfNodes() const
Definition: connected_components.h:270
absl
Definition: cleanup.h:22
DenseConnectedComponentsFinder
Definition: connected_components.h:81
util
Definition: status_builder.h:21
internal::ConnectedComponentsTypeHelper::SelectContainer
Definition: connected_components.h:142
ConnectedComponentsFinder::AddEdge
void AddEdge(T node1, T node2)
Definition: connected_components.h:213