OR-Tools  8.1
tuple_set.h
Go to the documentation of this file.
1 // Copyright 2010-2018 Google LLC
2 // Licensed under the Apache License, Version 2.0 (the "License");
3 // you may not use this file except in compliance with the License.
4 // You may obtain a copy of the License at
5 //
6 // http://www.apache.org/licenses/LICENSE-2.0
7 //
8 // Unless required by applicable law or agreed to in writing, software
9 // distributed under the License is distributed on an "AS IS" BASIS,
10 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 // See the License for the specific language governing permissions and
12 // limitations under the License.
13 
14 // Set of integer tuples (fixed-size arrays, all of the same size) with
15 // a basic API.
16 // It supports several types of integer arrays transparently, with an
17 // inherent storage based on int64 arrays.
18 //
19 // The key feature is the "lazy" copy:
20 // - Copying an IntTupleSet won't actually copy the data right away; we
21 // will just have several IntTupleSet pointing at the same data.
22 // - Modifying an IntTupleSet which shares his data with others
23 // will create a new, modified instance of the data payload, and make
24 // the IntTupleSet point to that new data.
25 // - Modifying an IntTupleSet that doesn't share its data with any other
26 // IntTupleSet will modify the data directly.
27 // Therefore, you don't need to use const IntTupleSet& in methods. Just do:
28 // void MyMethod(IntTupleSet tuple_set) { ... }
29 //
30 // This class is thread hostile as the copy and reference counter are
31 // not protected by a mutex.
32 
33 #ifndef OR_TOOLS_UTIL_TUPLE_SET_H_
34 #define OR_TOOLS_UTIL_TUPLE_SET_H_
35 
36 #include <algorithm>
37 #include <vector>
38 
39 #include "absl/container/flat_hash_map.h"
40 #include "absl/container/flat_hash_set.h"
41 #include "ortools/base/hash.h"
43 #include "ortools/base/logging.h"
44 #include "ortools/base/macros.h"
45 #include "ortools/base/map_util.h"
46 
47 namespace operations_research {
48 // ----- Main IntTupleSet class -----
49 class IntTupleSet {
50  public:
51  // Creates an empty tuple set with a fixed length for all tuples.
52  explicit IntTupleSet(int arity);
53  // Copy constructor (it actually does a lazy copy, see toplevel comment).
54  IntTupleSet(const IntTupleSet& set); // NOLINT
55  ~IntTupleSet();
56 
57  // Clears data.
58  void Clear();
59 
60  // Inserts the tuple to the set. It does nothing if the tuple is
61  // already in the set. The size of the tuple must be equal to the
62  // arity of the set. It returns the index at which the tuple was
63  // inserted (-1 if it was already present).
64  int Insert(const std::vector<int>& tuple);
65  int Insert(const std::vector<int64>& tuple);
66  // Arity fixed version of Insert removing the need for a vector for the user.
67  int Insert2(int64 v0, int64 v1);
68  int Insert3(int64 v0, int64 v1, int64 v2);
69  int Insert4(int64 v0, int64 v1, int64 v2, int64 v3);
70  // Inserts the tuples.
71  void InsertAll(const std::vector<std::vector<int64> >& tuples);
72  void InsertAll(const std::vector<std::vector<int> >& tuples);
73 
74  // Checks if the tuple is in the set.
75  bool Contains(const std::vector<int>& tuple) const;
76  bool Contains(const std::vector<int64>& tuple) const;
77 
78  // Returns the number of tuples.
79  int NumTuples() const;
80  // Get the given tuple's value at the given position. The indices
81  // of the tuples correspond to the order in which they were
82  // inserted.
83  int64 Value(int tuple_index, int pos_in_tuple) const;
84  // Returns the arity of the set.
85  int Arity() const;
86  // Access the raw data, see IntTupleSet::Data::flat_tuples_.
87  const int64* RawData() const;
88  // Returns the number of different values in the given column.
89  int NumDifferentValuesInColumn(int col) const;
90  // Return a copy of the set, sorted by the "col"-th value of each
91  // tuples. The sort is stable.
92  IntTupleSet SortedByColumn(int col) const;
93  // Returns a copy of the tuple set lexicographically sorted.
95 
96  private:
97  // Class that holds the actual data of an IntTupleSet. It handles
98  // the reference counters, etc.
99  class Data {
100  public:
101  explicit Data(int arity);
102  Data(const Data& data);
103  ~Data();
104  void AddSharedOwner();
105  bool RemovedSharedOwner();
106  Data* CopyIfShared();
107  template <class T>
108  int Insert(const std::vector<T>& tuple);
109  template <class T>
110  bool Contains(const std::vector<T>& candidate) const;
111  template <class T>
112  int64 Fingerprint(const std::vector<T>& tuple) const;
113  int NumTuples() const;
114  int64 Value(int index, int pos) const;
115  int Arity() const;
116  const int64* RawData() const;
117  void Clear();
118 
119  private:
120  const int arity_;
121  int num_owners_;
122  // Concatenation of all tuples ever added.
123  std::vector<int64> flat_tuples_;
124  // Maps a tuple's fingerprint to the list of tuples with this
125  // fingerprint, represented by their start index in the
126  // flat_tuples_ vector.
127  absl::flat_hash_map<int64, std::vector<int> > tuple_fprint_to_index_;
128  };
129 
130  // Used to represent a light representation of a tuple.
131  struct IndexData {
132  int index;
133  IntTupleSet::Data* data;
134  IndexData(int i, IntTupleSet::Data* const d) : index(i), data(d) {}
135  static bool Compare(const IndexData& a, const IndexData& b);
136  };
137 
138  struct IndexValue {
139  int index;
140  int64 value;
141  IndexValue(int i, int64 v) : index(i), value(v) {}
142  static bool Compare(const IndexValue& a, const IndexValue& b);
143  };
144 
145  mutable Data* data_;
146 };
147 
148 // ----- Data -----
149 inline IntTupleSet::Data::Data(int arity) : arity_(arity), num_owners_(0) {}
150 
151 inline IntTupleSet::Data::Data(const Data& data)
152  : arity_(data.arity_),
153  num_owners_(0),
154  flat_tuples_(data.flat_tuples_),
155  tuple_fprint_to_index_(data.tuple_fprint_to_index_) {}
156 
157 inline IntTupleSet::Data::~Data() {}
158 
159 inline void IntTupleSet::Data::AddSharedOwner() { num_owners_++; }
160 
161 inline bool IntTupleSet::Data::RemovedSharedOwner() {
162  return (--num_owners_ == 0);
163 }
164 
165 inline IntTupleSet::Data* IntTupleSet::Data::CopyIfShared() {
166  if (num_owners_ > 1) { // Copy on write.
167  Data* const new_data = new Data(*this);
168  RemovedSharedOwner();
169  new_data->AddSharedOwner();
170  return new_data;
171  }
172  return this;
173 }
174 
175 template <class T>
176 int IntTupleSet::Data::Insert(const std::vector<T>& tuple) {
177  DCHECK(arity_ == 0 || flat_tuples_.size() % arity_ == 0);
178  CHECK_EQ(arity_, tuple.size());
179  DCHECK_EQ(1, num_owners_);
180  if (!Contains(tuple)) {
181  const int index = NumTuples();
182  const int offset = flat_tuples_.size();
183  flat_tuples_.resize(offset + arity_);
184  // On mac os X, using this instead of push_back gives a 10x speedup!
185  for (int i = 0; i < arity_; ++i) {
186  flat_tuples_[offset + i] = tuple[i];
187  }
188  const int64 fingerprint = Fingerprint(tuple);
189  tuple_fprint_to_index_[fingerprint].push_back(index);
190  return index;
191  } else {
192  return -1;
193  }
194 }
195 
196 template <class T>
197 bool IntTupleSet::Data::Contains(const std::vector<T>& candidate) const {
198  if (candidate.size() != arity_) {
199  return false;
200  }
201  const int64 fingerprint = Fingerprint(candidate);
202  if (gtl::ContainsKey(tuple_fprint_to_index_, fingerprint)) {
203  const std::vector<int>& indices =
204  gtl::FindOrDie(tuple_fprint_to_index_, fingerprint);
205  for (int i = 0; i < indices.size(); ++i) {
206  const int tuple_index = indices[i];
207  for (int j = 0; j < arity_; ++j) {
208  if (candidate[j] != flat_tuples_[tuple_index * arity_ + j]) {
209  return false;
210  }
211  }
212  return true;
213  }
214  }
215  return false;
216 }
217 
218 template <class T>
219 int64 IntTupleSet::Data::Fingerprint(const std::vector<T>& tuple) const {
220  switch (arity_) {
221  case 0:
222  return 0;
223  case 1:
224  return tuple[0];
225  case 2: {
226  uint64 x = tuple[0];
227  uint64 y = GG_ULONGLONG(0xe08c1d668b756f82);
228  uint64 z = tuple[1];
229  mix(x, y, z);
230  return z;
231  }
232  default: {
233  uint64 x = tuple[0];
234  uint64 y = GG_ULONGLONG(0xe08c1d668b756f82);
235  for (int i = 1; i < tuple.size(); ++i) {
236  uint64 z = tuple[i];
237  mix(x, y, z);
238  x = z;
239  }
240  return x;
241  }
242  }
243 }
244 
245 inline int IntTupleSet::Data::NumTuples() const {
246  return tuple_fprint_to_index_.size();
247 }
248 
249 inline int64 IntTupleSet::Data::Value(int index, int pos) const {
250  DCHECK_GE(index, 0);
251  DCHECK_LT(index, flat_tuples_.size() / arity_);
252  DCHECK_GE(pos, 0);
253  DCHECK_LT(pos, arity_);
254  return flat_tuples_[index * arity_ + pos];
255 }
256 
257 inline int IntTupleSet::Data::Arity() const { return arity_; }
258 
259 inline const int64* IntTupleSet::Data::RawData() const {
260  return flat_tuples_.data();
261 }
262 
263 inline void IntTupleSet::Data::Clear() {
264  flat_tuples_.clear();
265  tuple_fprint_to_index_.clear();
266 }
267 
268 inline IntTupleSet::IntTupleSet(int arity) : data_(new Data(arity)) {
269  CHECK_GE(arity, 0);
270  data_->AddSharedOwner();
271 }
272 
273 inline IntTupleSet::IntTupleSet(const IntTupleSet& set) : data_(set.data_) {
274  data_->AddSharedOwner();
275 }
276 
278  CHECK(data_ != nullptr);
279  if (data_->RemovedSharedOwner()) {
280  delete data_;
281  }
282 }
283 
284 inline void IntTupleSet::Clear() {
285  data_ = data_->CopyIfShared();
286  data_->Clear();
287 }
288 
289 inline int IntTupleSet::Insert(const std::vector<int>& tuple) {
290  data_ = data_->CopyIfShared();
291  return data_->Insert(tuple);
292 }
293 
294 inline int IntTupleSet::Insert(const std::vector<int64>& tuple) {
295  data_ = data_->CopyIfShared();
296  return data_->Insert(tuple);
297 }
298 
299 inline int IntTupleSet::Insert2(int64 v0, int64 v1) {
300  std::vector<int64> tuple(2);
301  tuple[0] = v0;
302  tuple[1] = v1;
303  return Insert(tuple);
304 }
305 
306 inline int IntTupleSet::Insert3(int64 v0, int64 v1, int64 v2) {
307  std::vector<int64> tuple(3);
308  tuple[0] = v0;
309  tuple[1] = v1;
310  tuple[2] = v2;
311  return Insert(tuple);
312 }
313 
314 inline int IntTupleSet::Insert4(int64 v0, int64 v1, int64 v2, int64 v3) {
315  std::vector<int64> tuple(4);
316  tuple[0] = v0;
317  tuple[1] = v1;
318  tuple[2] = v2;
319  tuple[3] = v3;
320  return Insert(tuple);
321 }
322 
323 inline bool IntTupleSet::Contains(const std::vector<int>& tuple) const {
324  return data_->Contains(tuple);
325 }
326 
327 inline bool IntTupleSet::Contains(const std::vector<int64>& tuple) const {
328  return data_->Contains(tuple);
329 }
330 
332  const std::vector<std::vector<int> >& tuples) {
333  data_ = data_->CopyIfShared();
334  for (int i = 0; i < tuples.size(); ++i) {
335  Insert(tuples[i]);
336  }
337 }
338 
340  const std::vector<std::vector<int64> >& tuples) {
341  data_ = data_->CopyIfShared();
342  for (int i = 0; i < tuples.size(); ++i) {
343  Insert(tuples[i]);
344  }
345 }
346 
347 inline int IntTupleSet::NumTuples() const { return data_->NumTuples(); }
348 
349 inline int64 IntTupleSet::Value(int index, int pos) const {
350  return data_->Value(index, pos);
351 }
352 
353 inline int IntTupleSet::Arity() const { return data_->Arity(); }
354 
355 inline const int64* IntTupleSet::RawData() const { return data_->RawData(); }
356 
358  if (col < 0 || col >= data_->Arity()) {
359  return 0;
360  }
361  absl::flat_hash_set<int64> values;
362  for (int i = 0; i < data_->NumTuples(); ++i) {
363  values.insert(data_->Value(i, col));
364  }
365  return values.size();
366 }
367 
368 inline bool IntTupleSet::IndexValue::Compare(const IndexValue& a,
369  const IndexValue& b) {
370  return a.value < b.value || (a.value == b.value && a.index < b.index);
371 }
372 
374  std::vector<IndexValue> keys;
375  keys.reserve(data_->NumTuples());
376  for (int index = 0; index < data_->NumTuples(); ++index) {
377  keys.push_back(IndexValue(index, data_->Value(index, col)));
378  }
379  std::sort(keys.begin(), keys.end(), IntTupleSet::IndexValue::Compare);
380  const int arity = data_->Arity();
381  IntTupleSet sorted(arity);
382  for (int i = 0; i < keys.size(); ++i) {
383  const int64* tuple_ptr = data_->RawData() + keys[i].index * arity;
384  sorted.Insert(std::vector<int64>(tuple_ptr, tuple_ptr + arity));
385  }
386  return sorted;
387 }
388 
389 inline bool IntTupleSet::IndexData::Compare(const IndexData& a,
390  const IndexData& b) {
391  const IntTupleSet::Data* const data = a.data;
392  const int arity = data->Arity();
393  for (int i = 0; i < arity; ++i) {
394  const int64 value1 = data->Value(a.index, i);
395  const int64 value2 = data->Value(b.index, i);
396  if (value1 < value2) {
397  return true;
398  }
399  if (value1 > value2) {
400  return false;
401  }
402  }
403  return false;
404 }
405 
407  std::vector<IndexData> keys;
408  keys.reserve(data_->NumTuples());
409  for (int index = 0; index < data_->NumTuples(); ++index) {
410  keys.push_back(IndexData(index, data_));
411  }
412  std::sort(keys.begin(), keys.end(), IntTupleSet::IndexData::Compare);
413  const int arity = data_->Arity();
414  IntTupleSet sorted(arity);
415  for (int i = 0; i < keys.size(); ++i) {
416  std::vector<int64> tuple(arity);
417  const int64* tuple_ptr = data_->RawData() + keys[i].index * arity;
418  sorted.Insert(std::vector<int64>(tuple_ptr, tuple_ptr + arity));
419  }
420  return sorted;
421 }
422 } // namespace operations_research
423 
424 #endif // OR_TOOLS_UTIL_TUPLE_SET_H_
operations_research::IntTupleSet::InsertAll
void InsertAll(const std::vector< std::vector< int64 > > &tuples)
Definition: tuple_set.h:339
integral_types.h
DCHECK_LT
#define DCHECK_LT(val1, val2)
Definition: base/logging.h:888
map_util.h
operations_research::IntTupleSet::NumTuples
int NumTuples() const
Definition: tuple_set.h:347
operations_research::IntTupleSet::Insert2
int Insert2(int64 v0, int64 v1)
Definition: tuple_set.h:299
CHECK_GE
#define CHECK_GE(val1, val2)
Definition: base/logging.h:701
GG_ULONGLONG
#define GG_ULONGLONG(x)
Definition: integral_types.h:47
logging.h
value
int64 value
Definition: demon_profiler.cc:43
operations_research::IntTupleSet::RawData
const int64 * RawData() const
Definition: tuple_set.h:355
macros.h
operations_research
The vehicle routing library lets one model and solve generic vehicle routing problems ranging from th...
Definition: dense_doubly_linked_list.h:21
operations_research::mix
static void mix(uint32 &a, uint32 &b, uint32 &c)
Definition: hash.h:28
int64
int64_t int64
Definition: integral_types.h:34
index
int index
Definition: pack.cc:508
gtl::FindOrDie
const Collection::value_type::second_type & FindOrDie(const Collection &collection, const typename Collection::value_type::first_type &key)
Definition: map_util.h:176
operations_research::IntTupleSet::Insert4
int Insert4(int64 v0, int64 v1, int64 v2, int64 v3)
Definition: tuple_set.h:314
a
int64 a
Definition: constraint_solver/table.cc:42
operations_research::IntTupleSet::NumDifferentValuesInColumn
int NumDifferentValuesInColumn(int col) const
Definition: tuple_set.h:357
operations_research::IntTupleSet::~IntTupleSet
~IntTupleSet()
Definition: tuple_set.h:277
CHECK_EQ
#define CHECK_EQ(val1, val2)
Definition: base/logging.h:697
operations_research::IntTupleSet::Insert3
int Insert3(int64 v0, int64 v1, int64 v2)
Definition: tuple_set.h:306
operations_research::IntTupleSet::Clear
void Clear()
Definition: tuple_set.h:284
uint64
uint64_t uint64
Definition: integral_types.h:39
operations_research::sat::Value
std::function< int64(const Model &)> Value(IntegerVariable v)
Definition: integer.h:1470
DCHECK
#define DCHECK(condition)
Definition: base/logging.h:884
arity_
const int arity_
Definition: constraint_solver/table.cc:222
operations_research::IntTupleSet::Contains
bool Contains(const std::vector< int > &tuple) const
Definition: tuple_set.h:323
operations_research::IntTupleSet::Insert
int Insert(const std::vector< int > &tuple)
Definition: tuple_set.h:289
operations_research::IntTupleSet::Arity
int Arity() const
Definition: tuple_set.h:353
DCHECK_GE
#define DCHECK_GE(val1, val2)
Definition: base/logging.h:889
col
ColIndex col
Definition: markowitz.cc:176
DCHECK_EQ
#define DCHECK_EQ(val1, val2)
Definition: base/logging.h:885
hash.h
b
int64 b
Definition: constraint_solver/table.cc:43
operations_research::IntTupleSet
Definition: tuple_set.h:49
operations_research::IntTupleSet::SortedByColumn
IntTupleSet SortedByColumn(int col) const
Definition: tuple_set.h:373
operations_research::IntTupleSet::IntTupleSet
IntTupleSet(int arity)
Definition: tuple_set.h:268
operations_research::IntTupleSet::Value
int64 Value(int tuple_index, int pos_in_tuple) const
Definition: tuple_set.h:349
operations_research::IntTupleSet::SortedLexicographically
IntTupleSet SortedLexicographically() const
Definition: tuple_set.h:406
CHECK
#define CHECK(condition)
Definition: base/logging.h:495
gtl::ContainsKey
bool ContainsKey(const Collection &collection, const Key &key)
Definition: map_util.h:170