// Copyright 2010-2018 Google LLC
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//
// An implementation of a cost-scaling push-relabel algorithm for the
// assignment problem (minimum-cost perfect bipartite matching), from
// the paper of Goldberg and Kennedy (1995).
//
//
// This implementation finds the minimum-cost perfect assignment in
// the given graph with integral edge weights set through the
// SetArcCost method.
//
// The running time is O(n*m*log(nC)) where n is the number of nodes,
// m is the number of edges, and C is the largest magnitude of an edge cost.
// In principle it can be worse than the Hungarian algorithm but we don't know
// of any class of problems where that actually happens. An additional sqrt(n)
// factor could be shaved off the running time bound using the technique
// described in http://dx.doi.org/10.1137/S0895480194281185
// (see also http://theory.stanford.edu/~robert/papers/glob_upd.ps).
//
//
// Example usage:
//
//   #include "ortools/graph/graph.h"
//   #include "ortools/graph/linear_assignment.h"
//
//   // Choose a graph implementation (we recommend StaticGraph<>).
//   typedef util::StaticGraph<> Graph;
//
//   // Define a num_nodes / 2 by num_nodes / 2 assignment problem:
//   const int num_nodes = ...
//   const int num_arcs = ...
//   const int num_left_nodes = num_nodes / 2;
//   Graph graph(num_nodes, num_arcs);
//   std::vector<operations_research::CostValue> arc_costs(num_arcs);
//   for (int arc = 0; arc < num_arcs; ++arc) {
//     const int arc_tail = ...   // must be in [0, num_left_nodes)
//     const int arc_head = ...   // must be in [num_left_nodes, num_nodes)
//     graph.AddArc(arc_tail, arc_head);
//     arc_costs[arc] = ...
//   }
//
//   // Build the StaticGraph. You can skip this step by using a ListGraph<>
//   // instead, but then the ComputeAssignment() below will be slower. It is
//   // okay if your graph is small and performance is not critical though.
//   {
//     std::vector<Graph::ArcIndex> arc_permutation;
//     graph.Build(&arc_permutation);
//     util::Permute(arc_permutation, &arc_costs);
//   }
//
//   // Construct the LinearSumAssignment.
//   ::operations_research::LinearSumAssignment<Graph> a(graph, num_left_nodes);
//   for (int arc = 0; arc < num_arcs; ++arc) {
//      // You can also replace 'arc_costs[arc]' by something like
//      //   ComputeArcCost(arc_permutation.empty() ? arc : arc_permutation[arc])
//      // (with arc_permutation kept in scope) if you don't want to
//      // store the costs in arc_costs, to save memory.
//      a.SetArcCost(arc, arc_costs[arc]);
//   }
//
//   // Compute the optimum assignment.
//   bool success = a.ComputeAssignment();
//   // Retrieve the cost of the optimum assignment.
//   operations_research::CostValue optimum_cost = a.GetCost();
//   // Retrieve the node-node correspondence of the optimum assignment and the
//   // cost of each node pairing.
//   for (int left_node = 0; left_node < num_left_nodes; ++left_node) {
//     const int right_node = a.GetMate(left_node);
//     operations_research::CostValue node_pair_cost =
//         a.GetAssignmentCost(left_node);
//     ...
//   }
//
// In the following, we consider a bipartite graph
//   G = (V = X union Y, E subset XxY),
// where V denotes the set of nodes (vertices) in the graph, E denotes
// the set of arcs (edges), n = |V| denotes the number of nodes in the
// graph, and m = |E| denotes the number of arcs in the graph.
//
// The set of nodes is divided into two parts, X and Y, and every arc
// must go between a node of X and a node of Y. With each arc is
// associated a cost c(v, w). A matching M is a subset of E with the
// property that no two arcs in M have a head or tail node in common,
// and a perfect matching is a matching that touches every node in the
// graph. The cost of a matching M is the sum of the costs of all the
// arcs in M.
//
// The assignment problem is to find a perfect matching of minimum
// cost in the given bipartite graph. The present algorithm reduces
// the assignment problem to an instance of the minimum-cost flow
// problem and takes advantage of special properties of the resulting
// minimum-cost flow problem to solve it efficiently using a
// push-relabel method. For more information about minimum-cost flow
// see google3/ortools/graph/min_cost_flow.h
//
// The method used here is the cost-scaling approach for the
// minimum-cost circulation problem as described in [Goldberg and
// Tarjan] with some technical modifications:
// 1. For efficiency, we solve a transportation problem instead of
//    minimum-cost circulation. We might revisit this decision if it
//    is important to handle problems in which no perfect matching
//    exists.
// 2. We use a modified "asymmetric" notion of epsilon-optimality in
//    which left-to-right residual arcs are required to have reduced
//    cost bounded below by zero and right-to-left residual arcs are
//    required to have reduced cost bounded below by -epsilon. For
//    each residual arc direction, the reduced-cost threshold for
//    admissibility is epsilon/2 above the threshold for epsilon
//    optimality.
// 3. We do not limit the applicability of the relabeling operation to
//    nodes with excess. Instead we use the double-push operation
//    (discussed in the Goldberg and Kennedy CSA paper and Kennedy's
//    thesis) which relabels right-side nodes just *after* they have
//    been discharged.
// The above differences are explained in detail in [Kennedy's thesis]
// and explained not quite as cleanly in [Goldberg and Kennedy's CSA
// paper]. But note that the thesis explanation uses a value of
// epsilon that's double what we use here.
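//
// In symbols, the asymmetric epsilon-optimality conditions of item 2
// above read (using the reduced-cost function c_p defined under "Some
// definitions" below):
//   c_p(v, w) >= 0        for every left-to-right residual arc, and
//   c_p(w, v) >= -epsilon for every right-to-left residual arc.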
//
// Some definitions:
//   Active: A node is called active when it has excess. It is
//     eligible to be pushed from. In this implementation, every active
//     node is on the left side of the graph where prices are determined
//     implicitly, so no left-side relabeling is necessary before
//     pushing from an active node. We do, however, need to compute
//     the implications for price changes on the affected right-side
//     nodes.
//   Admissible: A residual arc (one that can carry more flow) is
//     called admissible when its reduced cost is small enough. We can
//     push additional flow along such an arc without violating
//     epsilon-optimality. In the case of a left-to-right residual
//     arc, the reduced cost must be at most epsilon/2. In the case of
//     a right-to-left residual arc, the reduced cost must be at
//     most -epsilon/2. The careful reader will note that these thresholds
//     are not used explicitly anywhere in this implementation, and
//     the reason is the implicit pricing of left-side nodes.
//   Reduced cost: Essentially an arc's reduced cost is its
//     complementary slackness. In push-relabel algorithms this is
//       c_p(v, w) = p(v) + c(v, w) - p(w),
//     where p() is the node price function and c(v, w) is the cost of
//     the arc from v to w. See min_cost_flow.h for more details.
//   Partial reduced cost: We maintain prices implicitly for left-side
//     nodes in this implementation, so instead of reduced costs we
//     work with partial reduced costs, defined as
//       c'_p(v, w) = c(v, w) - p(w).
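//
// As a small worked example of these definitions: if arc (v, w) has
// cost c(v, w) = 5 and right-side node w has price p(w) = 3, then the
// partial reduced cost is c'_p(v, w) = 5 - 3 = 2. If furthermore the
// implicit price of v is p(v) = -2, the full reduced cost is
// c_p(v, w) = -2 + 5 - 3 = 0.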
//
// We check at initialization time for the possibility of arithmetic
// overflow and warn if the given costs are too large. In many cases
// the bound we use to trigger the warning is pessimistic so the given
// problem can often be solved even if we warn that overflow is
// possible.
//
// We don't use the interface from
// operations_research/algorithms/hungarian.h because we want to be
// able to express sparse problems efficiently.
//
// When asked to solve the given assignment problem we return a
// boolean to indicate whether the given problem was feasible.
//
// References:
// [ Goldberg and Kennedy's CSA paper ] A. V. Goldberg and R. Kennedy,
// "An Efficient Cost Scaling Algorithm for the Assignment Problem."
// Mathematical Programming, Vol. 71, pages 153-178, December 1995.
//
// [ Goldberg and Tarjan ] A. V. Goldberg and R. E. Tarjan, "Finding
// Minimum-Cost Circulations by Successive Approximation." Mathematics
// of Operations Research, Vol. 15, No. 3, pages 430-466, August 1990.
//
// [ Kennedy's thesis ] J. R. Kennedy, Jr., "Solving Unweighted and
// Weighted Bipartite Matching Problems in Theory and Practice."
// Stanford University Doctoral Dissertation, Department of Computer
// Science, 1995.
//
// [ Burkard et al. ] R. Burkard, M. Dell'Amico, S. Martello, "Assignment
// Problems", SIAM, 2009, ISBN: 978-0898716634,
// http://www.amazon.com/dp/0898716632/
//
// [ Ahuja et al. ] R. K. Ahuja, T. L. Magnanti, J. B. Orlin, "Network Flows:
// Theory, Algorithms, and Applications," Prentice Hall, 1993,
// ISBN: 978-0136175490, http://www.amazon.com/dp/013617549X.
//
// Keywords: linear sum assignment problem, Hungarian method, Goldberg, Kennedy.

#ifndef OR_TOOLS_GRAPH_LINEAR_ASSIGNMENT_H_
#define OR_TOOLS_GRAPH_LINEAR_ASSIGNMENT_H_

#include <algorithm>
#include <cstdlib>
#include <deque>
#include <limits>
#include <memory>
#include <string>
#include <utility>
#include <vector>

#include "absl/strings/str_format.h"
#include "ortools/base/commandlineflags.h"
#include "ortools/base/integral_types.h"
#include "ortools/base/logging.h"
#include "ortools/base/macros.h"
#include "ortools/graph/ebert_graph.h"
#include "ortools/util/permutation.h"
#include "ortools/util/zvector.h"

#ifndef SWIG
DECLARE_int64(assignment_alpha);
DECLARE_int32(assignment_progress_logging_period);
DECLARE_bool(assignment_stack_order);
#endif

namespace operations_research {

// This class does not take ownership of its underlying graph.
template <typename GraphType>
class LinearSumAssignment {
 public:
  typedef typename GraphType::NodeIndex NodeIndex;
  typedef typename GraphType::ArcIndex ArcIndex;

  // Constructor for the case in which we will build the graph
  // incrementally as we discover arc costs, as might be done with any
  // of the dynamic graph representations such as StarGraph or ForwardStarGraph.
  LinearSumAssignment(const GraphType& graph, NodeIndex num_left_nodes);

  // Constructor for the case in which the underlying graph cannot be
  // built until after all the arc costs are known, as is the case
  // with ForwardStarStaticGraph. In this case, the graph is passed to
  // us later via the SetGraph() method, below.
  LinearSumAssignment(NodeIndex num_left_nodes, ArcIndex num_arcs);

  ~LinearSumAssignment() {}

  // Sets the graph used by the LinearSumAssignment instance, for use
  // when the graph layout can be determined only after arc costs are
  // set. This happens, for example, when we use a ForwardStarStaticGraph.
  void SetGraph(const GraphType* graph) {
    DCHECK(graph_ == nullptr);
    graph_ = graph;
  }
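
  // To illustrate, here is a minimal sketch of the deferred-graph
  // usage this constructor pair supports. 'BuildMyGraph()' and
  // 'cost_of_arc' are hypothetical placeholders, not part of this API:
  //
  //   LinearSumAssignment<GraphType> a(num_left_nodes, num_arcs);
  //   for (ArcIndex arc = 0; arc < num_arcs; ++arc) {
  //     a.SetArcCost(arc, cost_of_arc[arc]);  // costs known up front
  //   }
  //   const GraphType* graph = BuildMyGraph();  // finalize the layout
  //   a.SetGraph(graph);
  //   bool feasible = a.ComputeAssignment();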

  // Sets the cost-scaling divisor, i.e., the amount by which we
  // divide the scaling parameter on each iteration.
  void SetCostScalingDivisor(CostValue factor) { alpha_ = factor; }

  // Returns a permutation cycle handler that can be passed to the
  // TransformToForwardStaticGraph method so that arc costs get
  // permuted along with arcs themselves.
  //
  // Passes ownership of the cycle handler to the caller.
  //
  operations_research::PermutationCycleHandler<typename GraphType::ArcIndex>*
  ArcAnnotationCycleHandler();

  // Optimizes the layout of the graph for the access pattern our
  // implementation will use.
  //
  // REQUIRES for LinearSumAssignment template instantiation if a call
  // to the OptimizeGraphLayout() method is compiled: GraphType is a
  // dynamic graph, i.e., one that implements the
  // GroupForwardArcsByFunctor() member template method.
  //
  // If analogous optimization is needed for LinearSumAssignment
  // instances based on static graphs, the graph layout should be
  // constructed such that each node's outgoing arcs are sorted by
  // head node index before the
  // LinearSumAssignment<GraphType>::SetGraph() method is called.
  void OptimizeGraphLayout(GraphType* graph);

  // Allows tests, iterators, etc., to inspect our underlying graph.
  inline const GraphType& Graph() const { return *graph_; }

  // These handy member functions make the code more compact, and we
  // expose them to clients so that client code that doesn't have
  // direct access to the graph can learn about the optimum assignment
  // once it is computed.
  inline NodeIndex Head(ArcIndex arc) const { return graph_->Head(arc); }

  // Returns the original arc cost for use by a client that's
  // iterating over the optimum assignment.
  CostValue ArcCost(ArcIndex arc) const {
    DCHECK_EQ(0, scaled_arc_cost_[arc] % cost_scaling_factor_);
    return scaled_arc_cost_[arc] / cost_scaling_factor_;
  }

  // Sets the cost of an arc already present in the given graph.
  void SetArcCost(ArcIndex arc, CostValue cost);

  // Completes initialization after the problem is fully specified.
  // Returns true if we successfully prove that arithmetic
  // calculations are guaranteed not to overflow. ComputeAssignment()
  // calls this method itself, so only clients that care about
  // obtaining a warning about the possibility of arithmetic precision
  // problems need to call this method explicitly.
  //
  // Separate from ComputeAssignment() for white-box testing and for
  // clients that need to react to the possibility that arithmetic
  // overflow is not ruled out.
  //
  // FinalizeSetup() is idempotent.
  bool FinalizeSetup();

  // Computes the optimum assignment. Returns true on success. Return
  // value of false implies the given problem is infeasible.
  bool ComputeAssignment();

  // Returns the cost of the minimum-cost perfect matching.
  // Precondition: success_ == true, signifying that we computed the
  // optimum assignment for a feasible problem.
  CostValue GetCost() const;

  // Returns the total number of nodes in the given problem.
  NodeIndex NumNodes() const {
    if (graph_ == nullptr) {
      // Return a guess that must be true if ultimately we are given a
      // feasible problem to solve.
      return 2 * NumLeftNodes();
    } else {
      return graph_->num_nodes();
    }
  }

  // Returns the number of nodes on the left side of the given
  // problem.
  NodeIndex NumLeftNodes() const { return num_left_nodes_; }

  // Returns the arc through which the given node is matched.
  inline ArcIndex GetAssignmentArc(NodeIndex left_node) const {
    DCHECK_LT(left_node, num_left_nodes_);
    return matched_arc_[left_node];
  }

  // Returns the cost of the assignment arc incident to the given
  // node.
  inline CostValue GetAssignmentCost(NodeIndex node) const {
    return ArcCost(GetAssignmentArc(node));
  }

  // Returns the node to which the given node is matched.
  inline NodeIndex GetMate(NodeIndex left_node) const {
    DCHECK_LT(left_node, num_left_nodes_);
    ArcIndex matching_arc = GetAssignmentArc(left_node);
    DCHECK_NE(GraphType::kNilArc, matching_arc);
    return Head(matching_arc);
  }

  std::string StatsString() const { return total_stats_.StatsString(); }

  class BipartiteLeftNodeIterator {
   public:
    BipartiteLeftNodeIterator(const GraphType& graph, NodeIndex num_left_nodes)
        : num_left_nodes_(num_left_nodes), node_iterator_(0) {}

    explicit BipartiteLeftNodeIterator(const LinearSumAssignment& assignment)
        : num_left_nodes_(assignment.NumLeftNodes()), node_iterator_(0) {}

    NodeIndex Index() const { return node_iterator_; }

    bool Ok() const { return node_iterator_ < num_left_nodes_; }

    void Next() { ++node_iterator_; }

   private:
    const NodeIndex num_left_nodes_;
    typename GraphType::NodeIndex node_iterator_;
  };
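
  // A minimal sketch of how this iterator can be used with an instance
  // 'assignment' of this class, e.g., to read out the matching once
  // ComputeAssignment() has succeeded:
  //
  //   for (LinearSumAssignment<GraphType>::BipartiteLeftNodeIterator
  //            node_it(assignment);
  //        node_it.Ok(); node_it.Next()) {
  //     const NodeIndex left_node = node_it.Index();
  //     const NodeIndex right_node = assignment.GetMate(left_node);
  //     ...
  //   }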

 private:
  struct Stats {
    Stats() : pushes_(0), double_pushes_(0), relabelings_(0), refinements_(0) {}
    void Clear() {
      pushes_ = 0;
      double_pushes_ = 0;
      relabelings_ = 0;
      refinements_ = 0;
    }
    void Add(const Stats& that) {
      pushes_ += that.pushes_;
      double_pushes_ += that.double_pushes_;
      relabelings_ += that.relabelings_;
      refinements_ += that.refinements_;
    }
    std::string StatsString() const {
      return absl::StrFormat(
          "%d refinements; %d relabelings; "
          "%d double pushes; %d pushes",
          refinements_, relabelings_, double_pushes_, pushes_);
    }
    int64 pushes_;
    int64 double_pushes_;
    int64 relabelings_;
    int64 refinements_;
  };

#ifndef SWIG
  class ActiveNodeContainerInterface {
   public:
    virtual ~ActiveNodeContainerInterface() {}
    virtual bool Empty() const = 0;
    virtual void Add(NodeIndex node) = 0;
    virtual NodeIndex Get() = 0;
  };

  class ActiveNodeStack : public ActiveNodeContainerInterface {
   public:
    ~ActiveNodeStack() override {}

    bool Empty() const override { return v_.empty(); }

    void Add(NodeIndex node) override { v_.push_back(node); }

    NodeIndex Get() override {
      DCHECK(!Empty());
      NodeIndex result = v_.back();
      v_.pop_back();
      return result;
    }

   private:
    std::vector<NodeIndex> v_;
  };

  class ActiveNodeQueue : public ActiveNodeContainerInterface {
   public:
    ~ActiveNodeQueue() override {}

    bool Empty() const override { return q_.empty(); }

    void Add(NodeIndex node) override { q_.push_front(node); }

    NodeIndex Get() override {
      DCHECK(!Empty());
      NodeIndex result = q_.back();
      q_.pop_back();
      return result;
    }

   private:
    std::deque<NodeIndex> q_;
  };
#endif

  // Type definition for a pair
  //   (arc index, reduced cost gap)
  // giving the arc along which we will push from a given left-side
  // node and the gap between that arc's partial reduced cost and the
  // reduced cost of the next-best (necessarily residual) arc out of
  // the node. This information helps us efficiently relabel
  // right-side nodes during DoublePush operations.
  typedef std::pair<ArcIndex, CostValue> ImplicitPriceSummary;

  // Returns true if and only if the current pseudoflow is
  // epsilon-optimal. To be used in a DCHECK.
  bool EpsilonOptimal() const;

  // Checks that all nodes are matched.
  // To be used in a DCHECK.
  bool AllMatched() const;

  // Calculates the implicit price of the given node.
  // Only for debugging, for use in EpsilonOptimal().
  inline CostValue ImplicitPrice(NodeIndex left_node) const;

  // For use by DoublePush()
  inline ImplicitPriceSummary BestArcAndGap(NodeIndex left_node) const;

  // Accumulates stats between iterations and reports them if the
  // verbosity level is high enough.
  void ReportAndAccumulateStats();

  // Utility function to compute the next error parameter value. This
  // is used to ensure that the same sequence of error parameter
  // values is used for computation of price bounds as is used for
  // computing the optimum assignment.
  CostValue NewEpsilon(CostValue current_epsilon) const;

  // Advances internal state to prepare for the next scaling
  // iteration. Returns false if infeasibility is detected, true
  // otherwise.
  bool UpdateEpsilon();

  // Indicates whether the given left_node has positive excess. Called
  // only for nodes on the left side.
  inline bool IsActive(NodeIndex left_node) const;

  // Indicates whether the given node has nonzero excess. The idea
  // here is the same as the IsActive method above, but that method
  // contains a safety DCHECK() that its argument is a left-side node,
  // while this method is usable for any node.
  // To be used in a DCHECK.
  inline bool IsActiveForDebugging(NodeIndex node) const;

  // Performs the push/relabel work for one scaling iteration.
  bool Refine();

  // Puts all left-side nodes in the active set in preparation for the
  // first scaling iteration.
  void InitializeActiveNodeContainer();

  // Saturates all negative-reduced-cost arcs at the beginning of each
  // scaling iteration. Note that according to the asymmetric
  // definition of admissibility, this action is different from
  // saturating all admissible arcs (which we never do). All negative
  // arcs are admissible, but not all admissible arcs are negative. It
  // is always enough to saturate only the negative ones.
  void SaturateNegativeArcs();

  // Performs an optimized sequence of pushing a unit of excess out of
  // the left-side node v and back to another left-side node if no
  // deficit is cancelled with the first push.
  bool DoublePush(NodeIndex source);

  // Returns the partial reduced cost of the given arc.
  inline CostValue PartialReducedCost(ArcIndex arc) const {
    return scaled_arc_cost_[arc] - price_[Head(arc)];
  }

  // The graph underlying the problem definition we are given. Not
  // owned by *this.
  const GraphType* graph_;

  // The number of nodes on the left side of the graph we are given.
  NodeIndex num_left_nodes_;

  // A flag indicating, after FinalizeSetup() has run, whether the
  // arc-incidence precondition required by BestArcAndGap() is
  // satisfied by every left-side node. If not, the problem is
  // infeasible.
  bool incidence_precondition_satisfied_;

  // A flag indicating that an optimal perfect matching has been computed.
  bool success_;

  // The value by which we multiply all the arc costs we are given in
  // order to be able to use integer arithmetic in all our
  // computations. In order to establish optimality of the final
  // matching we compute, we need that
  //   (cost_scaling_factor_ / kMinEpsilon) > graph_->num_nodes().
  const CostValue cost_scaling_factor_;

  // Scaling divisor.
  CostValue alpha_;

  // Minimum value of epsilon. When a flow is epsilon-optimal for
  // epsilon == kMinEpsilon, the flow is optimal.
  static const CostValue kMinEpsilon;

  // Current value of epsilon, the cost scaling parameter.
  CostValue epsilon_;

  // The following two data members, price_lower_bound_ and
  // slack_relabeling_price_, have to do with bounds on the amount by
  // which node prices can change during execution of the algorithm.
  // We need some detailed discussion of this topic because we violate
  // several simplifying assumptions typically made in the theoretical
  // literature. In particular, we use integer arithmetic, we use a
  // reduction to the transportation problem rather than min-cost
  // circulation, we provide detection of infeasible problems rather
  // than assume feasibility, we detect when our computations might
  // exceed the range of representable cost values, and we use the
  // double-push heuristic which relabels nodes that do not have
  // excess.
  //
  // In the following discussion, we prove the following propositions:
  // Proposition 1. [Fidelity of arithmetic precision guarantee] If
  //                FinalizeSetup() returns true, no arithmetic
  //                overflow occurs during ComputeAssignment().
  // Proposition 2. [Fidelity of feasibility detection] If no
  //                arithmetic overflow occurs during
  //                ComputeAssignment(), the return value of
  //                ComputeAssignment() faithfully indicates whether
  //                the given problem is feasible.
  //
  // We begin with some general discussion.
  //
  // The ideas used to prove our two propositions are essentially
  // those that appear in [Goldberg and Tarjan], but several details
  // are different: [Goldberg and Tarjan] assumes a feasible problem,
  // uses a symmetric notion of epsilon-optimality, considers only
  // nodes with excess eligible for relabeling, and does not treat the
  // question of arithmetic overflow. This implementation, on the
  // other hand, detects and reports infeasible problems, uses
  // asymmetric epsilon-optimality, relabels nodes with no excess in
  // the course of the double-push operation, and gives a reasonably
  // tight guarantee of arithmetic precision. No fundamentally new
  // ideas are involved, but the details are a bit tricky so they are
  // explained here.
  //
  // We have two intertwined needs that lead us to compute bounds on
  // the prices nodes can have during the assignment computation, on
  // the assumption that the given problem is feasible:
  // 1. Infeasibility detection: Infeasibility is detected by
  //    observing that some node's price has been reduced too much by
  //    relabeling operations (see [Goldberg and Tarjan] for the
  //    argument -- duplicated in modified form below -- bounding the
  //    running time of the push/relabel min-cost flow algorithm for
  //    feasible problems); and
  // 2. Aggressively relabeling nodes and arcs whose matching is
  //    forced: When a left-side node is incident to only one arc a,
  //    any feasible solution must include a, and reducing the price
  //    of Head(a) by any nonnegative amount preserves epsilon-
  //    optimality. Because of this freedom, we'll call this sort of
  //    relabeling (i.e., a relabeling of a right-side node that is
  //    the only neighbor of the left-side node to which it has been
  //    matched in the present double-push operation) a "slack"
  //    relabeling. Relabelings that are not slack relabelings are
  //    called "confined" relabelings. By relabeling Head(a) to have
  //    p(Head(a))=-infinity, we could guarantee that a never becomes
  //    unmatched during the current iteration, and this would prevent
  //    our wasting time repeatedly unmatching and rematching a. But
  //    there are some details we need to handle:
  //    a. The CostValue type cannot represent -infinity;
  //    b. Low node prices are precisely the signal we use to detect
  //       infeasibility (see (1)), so we must be careful not to
  //       falsely conclude that the problem is infeasible as a result
  //       of the low price we gave Head(a); and
  //    c. We need to indicate accurately to the client when our best
  //       understanding indicates that we can't rule out arithmetic
  //       overflow in our calculations. Most importantly, if we don't
  //       warn the client, we must be certain to avoid overflow. This
  //       means our slack relabelings must not be so aggressive as to
  //       create the possibility of unforeseen overflow. Although we
  //       will not achieve this in practice, slack relabelings would
  //       ideally not introduce overflow unless overflow was
  //       inevitable were even the smallest reasonable price change
  //       (== epsilon) used for slack relabelings.
  //    Using the analysis below, we choose a finite amount of price
  //    change for slack relabelings aggressive enough that we don't
  //    waste time doing repeated slack relabelings in a single
  //    iteration, yet modest enough that we keep a good handle on
  //    arithmetic precision and our ability to detect infeasible
  //    problems.
  //
  // To provide faithful detection of infeasibility, a dependable
  // guarantee of arithmetic precision whenever possible, and good
  // performance by aggressively relabeling nodes whose matching is
  // forced, we exploit these facts:
  // 1. Beyond the first iteration, infeasibility detection isn't needed
  //    because a problem is feasible in some iteration if and only if
  //    it's feasible in all others. Therefore we are free to use an
  //    infeasibility detection mechanism that might work in just one
  //    iteration and switch it off in all other iterations.
  // 2. When we do a slack relabeling, we must choose the amount of
  //    price reduction to use. We choose an amount large enough to
  //    guarantee putting the node's matching to rest, yet (although
  //    we don't bother to prove this explicitly) small enough that
  //    the node's price obeys the overall lower bound that holds if
  //    the slack relabeling amount is small.
  //
  // We will establish Propositions (1) and (2) above according to the
  // following steps:
  // First, we prove Lemma 1, which is a modified form of lemma 5.8 of
  // [Goldberg and Tarjan] giving a bound on the difference in price
  // between the end nodes of certain paths in the residual graph.
  // Second, we prove Lemma 2, which is a technical lemma to establish
  // reachability of certain "anchor" nodes in the residual graph from
  // any node where a relabeling takes place.
  // Third, we apply the first two lemmas to prove Lemma 3 and Lemma
  // 4, which give two similar bounds that hold whenever the given
  // problem is feasible: (for feasibility detection) a bound on the
  // price of any node we relabel during any iteration (and the first
  // iteration in particular), and (for arithmetic precision) a bound
  // on the price of any node we relabel during the entire algorithm.
  //
  // Finally, we note that if the whole-algorithm price bound can be
  // represented precisely by the CostValue type, arithmetic overflow
  // cannot occur (establishing Proposition 1), and assuming no
  // overflow occurs during the first iteration, any violation of the
  // first-iteration price bound establishes infeasibility
  // (Proposition 2).
  //
  // The statement of Lemma 1 is perhaps easier to understand when the
  // reader knows how it will be used. To wit: In this lemma, f' and
  // e_0 are the flow and error parameter (epsilon) at the beginning
  // of the current iteration, while f and e_1 are the current
  // pseudoflow and error parameter when a relabeling of interest
  // occurs. Without loss of generality, c is the reduced cost
  // function at the beginning of the current iteration and p is the
  // change in prices that has taken place in the current iteration.
  //
  // Lemma 1 (a variant of lemma 5.8 from [Goldberg and Tarjan]): Let
  // f be a pseudoflow and let f' be a flow. Suppose P is a simple
  // path from right-side node v to right-side node w such that P is
  // residual with respect to f and reverse(P) is residual with
  // respect to f'. Further, suppose c is an arc cost function with
  // respect to which f' is e_0-optimal with the zero price function
  // and p is a price function with respect to which f is
  // e_1-optimal. Then
  //   p(v) - p(w) >= -(e_0 + e_1) * (n-2)/2.     (***)
  //
  // Proof: We have c_p(P) = p(v) + c(P) - p(w) and hence
  //   p(v) - p(w) = c_p(P) - c(P).
  // So we seek a bound on c_p(P) - c(P).
  // Let arc a lie on P, which implies that a is residual with respect
  // to f and reverse(a) is residual with respect to f'.
  // Case 1: a is a forward arc. Then by e_1-optimality of f with
  //         respect to p, c_p(a) >= 0 and reverse(a) is residual with
  //         respect to f'. By e_0-optimality of f', c(a) <= e_0. So
  //           c_p(a) - c(a) >= -e_0.
  // Case 2: a is a reverse arc. Then by e_1-optimality of f with
  //         respect to p, c_p(a) >= -e_1 and reverse(a) is residual
  //         with respect to f'. By e_0-optimality of f', c(a) <= 0.
  //         So
  //           c_p(a) - c(a) >= -e_1.
  // We assumed v and w are both right-side nodes, so there are at
  // most n - 2 arcs on the path P, of which at most (n-2)/2 are
  // forward arcs and at most (n-2)/2 are reverse arcs, so
  //   p(v) - p(w) = c_p(P) - c(P)
  //               >= -(e_0 + e_1) * (n-2)/2.     (***)
  //
  // Some of the rest of our argument is given as a sketch, omitting
  // several details. Also elided here are some minor technical issues
  // related to the first iteration, inasmuch as our arguments assume
  // on the surface a "previous iteration" that doesn't exist in that
  // case. The issues are not substantial, just a bit messy.
  //
  // Lemma 2 is analogous to lemma 5.7 of [Goldberg and Tarjan], where
  // they have only relabelings that take place at nodes with excess
  // while we have only relabelings that take place as part of the
  // double-push operation at nodes without excess.
  //
  // Lemma 2: If the problem is feasible, for any node v with excess,
  // there exists a path P from v to a node w with deficit such that P
  // is residual with respect to the current pseudoflow, and
  // reverse(P) is residual with respect to the flow at the beginning
  // of the current iteration. (Note that such a path exactly
  // satisfies the conditions of Lemma 1.)
  //
  // Let the bound from Lemma 1 with p(w) = 0 be called B(e_0, e_1),
  // and let us say that when a slack relabeling of a node v occurs,
  // we will change the price of v by B(e_0, e_1) such that v tightly
  // satisfies the bound of Lemma 1. Explicitly, we define
  //   B(e_0, e_1) = -(e_0 + e_1) * (n-2)/2.
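  //
  // As a concrete instance of this formula (numbers chosen purely for
  // illustration): with n = 10, e_0 = 25 and e_1 = 5,
  //   B(25, 5) = -(25 + 5) * (10-2)/2 = -120,
  // so a slack relabeling lowers the relabeled node's price by 120.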
  //
  // Lemma 1 and Lemma 2 combine to bound the price change during an
  // iteration for any node with excess. Viewed a different way, Lemma
  // 1 and Lemma 2 tell us that if epsilon-optimality can be preserved
  // by changing the price of a node by B(e_0, e_1), that node will
  // never have excess again during the current iteration unless the
  // problem is infeasible. This insight gives us an approach to
  // detect infeasibility (by observing prices on nodes with excess
  // that violate this bound) and to relabel nodes aggressively enough
  // to avoid unnecessary future work while we also avoid falsely
  // concluding the problem is infeasible.
  //
  // From Lemma 1 and Lemma 2, and taking into account our knowledge
  // of the slack relabeling amount, we have Lemma 3.
  //
  // Lemma 3: During any iteration, if the given problem is feasible
  // the price of any node is reduced by less than
  //   -2 * B(e_0, e_1) = (e_0 + e_1) * (n-2).
  //
  // Proof: Straightforward, omitted for expedience.
  //
  // In the case where e_0 = e_1 * alpha, we can express the bound
  // just in terms of e_1, the current iteration's value of epsilon_:
  //   B(e_1) = B(e_1 * alpha, e_1) = -(1 + alpha) * e_1 * (n-2)/2,
  // so we have that p(v) is reduced by less than -2 * B(e_1).
  //
  // Because we use truncating division to compute each iteration's error
  // parameter from that of the previous iteration, it isn't exactly
  // the case that e_0 = e_1 * alpha as we just assumed. To patch this
  // up, we can use the observation that
  //   e_1 = floor(e_0 / alpha),
  // which implies
  //   -e_0 > -(e_1 + 1) * alpha
  // to rewrite from (***):
  //   p(v) > 2 * B(e_0, e_1) > 2 * B((e_1 + 1) * alpha, e_1)
  //        = 2 * -((e_1 + 1) * alpha + e_1) * (n-2)/2
  //        = 2 * -(1 + alpha) * e_1 * (n-2)/2 - alpha * (n-2)
  //        = 2 * B(e_1) - alpha * (n-2)
  //        = -((1 + alpha) * e_1 + alpha) * (n-2).
  //
  // We sum up the bounds for all the iterations to get Lemma 4:
  //
  // Lemma 4: If the given problem is feasible, after k iterations the
  // price of any node is always greater than
  //   -((1 + alpha) * C + (k * alpha)) * (n-2)
  //
  // Proof: Suppose the price decrease of every node in the iteration
  // with epsilon_ == x is bounded by B(x) which is proportional to x
  // (not surprisingly, this will be the same function B() as
  // above). Assume for simplicity that C, the largest cost magnitude,
  // is a power of alpha. Then the price of each node, tallied across
  // all iterations is bounded
  //   p(v) > 2 * B(C/alpha) + 2 * B(C/alpha^2) + ... + 2 * B(kMinEpsilon)
  //        == 2 * B(C/alpha) * alpha / (alpha - 1)
  //        == 2 * B(C) / (alpha - 1).
  // As above, this needs some patching up to handle the fact that we
  // use truncating arithmetic. We saw that each iteration effectively
  // reduces the price bound by alpha * (n-2), hence if there are k
  // iterations, the bound is
  //   p(v) > 2 * B(C) / (alpha - 1) - k * alpha * (n-2)
  //        = -(1 + alpha) * C * (n-2) / (alpha - 1) - k * alpha * (n-2)
  //        = (n-2) * (C * (1 + alpha) / (1 - alpha) - k * alpha).
  //
  // The bound of lemma 4 can be used to warn for possible overflow of
  // arithmetic precision. But because it involves the number of
  // iterations, k, we might as well count through the iterations
  // simply adding up the bounds given by Lemma 3 to get a tighter
  // result. This is what the implementation does.

  // A lower bound on the price of any node at any time throughout the
  // computation. A price below this level proves infeasibility; this
  // value is used for feasibility detection. We use this value also
  // to rule out the possibility of arithmetic overflow or warn the
  // client that we have not been able to rule out that possibility.
  //
  // We can use the value implied by Lemma 4 here, but note that that
  // value includes k, the number of iterations. It's plenty fast if
  // we count through the iterations to compute that value, but if
  // we're going to count through the iterations, we might as well use
  // the two-parameter bound from Lemma 3, summing up as we go. This
  // gives us a tighter bound and more comprehensible code.
  //
  // While computing this bound, if we find the value justified by the
  // theory lies outside the representable range of CostValue, we
  // conclude that the given arc costs have magnitudes so large that
  // we cannot guarantee our calculations don't overflow. If the value
  // justified by the theory lies inside the representable range of
  // CostValue, we commit that our calculation will not overflow. This
  // commitment means we need to be careful with the amount by which
  // we relabel right-side nodes that are incident to any node with
  // only one neighbor.
  CostValue price_lower_bound_;

  // A bound on the amount by which a node's price can be reduced
  // during the current iteration, used only for slack
  // relabelings. Where epsilon is the first iteration's error
  // parameter and C is the largest magnitude of an arc cost, we set
  //   slack_relabeling_price_ = -B(C, epsilon)
  //                           = (C + epsilon) * (n-2)/2.
  //
  // We could use slack_relabeling_price_ for feasibility detection
  // but the feasibility threshold is double the slack relabeling
  // amount and we judge it not to be worth having to multiply by two
  // gratuitously to check feasibility in each double push
  // operation. Instead we settle for feasibility detection using
  // price_lower_bound_ instead, which is somewhat slower in the
  // infeasible case because more relabelings will be required for
  // some node price to attain the looser bound.
  CostValue slack_relabeling_price_;

  // Computes the value of the bound on price reduction for an
  // iteration, given the old and new values of epsilon_. The value
  // returned is the negation -B() of the function B() discussed
  // above.
  //
  // Avoids overflow in computing the bound, and sets *in_range =
  // false if the value of the bound doesn't fit in CostValue.
  inline CostValue PriceChangeBound(CostValue old_epsilon,
                                    CostValue new_epsilon,
                                    bool* in_range) const {
    const CostValue n = graph_->num_nodes();
    // We work in double-precision floating point to determine whether
    // we'll overflow the integral CostValue type's range of
    // representation. Switching between integer and double is a
    // rather expensive operation, but we do this only twice per
    // scaling iteration, so we can afford it rather than resort to
    // complex and subtle tricks within the bounds of integer
    // arithmetic.
    //
    // You will want to read the comments above about
    // price_lower_bound_ and slack_relabeling_price_, and have a
    // pencil handy. :-)
    const double result =
        static_cast<double>(std::max<CostValue>(1, n / 2 - 1)) *
        (static_cast<double>(old_epsilon) + static_cast<double>(new_epsilon));
    const double limit =
        static_cast<double>(std::numeric_limits<CostValue>::max());
    if (result > limit) {
      // Our integer computations could overflow.
      if (in_range != nullptr) *in_range = false;
      return std::numeric_limits<CostValue>::max();
    } else {
      // Don't touch *in_range; other computations could already have
      // set it to false and we don't want to overwrite that result.
      return static_cast<CostValue>(result);
    }
  }
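
  // A numeric illustration of the computation above, using the same
  // illustrative numbers as in the discussion of B(): with n = 10,
  // old_epsilon = 25 and new_epsilon = 5, the result is
  //   max(1, 10/2 - 1) * (25 + 5) = 4 * 30 = 120,
  // which is exactly -B(25, 5) = (25 + 5) * (10-2)/2.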

  // A scaled record of the largest arc-cost magnitude we've been
  // given during problem setup. This is used to set the initial value
  // of epsilon_, which in turn is used not only as the error
  // parameter but also to determine whether we risk arithmetic
  // overflow during the algorithm.
  //
  // Note: Our treatment of arithmetic overflow assumes the following
  // property of CostValue:
  //   -std::numeric_limits<CostValue>::max() is a representable
  //   CostValue.
  // That property is satisfied if CostValue uses a two's-complement
  // representation.
  CostValue largest_scaled_cost_magnitude_;

  // The total excess in the graph. Given our asymmetric definition of
  // epsilon-optimality and our use of the double-push operation, this
  // equals the number of unmatched left-side nodes.
  NodeIndex total_excess_;

  // Indexed by node index, the price_ values are maintained only for
  // right-side nodes.
  //
  // Note: We use a ZVector to only allocate a vector of size num_left_nodes_
  // instead of 2*num_left_nodes_ since the right-side node indices start at
  // num_left_nodes_.
  ZVector<CostValue> price_;

  // Indexed by left-side node index, the matched_arc_ array gives the
  // arc index of the arc matching any given left-side node, or
  // GraphType::kNilArc if the node is unmatched.
  std::vector<ArcIndex> matched_arc_;

  // Indexed by right-side node index, the matched_node_ array gives
  // the node index of the left-side node matching any given
  // right-side node, or GraphType::kNilNode if the right-side node is
  // unmatched.
  //
  // Note: We use a ZVector for the same reason as for price_.
  ZVector<NodeIndex> matched_node_;

  // The array of arc costs as given in the problem definition, except
  // that they are scaled up by the number of nodes in the graph so we
  // can use integer arithmetic throughout.
  std::vector<CostValue> scaled_arc_cost_;

  // The container of active nodes (i.e., unmatched nodes). This can
  // be switched easily between ActiveNodeStack and ActiveNodeQueue
  // for experimentation.
  std::unique_ptr<ActiveNodeContainerInterface> active_nodes_;

  // Statistics giving the overall numbers of various operations the
  // algorithm performs.
  Stats total_stats_;

  // Statistics giving the numbers of various operations the algorithm
  // has performed in the current iteration.
  Stats iteration_stats_;

  DISALLOW_COPY_AND_ASSIGN(LinearSumAssignment);
};

// Implementation of out-of-line LinearSumAssignment template member
// functions.

template <typename GraphType>
const CostValue LinearSumAssignment<GraphType>::kMinEpsilon = 1;

template <typename GraphType>
LinearSumAssignment<GraphType>::LinearSumAssignment(
    const GraphType& graph, const NodeIndex num_left_nodes)
    : graph_(&graph),
      num_left_nodes_(num_left_nodes),
      success_(false),
      cost_scaling_factor_(1 + num_left_nodes),
      alpha_(FLAGS_assignment_alpha),
      epsilon_(0),
      price_lower_bound_(0),
      slack_relabeling_price_(0),
      largest_scaled_cost_magnitude_(0),
      total_excess_(0),
      price_(num_left_nodes, 2 * num_left_nodes - 1),
      matched_arc_(num_left_nodes, 0),
      matched_node_(num_left_nodes, 2 * num_left_nodes - 1),
      scaled_arc_cost_(graph.max_end_arc_index(), 0),
      active_nodes_(FLAGS_assignment_stack_order
                        ? static_cast<ActiveNodeContainerInterface*>(
                              new ActiveNodeStack())
                        : static_cast<ActiveNodeContainerInterface*>(
                              new ActiveNodeQueue())) {}

template <typename GraphType>
LinearSumAssignment<GraphType>::LinearSumAssignment(
    const NodeIndex num_left_nodes, const ArcIndex num_arcs)
    : graph_(nullptr),
      num_left_nodes_(num_left_nodes),
      success_(false),
      cost_scaling_factor_(1 + num_left_nodes),
      alpha_(FLAGS_assignment_alpha),
      epsilon_(0),
      price_lower_bound_(0),
      slack_relabeling_price_(0),
      largest_scaled_cost_magnitude_(0),
      total_excess_(0),
      price_(num_left_nodes, 2 * num_left_nodes - 1),
      matched_arc_(num_left_nodes, 0),
      matched_node_(num_left_nodes, 2 * num_left_nodes - 1),
      scaled_arc_cost_(num_arcs, 0),
      active_nodes_(FLAGS_assignment_stack_order
                        ? static_cast<ActiveNodeContainerInterface*>(
                              new ActiveNodeStack())
                        : static_cast<ActiveNodeContainerInterface*>(
                              new ActiveNodeQueue())) {}

template <typename GraphType>
void LinearSumAssignment<GraphType>::SetArcCost(ArcIndex arc, CostValue cost) {
  if (graph_ != nullptr) {
    DCHECK_GE(arc, 0);
    DCHECK_LT(arc, graph_->num_arcs());
    NodeIndex head = Head(arc);
    DCHECK_LE(num_left_nodes_, head);
  }
  cost *= cost_scaling_factor_;
  const CostValue cost_magnitude = std::abs(cost);
  largest_scaled_cost_magnitude_ =
      std::max(largest_scaled_cost_magnitude_, cost_magnitude);
  scaled_arc_cost_[arc] = cost;
}
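
// A small worked example of the scaling arithmetic above: with
// num_left_nodes == 3, the constructor sets cost_scaling_factor_ == 4,
// so SetArcCost(arc, 7) stores scaled_arc_cost_[arc] == 28. ArcCost()
// later recovers 28 / 4 == 7 exactly, and its DCHECK verifies that the
// stored value is a multiple of the scaling factor.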

template <typename ArcIndexType>
class CostValueCycleHandler : public PermutationCycleHandler<ArcIndexType> {
 public:
  explicit CostValueCycleHandler(std::vector<CostValue>* cost)
      : temp_(0), cost_(cost) {}

  void SetTempFromIndex(ArcIndexType source) override {
    temp_ = (*cost_)[source];
  }

  void SetIndexFromIndex(ArcIndexType source,
                         ArcIndexType destination) const override {
    (*cost_)[destination] = (*cost_)[source];
  }

  void SetIndexFromTemp(ArcIndexType destination) const override {
    (*cost_)[destination] = temp_;
  }

  ~CostValueCycleHandler() override {}

 private:
  CostValue temp_;
  std::vector<CostValue>* const cost_;

  DISALLOW_COPY_AND_ASSIGN(CostValueCycleHandler);
};

// Logically this class should be defined inside OptimizeGraphLayout,
// but compilation fails if we do that because C++98 doesn't allow
// instantiation of member templates with function-scoped types as
// template parameters, which in turn is because those function-scoped
// types lack linkage.
template <typename GraphType>
class ArcIndexOrderingByTailNode {
 public:
  explicit ArcIndexOrderingByTailNode(const GraphType& graph) : graph_(graph) {}

  // Says ArcIndex a is less than ArcIndex b if arc a's tail is less
  // than arc b's tail. If their tails are equal, orders according to
  // heads.
  bool operator()(typename GraphType::ArcIndex a,
                  typename GraphType::ArcIndex b) const {
    return ((graph_.Tail(a) < graph_.Tail(b)) ||
            ((graph_.Tail(a) == graph_.Tail(b)) &&
             (graph_.Head(a) < graph_.Head(b))));
  }

 private:
  const GraphType& graph_;

  // Copy and assign are allowed; they have to be for STL to work
  // with this functor, although it seems like a bug for STL to be
  // written that way.
};

// Passes ownership of the cycle handler to the caller.
template <typename GraphType>
PermutationCycleHandler<typename GraphType::ArcIndex>*
LinearSumAssignment<GraphType>::ArcAnnotationCycleHandler() {
  return new CostValueCycleHandler<typename GraphType::ArcIndex>(
      &scaled_arc_cost_);
}

template <typename GraphType>
void LinearSumAssignment<GraphType>::OptimizeGraphLayout(GraphType* graph) {
  // The graph argument is only to give us a non-const-qualified
  // handle on the graph we already have. Any different graph is
  // nonsense.
  DCHECK_EQ(graph_, graph);
  const ArcIndexOrderingByTailNode<GraphType> compare(*graph_);
  CostValueCycleHandler<typename GraphType::ArcIndex> cycle_handler(
      &scaled_arc_cost_);
  TailArrayManager<GraphType> tail_array_manager(graph);
  tail_array_manager.BuildTailArrayFromAdjacencyListsIfForwardGraph();
  graph->GroupForwardArcsByFunctor(compare, &cycle_handler);
  tail_array_manager.ReleaseTailArrayIfForwardGraph();
}

template <typename GraphType>
CostValue LinearSumAssignment<GraphType>::NewEpsilon(
    const CostValue current_epsilon) const {
  return std::max(current_epsilon / alpha_, kMinEpsilon);
}
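
// For example, with alpha_ == 5, repeated NewEpsilon() calls starting
// from a current epsilon_ of 1000 yield 200, 40, 8, 1, 1, ...: the
// division truncates (which the price-bound discussion above patches
// up), and the std::max clamps the result at kMinEpsilon.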

template <typename GraphType>
bool LinearSumAssignment<GraphType>::UpdateEpsilon() {
  CostValue new_epsilon = NewEpsilon(epsilon_);
  slack_relabeling_price_ = PriceChangeBound(epsilon_, new_epsilon, nullptr);
  epsilon_ = new_epsilon;
  VLOG(3) << "Updated: epsilon_ == " << epsilon_;
  VLOG(4) << "slack_relabeling_price_ == " << slack_relabeling_price_;
  DCHECK_GT(slack_relabeling_price_, 0);
  // For today we always return true; in the future updating epsilon
  // in sophisticated ways could conceivably detect infeasibility
  // before the first iteration of Refine().
  return true;
}

// For production code that checks whether a left-side node is active.
template <typename GraphType>
inline bool LinearSumAssignment<GraphType>::IsActive(
    NodeIndex left_node) const {
  DCHECK_LT(left_node, num_left_nodes_);
  return matched_arc_[left_node] == GraphType::kNilArc;
}

// Only for debugging. Separate from the production IsActive() method
// so that method can assert that its argument is a left-side node,
// while for debugging we need to be able to test any node.
template <typename GraphType>
inline bool LinearSumAssignment<GraphType>::IsActiveForDebugging(
    NodeIndex node) const {
  if (node < num_left_nodes_) {
    return IsActive(node);
  } else {
    return matched_node_[node] == GraphType::kNilNode;
  }
}

template <typename GraphType>
void LinearSumAssignment<GraphType>::InitializeActiveNodeContainer() {
  DCHECK(active_nodes_->Empty());
  for (BipartiteLeftNodeIterator node_it(*graph_, num_left_nodes_);
       node_it.Ok(); node_it.Next()) {
    const NodeIndex node = node_it.Index();
    if (IsActive(node)) {
      active_nodes_->Add(node);
    }
  }
}

// There exists a price function such that the admissible arcs at the
// beginning of an iteration are exactly the reverse arcs of all
// matching arcs. Saturating all admissible arcs with respect to that
// price function therefore means simply unmatching every matched
// node.
//
// In the future we will price out arcs, which will reduce the set of
// nodes we unmatch here. If a matching arc is priced out, we will not
// unmatch its endpoints since that element of the matching is
// guaranteed not to change.
template <typename GraphType>
void LinearSumAssignment<GraphType>::SaturateNegativeArcs() {
  total_excess_ = 0;
  for (BipartiteLeftNodeIterator node_it(*graph_, num_left_nodes_);
       node_it.Ok(); node_it.Next()) {
    const NodeIndex node = node_it.Index();
    if (IsActive(node)) {
      // This can happen in the first iteration when nothing is
      // matched yet.
      total_excess_ += 1;
    } else {
      // We're about to create a unit of excess by unmatching these nodes.
      total_excess_ += 1;
      const NodeIndex mate = GetMate(node);
      matched_arc_[node] = GraphType::kNilArc;
      matched_node_[mate] = GraphType::kNilNode;
    }
  }
}
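
// Observe that until arc pricing is implemented, the loop above adds
// one unit of excess for every left-side node whether or not it was
// matched, so on return total_excess_ == num_left_nodes_.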

// Returns true for success, false for infeasible.
template <typename GraphType>
bool LinearSumAssignment<GraphType>::DoublePush(NodeIndex source) {
  DCHECK_GT(num_left_nodes_, source);
  DCHECK(IsActive(source)) << "Node " << source
                           << "must be active (unmatched)!";
  ImplicitPriceSummary summary = BestArcAndGap(source);
  const ArcIndex best_arc = summary.first;
  const CostValue gap = summary.second;
  // Now we have the best arc incident to source, i.e., the one with
  // minimum reduced cost. Match that arc, unmatching its head if
  // necessary.
  if (best_arc == GraphType::kNilArc) {
    return false;
  }
  const NodeIndex new_mate = Head(best_arc);
  const NodeIndex to_unmatch = matched_node_[new_mate];
  if (to_unmatch != GraphType::kNilNode) {
    // Unmatch new_mate from its current mate, pushing the unit of
    // flow back to a node on the left side as a unit of excess.
    matched_arc_[to_unmatch] = GraphType::kNilArc;
    active_nodes_->Add(to_unmatch);
    // This counts as a double push.
    iteration_stats_.double_pushes_ += 1;
  } else {
    // We are about to increase the cardinality of the matching.
    total_excess_ -= 1;
    // This counts as a single push.
    iteration_stats_.pushes_ += 1;
  }
  matched_arc_[source] = best_arc;
  matched_node_[new_mate] = source;
  // Finally, relabel new_mate.
  iteration_stats_.relabelings_ += 1;
  const CostValue new_price = price_[new_mate] - gap - epsilon_;
  price_[new_mate] = new_price;
  return new_price >= price_lower_bound_;
}
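
// A worked example of the relabeling arithmetic above, with made-up
// numbers: suppose epsilon_ == 2 and BestArcAndGap(source) sees
// partial reduced costs {3, 7}, so best_arc is the first arc and
// gap == 4. If new_mate's old price was 10, its new price is
// 10 - 4 - 2 == 4. Assuming PartialReducedCost() subtracts the
// head's price, this relabeling raises best_arc's partial reduced
// cost to epsilon_ above the runner-up's, which is what keeps each
// double push making progress.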

template <typename GraphType>
bool LinearSumAssignment<GraphType>::Refine() {
  SaturateNegativeArcs();
  InitializeActiveNodeContainer();
  while (total_excess_ > 0) {
    // Get an active node (i.e., one with excess == 1) and discharge
    // it using DoublePush.
    const NodeIndex node = active_nodes_->Get();
    if (!DoublePush(node)) {
      // Infeasibility detected.
      //
      // If infeasibility is detected after the first iteration, we
      // have a bug. We don't crash production code in this case, but
      // we know we're returning a wrong answer, so we leave a
      // message in the logs to improve our chances of chasing down
      // the problem.
      LOG_IF(DFATAL, total_stats_.refinements_ > 0)
          << "Infeasibility detection triggered after first iteration found "
          << "a feasible assignment!";
      return false;
    }
  }
  DCHECK(active_nodes_->Empty());
  iteration_stats_.refinements_ += 1;
  return true;
}

// Computes best_arc, the minimum reduced-cost arc incident to
// left_node, and admissibility_gap, the amount by which the reduced
// cost of best_arc must be increased to make it equal in reduced
// cost to another residual arc incident to left_node.
//
// Precondition: left_node is unmatched and has at least one incident
// arc. This allows us to simplify the code. The debug-only
// counterpart to this routine is LinearSumAssignment::ImplicitPrice()
// and it assumes there is an incident arc but does not assume the
// node is unmatched. The condition that each left node has at least
// one incident arc is explicitly computed during FinalizeSetup().
//
// This function is large enough that our suggestion that the compiler
// inline it might be pointless.
template <typename GraphType>
inline typename LinearSumAssignment<GraphType>::ImplicitPriceSummary
LinearSumAssignment<GraphType>::BestArcAndGap(NodeIndex left_node) const {
  DCHECK(IsActive(left_node))
      << "Node " << left_node << " must be active (unmatched)!";
  DCHECK_GT(epsilon_, 0);
  typename GraphType::OutgoingArcIterator arc_it(*graph_, left_node);
  ArcIndex best_arc = arc_it.Index();
  CostValue min_partial_reduced_cost = PartialReducedCost(best_arc);
  // We choose second_min_partial_reduced_cost so that in the case of
  // the largest possible gap (which results from a left-side node
  // with only a single incident residual arc), the corresponding
  // right-side node will be relabeled by an amount that exactly
  // matches slack_relabeling_price_.
  const CostValue max_gap = slack_relabeling_price_ - epsilon_;
  CostValue second_min_partial_reduced_cost =
      min_partial_reduced_cost + max_gap;
  for (arc_it.Next(); arc_it.Ok(); arc_it.Next()) {
    const ArcIndex arc = arc_it.Index();
    const CostValue partial_reduced_cost = PartialReducedCost(arc);
    if (partial_reduced_cost < second_min_partial_reduced_cost) {
      if (partial_reduced_cost < min_partial_reduced_cost) {
        best_arc = arc;
        second_min_partial_reduced_cost = min_partial_reduced_cost;
        min_partial_reduced_cost = partial_reduced_cost;
      } else {
        second_min_partial_reduced_cost = partial_reduced_cost;
      }
    }
  }
  const CostValue gap = std::min<CostValue>(
      second_min_partial_reduced_cost - min_partial_reduced_cost, max_gap);
  DCHECK_GE(gap, 0);
  return std::make_pair(best_arc, gap);
}
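
// For example (illustration only), if left_node's arcs have partial
// reduced costs {5, 2, 9}, the scan above ends with
// min_partial_reduced_cost == 2 (best_arc is the second arc) and
// second_min_partial_reduced_cost == 5, giving gap == 3 provided
// max_gap >= 3. With only one incident arc the loop body never runs,
// the second minimum stays at min + max_gap, and the gap is exactly
// max_gap, i.e., the maximal relabeling described in the comment
// before the loop.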

// Only for debugging.
//
// Requires the precondition, explicitly computed in FinalizeSetup(),
// that every left-side node has at least one incident arc.
template <typename GraphType>
inline CostValue LinearSumAssignment<GraphType>::ImplicitPrice(
    NodeIndex left_node) const {
  DCHECK_GT(num_left_nodes_, left_node);
  DCHECK_GT(epsilon_, 0);
  typename GraphType::OutgoingArcIterator arc_it(*graph_, left_node);
  // We must not execute this method if left_node has no incident arc.
  DCHECK(arc_it.Ok());
  ArcIndex best_arc = arc_it.Index();
  if (best_arc == matched_arc_[left_node]) {
    arc_it.Next();
    if (arc_it.Ok()) {
      best_arc = arc_it.Index();
    }
  }
  CostValue min_partial_reduced_cost = PartialReducedCost(best_arc);
  if (!arc_it.Ok()) {
    // Only one arc is incident to left_node, and the node is
    // currently matched along that arc, which must be the case in any
    // feasible solution. Therefore we implicitly price this node so
    // low that we will never consider unmatching it.
    return -(min_partial_reduced_cost + slack_relabeling_price_);
  }
  for (arc_it.Next(); arc_it.Ok(); arc_it.Next()) {
    const ArcIndex arc = arc_it.Index();
    if (arc != matched_arc_[left_node]) {
      const CostValue partial_reduced_cost = PartialReducedCost(arc);
      if (partial_reduced_cost < min_partial_reduced_cost) {
        min_partial_reduced_cost = partial_reduced_cost;
      }
    }
  }
  return -min_partial_reduced_cost;
}

// Only for debugging.
template <typename GraphType>
bool LinearSumAssignment<GraphType>::AllMatched() const {
  for (NodeIndex node = 0; node < graph_->num_nodes(); ++node) {
    if (IsActiveForDebugging(node)) {
      return false;
    }
  }
  return true;
}

// Only for debugging.
template <typename GraphType>
bool LinearSumAssignment<GraphType>::EpsilonOptimal() const {
  for (BipartiteLeftNodeIterator node_it(*graph_, num_left_nodes_);
       node_it.Ok(); node_it.Next()) {
    const NodeIndex left_node = node_it.Index();
    // Get the implicit price of left_node and make sure the reduced
    // costs of left_node's incident arcs are in bounds.
    CostValue left_node_price = ImplicitPrice(left_node);
    for (typename GraphType::OutgoingArcIterator arc_it(*graph_, left_node);
         arc_it.Ok(); arc_it.Next()) {
      const ArcIndex arc = arc_it.Index();
      const CostValue reduced_cost = left_node_price + PartialReducedCost(arc);
      // Note the asymmetric definition of epsilon-optimality that we
      // use because it means we can saturate all admissible arcs in
      // the beginning of Refine() just by unmatching all matched
      // nodes.
      if (matched_arc_[left_node] == arc) {
        // The reverse arc is residual. Epsilon-optimality requires
        // that the reduced cost of the forward arc be at most
        // epsilon_.
        if (reduced_cost > epsilon_) {
          return false;
        }
      } else {
        // The forward arc is residual. Epsilon-optimality requires
        // that the reduced cost of the forward arc be at least zero.
        if (reduced_cost < 0) {
          return false;
        }
      }
    }
  }
  return true;
}
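
// Restated compactly: with p the implicit price of left_node, the
// check above requires p + PartialReducedCost(arc) <= epsilon_ for
// the matched arc and p + PartialReducedCost(arc) >= 0 for every
// other incident arc; this asymmetry is what allows Refine() to
// begin by simply unmatching everything.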

template <typename GraphType>
bool LinearSumAssignment<GraphType>::FinalizeSetup() {
  incidence_precondition_satisfied_ = true;
  // epsilon_ must be greater than kMinEpsilon so that in the case
  // where the largest arc cost is zero, we still do a Refine()
  // iteration.
  epsilon_ = std::max(largest_scaled_cost_magnitude_, kMinEpsilon + 1);
  VLOG(2) << "Largest given cost magnitude: "
          << largest_scaled_cost_magnitude_ / cost_scaling_factor_;
  // Initialize left-side node-indexed arrays and check incidence
  // precondition.
  for (NodeIndex node = 0; node < num_left_nodes_; ++node) {
    matched_arc_[node] = GraphType::kNilArc;
    typename GraphType::OutgoingArcIterator arc_it(*graph_, node);
    if (!arc_it.Ok()) {
      incidence_precondition_satisfied_ = false;
    }
  }
  // Initialize right-side node-indexed arrays. Example: prices are
  // stored only for right-side nodes.
  for (NodeIndex node = num_left_nodes_; node < graph_->num_nodes(); ++node) {
    price_[node] = 0;
    matched_node_[node] = GraphType::kNilNode;
  }
  bool in_range = true;
  double double_price_lower_bound = 0.0;
  CostValue new_error_parameter;
  CostValue old_error_parameter = epsilon_;
  do {
    new_error_parameter = NewEpsilon(old_error_parameter);
    double_price_lower_bound -=
        2.0 * static_cast<double>(PriceChangeBound(
                  old_error_parameter, new_error_parameter, &in_range));
    old_error_parameter = new_error_parameter;
  } while (new_error_parameter != kMinEpsilon);
  const double limit =
      -static_cast<double>(std::numeric_limits<CostValue>::max());
  if (double_price_lower_bound < limit) {
    in_range = false;
    price_lower_bound_ = -std::numeric_limits<CostValue>::max();
  } else {
    price_lower_bound_ = static_cast<CostValue>(double_price_lower_bound);
  }
  VLOG(4) << "price_lower_bound_ == " << price_lower_bound_;
  DCHECK_LE(price_lower_bound_, 0);
  if (!in_range) {
    LOG(WARNING) << "Price change bound exceeds range of representable "
                 << "costs; arithmetic overflow is not ruled out and "
                 << "infeasibility might go undetected.";
  }
  return in_range;
}
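
// The do/while loop in FinalizeSetup() walks the same epsilon
// schedule that ComputeAssignment() will later follow, one pass per
// scaling phase, accumulating in double arithmetic a conservative
// lower bound on any price Refine() can produce. Doing the
// accumulation in double lets us notice, before solving, that
// CostValue arithmetic might overflow, which is what the in_range
// flag and the warning above report.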

template <typename GraphType>
void LinearSumAssignment<GraphType>::ReportAndAccumulateStats() {
  total_stats_.Add(iteration_stats_);
  VLOG(3) << "Iteration stats: " << iteration_stats_.StatsString();
  iteration_stats_.Clear();
}

template <typename GraphType>
bool LinearSumAssignment<GraphType>::ComputeAssignment() {
  CHECK(graph_ != nullptr);
  bool ok = graph_->num_nodes() == 2 * num_left_nodes_;
  if (!ok) return false;
  // Note: FinalizeSetup() might have been called already by white-box
  // test code or by a client that wants to react to the possibility
  // of overflow before solving the given problem, but FinalizeSetup()
  // is idempotent and reasonably fast, so we call it unconditionally
  // here.
  FinalizeSetup();
  ok = ok && incidence_precondition_satisfied_;
  DCHECK(!ok || EpsilonOptimal());
  while (ok && epsilon_ > kMinEpsilon) {
    ok = ok && UpdateEpsilon();
    ok = ok && Refine();
    ReportAndAccumulateStats();
    DCHECK(!ok || EpsilonOptimal());
    DCHECK(!ok || AllMatched());
  }
  success_ = ok;
  VLOG(1) << "Overall stats: " << total_stats_.StatsString();
  return ok;
}

template <typename GraphType>
CostValue LinearSumAssignment<GraphType>::GetCost() const {
  // It is illegal to call this method unless we successfully computed
  // an optimum assignment.
  DCHECK(success_);
  CostValue cost = 0;
  for (BipartiteLeftNodeIterator node_it(*this); node_it.Ok(); node_it.Next()) {
    cost += GetAssignmentCost(node_it.Index());
  }
  return cost;
}
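
// A minimal end-to-end driver sketch (illustration only; assumes the
// StarGraph typedef from ebert_graph.h and the constructor and
// SetArcCost() interface declared earlier in this file):
//
//   StarGraph graph(num_nodes, num_arcs);
//   LinearSumAssignment<StarGraph> assignment(graph, num_left_nodes);
//   for (/* each (left, right, cost) triple */) {
//     ArcIndex arc = graph.AddArc(left, num_left_nodes + right);
//     assignment.SetArcCost(arc, cost);
//   }
//   if (assignment.ComputeAssignment()) {
//     CostValue optimal = assignment.GetCost();
//     // assignment.GetMate(left) is the right-side node matched to
//     // left; assignment.GetAssignmentCost(left) is that arc's cost.
//   }
//
// ComputeAssignment() returns false on infeasibility or when the
// graph's node count is not exactly 2 * num_left_nodes; GetCost()
// must not be called in that case.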

}  // namespace operations_research

#endif  // OR_TOOLS_GRAPH_LINEAR_ASSIGNMENT_H_