pub_bandit_exp3.h 4.09 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
/*                                                                           */
/*                  This file is part of the program and library             */
/*         SCIP --- Solving Constraint Integer Programs                      */
/*                                                                           */
/*    Copyright (C) 2002-2020 Konrad-Zuse-Zentrum                            */
/*                            fuer Informationstechnik Berlin                */
/*                                                                           */
/*  SCIP is distributed under the terms of the ZIB Academic License.         */
/*                                                                           */
/*  You should have received a copy of the ZIB Academic License              */
/*  along with SCIP; see the file COPYING. If not visit scipopt.org.         */
/*                                                                           */
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */

/**@file   pub_bandit_exp3.h
 * @ingroup PublicBanditMethods
 * @brief  public methods for Exp.3
 * @author Gregor Hendel
 */

/*---+----1----+----2----+----3----+----4----+----5----+----6----+----7----+----8----+----9----+----0----+----1----+----2*/

#ifndef SRC_SCIP_PUB_BANDIT_EXP3_H_
#define SRC_SCIP_PUB_BANDIT_EXP3_H_

#include "scip/def.h"
#include "scip/type_bandit.h"
#include "scip/type_retcode.h"
#include "scip/type_scip.h"

#ifdef __cplusplus
extern "C" {
#endif

/**@addtogroup PublicBanditMethods
 *
 * ## Exp.3
 *
 * Exp.3 is a randomized selection method for the multi-armed bandit problem.
 *
 * Exp.3 maintains a probability distribution
 * according to which an action is drawn
 * in every iteration.
 * The probability distribution is a mixture of
 * a uniform distribution and a softmax distribution
 * based on the cumulative rewards of the actions.
 * The weight of the uniform distribution in the mixture
 * is controlled by the parameter \f$ \gamma \f$, i.e.,
 * setting \f$ \gamma = 1\f$ uses a uniform distribution
 * in every selection step.
 * The cumulative reward for the actions can be
 * fine-tuned by adding a general bias for all actions.
 * The bias is given by the parameter \f$ \beta \f$.
 *
 * @{
 */

/** creates and resets an Exp.3 bandit algorithm using \p scip pointer
 *
 *  The newly created bandit algorithm is stored in \p exp3.
 *
 *  @return a SCIP_RETCODE status code
 *
 *  @note NOTE(review): presumably SCIP_OKAY is returned on success; confirm against the implementation
 */
SCIP_EXPORT
SCIP_RETCODE SCIPcreateBanditExp3(
   SCIP*                 scip,               /**< SCIP data structure */
   SCIP_BANDIT**         exp3,               /**< pointer to store bandit algorithm */
   SCIP_Real*            priorities,         /**< nonnegative priorities for each action, or NULL if not needed */
   SCIP_Real             gammaparam,         /**< weight of the uniform distribution in the mixture: gamma ~ 1 selects
                                              *   (almost) uniformly, gamma ~ 0 selects (almost) purely weight driven */
   SCIP_Real             beta,               /**< gain offset between 0 and 1 at every observation */
   int                   nactions,           /**< the positive number of actions for this bandit algorithm */
   unsigned int          initseed            /**< initial seed for random number generation */
   );

/** sets the gamma parameter of an Exp.3 bandit algorithm
 *
 *  Increasing gamma increases the weight of the uniform distribution in the selection probabilities.
 */
SCIP_EXPORT
void SCIPsetGammaExp3(
   SCIP_BANDIT*          exp3,               /**< bandit algorithm */
   SCIP_Real             gammaparam          /**< weight of the uniform distribution in the mixture: gamma ~ 1 selects
                                              *   (almost) uniformly, gamma ~ 0 selects (almost) purely weight driven */
   );

/** sets the beta parameter of an Exp.3 bandit algorithm
 *
 *  Increasing beta increases the gain offset for actions that were not played.
 */
SCIP_EXPORT
void SCIPsetBetaExp3(
   SCIP_BANDIT*          exp3,               /**< bandit algorithm */
   SCIP_Real             beta                /**< gain offset between 0 and 1 at every observation */
   );

/** returns the probability with which the Exp.3 bandit algorithm plays the given action
 *
 *  @return probability to play \p action
 *
 *  @note NOTE(review): \p action is presumably expected to be a valid index between 0 and the number of
 *        actions minus 1; confirm against the implementation
 */
SCIP_EXPORT
SCIP_Real SCIPgetProbabilityExp3(
   SCIP_BANDIT*          exp3,               /**< bandit algorithm */
   int                   action              /**< index of the requested action */
   );

/** @}*/

#ifdef __cplusplus
}
#endif

#endif