Source code for routerl.human_learning.learning_model

import random

import numpy as np

from abc import ABC, abstractmethod

from routerl.keychain import Keychain as kc



class BaseLearningModel(ABC):
    """Abstract base class for the learning models used to model human
    learning and decision-making.

    Users can create their own learning models by inheriting from this class.
    """

    def __init__(self):
        pass

    @abstractmethod
    def act(self, state) -> int:
        """Method to select an action based on the current state and cost expectations.

        Args:
            state (Any): The current state of the environment.

        Returns:
            action (int): The index of the selected action.
        """
        pass

    @abstractmethod
    def learn(self, state, action, reward) -> None:
        """Method to update the model based on the current state and cost.

        Args:
            state (Any): The current state of the environment.
            action (Any): The action to take.
            reward (Any): The reward received from the environment.

        Returns:
            None
        """
        pass
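

# A minimal sketch of a user-defined model (illustrative only, not part of the
# library): inherit from BaseLearningModel and implement ``act`` and ``learn``.
# The greedy action rule and the fixed learning rate below are assumptions
# chosen for brevity.
class ExampleGreedyModel(BaseLearningModel):
    def __init__(self, initial_knowledge, learning_rate=0.1):
        super().__init__()
        self.cost = np.array(initial_knowledge, dtype=float)
        self.learning_rate = learning_rate

    def act(self, state) -> int:
        # Pick the option with the lowest expected cost.
        return int(np.argmin(self.cost))

    def learn(self, state, action, reward) -> None:
        # Shift the chosen option's cost expectation towards the observed reward.
        self.cost[action] += self.learning_rate * (reward - self.cost[action])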


class Gawron(BaseLearningModel):
    """The Gawron learning model.

    This model is based on `Gawron (1998) <https://kups.ub.uni-koeln.de/9257/>`_.

    In summary, it iteratively shifts the cost expectations towards the
    received reward.

    For decision-making, it calculates action utilities based on the ``beta``
    parameter and the cost expectations, and selects the action with the
    lowest utility.

    Args:
        params (dict): A dictionary containing model parameters.
        initial_knowledge (list or array): Initial knowledge of cost expectations.

    Attributes:
        beta (float): A parameter representing deviations in individual decision-making.
        alpha_zero (float): Agent's adaptation to new experiences.
        alpha_j (float): Weight of the previous cost expectation (1 - alpha_zero).
        cost (np.ndarray): Agent's cost expectations for each option.
    """

    def __init__(self, params, initial_knowledge):
        super().__init__()
        # Perturb beta with uniform noise to model heterogeneity across agents
        beta_randomness = params[kc.BETA_RANDOMNESS]
        self.beta = random.uniform(params[kc.BETA] - beta_randomness,
                                   params[kc.BETA] + beta_randomness)
        # Learning rate components
        self.alpha_zero = params[kc.ALPHA_ZERO]
        self.alpha_j = 1.0 - self.alpha_zero
        # Initialize cost expectations from the initial knowledge
        self.cost = np.array(initial_knowledge, dtype=float)

    def act(self, state) -> int:
        """Selects an action based on the cost expectations.

        Args:
            state (Any): The current state of the environment (not used).

        Returns:
            action (int): The index of the selected action.
        """
        utilities = list(map(lambda x: np.exp(x * self.beta), self.cost))
        action = utilities.index(min(utilities))
        return action

    def learn(self, state, action, reward) -> None:
        """Updates the cost expectation of the taken action based on the received reward.

        Args:
            state (Any): The current state of the environment (not used).
            action (int): The action that was taken.
            reward (float): The reward received after taking the action.

        Returns:
            None
        """
        self.cost[action] = (self.alpha_j * self.cost[action]) + (self.alpha_zero * reward)
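

# Illustrative usage sketch (not part of the original module): the parameter
# and cost values below are assumptions chosen so the update can be verified
# by hand. With BETA_RANDOMNESS set to 0, beta is deterministic.
def _example_gawron():
    params = {kc.BETA: -1.0, kc.BETA_RANDOMNESS: 0.0, kc.ALPHA_ZERO: 0.2}
    model = Gawron(params, initial_knowledge=[-10.0, -15.0])
    # utilities = [exp(10), exp(15)]; exp(10) is the minimum, so index 0 is chosen.
    assert model.act(state=None) == 0
    # The update is exponential smoothing:
    # cost[0] = 0.8 * (-10.0) + 0.2 * (-20.0) = -12.0
    model.learn(state=None, action=0, reward=-20.0)
    assert abs(model.cost[0] - (-12.0)) < 1e-9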


class Culo(BaseLearningModel):
    """The CUmulative LOgit (Culo) learning model.

    This model is based on `Li et al. (2024) <https://pubsonline.informs.org/doi/abs/10.1287/trsc.2023.0132/>`_.

    In summary, it updates its cost expectations by iteratively accumulating
    perceived rewards.

    For decision-making, it calculates action utilities based on the ``beta``
    parameter and the cost expectations, and selects the action with the
    lowest utility.

    Args:
        params (dict): A dictionary containing model parameters.
        initial_knowledge (list or array): Initial knowledge of cost expectations.

    Attributes:
        beta (float): A parameter representing deviations in individual decision-making.
        alpha_zero (float): Agent's adaptation to new experiences.
        alpha_j (float): Weight of the previous cost expectation (constant, equal to 1).
        cost (np.ndarray): Agent's cost expectations for each option.
    """

    def __init__(self, params, initial_knowledge):
        super().__init__()
        # Perturb beta with uniform noise to model heterogeneity across agents
        beta_randomness = params[kc.BETA_RANDOMNESS]
        self.beta = random.uniform(params[kc.BETA] - beta_randomness,
                                   params[kc.BETA] + beta_randomness)
        # Learning rate components: alpha_j is fixed at 1, so past expectations
        # are never discounted and scaled rewards accumulate
        self.alpha_zero = params[kc.ALPHA_ZERO]
        self.alpha_j = 1.0
        # Initialize cost expectations from the initial knowledge
        self.cost = np.array(initial_knowledge, dtype=float)

    def act(self, state) -> int:
        """Selects an action based on the cost expectations.

        Args:
            state (Any): The current state of the environment (not used).

        Returns:
            action (int): The index of the selected action.
        """
        utilities = list(map(lambda x: np.exp(x * self.beta), self.cost))
        action = utilities.index(min(utilities))
        return action

    def learn(self, state, action, reward) -> None:
        """Updates the cost expectation of the taken action based on the received reward.

        Args:
            state (Any): The current state of the environment (not used).
            action (int): The action that was taken.
            reward (float): The reward received after taking the action.

        Returns:
            None
        """
        self.cost[action] = (self.alpha_j * self.cost[action]) + (self.alpha_zero * reward)
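

# Illustrative contrast with Gawron (not part of the original module; values
# are assumptions): because ``alpha_j`` is fixed at 1, Culo accumulates the
# scaled rewards on top of the old expectation instead of averaging them away.
def _example_culo():
    params = {kc.BETA: -1.0, kc.BETA_RANDOMNESS: 0.0, kc.ALPHA_ZERO: 0.2}
    model = Culo(params, initial_knowledge=[-10.0])
    for _ in range(3):
        model.learn(state=None, action=0, reward=-5.0)
    # Each update adds alpha_zero * reward = -1.0, so
    # cost[0] = -10.0 + 3 * (-1.0) = -13.0
    assert abs(model.cost[0] - (-13.0)) < 1e-9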


class WeightedAverage(BaseLearningModel):
    """The Weighted Average learning model.

    Theory based on `Cascetta (2009) <https://link.springer.com/book/10.1007/978-0-387-75857-2/>`_.

    In summary, the model uses the reward and a weighted average of the past
    cost expectations to update the current cost expectation.

    For decision-making, it calculates action utilities based on the ``beta``
    parameter and the cost expectations, and selects the action with the
    lowest utility.

    Args:
        params (dict): A dictionary containing model parameters.
        initial_knowledge (list or array): Initial knowledge of cost expectations.

    Attributes:
        beta (float): A parameter representing deviations in individual decision-making.
        alpha_zero (float): Agent's adaptation to new experiences.
        alpha_j (float): Weight of the previous cost expectation (1 - alpha_zero).
        remember (int): Memory size, i.e. how many past cost expectations are kept per option.
        cost (np.ndarray): Agent's cost expectations for each option.
        memory (list of lists): The remembered cost expectations for each option.
    """

    def __init__(self, params, initial_knowledge):
        super().__init__()
        beta_randomness = params[kc.BETA_RANDOMNESS]
        self.beta = random.uniform(params[kc.BETA] - beta_randomness,
                                   params[kc.BETA] + beta_randomness)
        self.alpha_zero = params[kc.ALPHA_ZERO]
        self.alpha_j = 1.0 - self.alpha_zero
        self.remember = params[kc.REMEMBER]
        self.cost = np.array(initial_knowledge, dtype=float)
        self.memory = [list() for _ in range(len(initial_knowledge))]
        self.create_memory()

    def act(self, state) -> int:
        """Selects an action based on the cost expectations.

        Args:
            state (Any): The current state of the environment (not used).

        Returns:
            action (int): The index of the selected action.
        """
        utilities = list(map(lambda x: np.exp(x * self.beta), self.cost))
        action = utilities.index(min(utilities))
        return action

    def learn(self, state, action, reward) -> None:
        """Updates the cost expectation of the taken action based on the received reward.

        Args:
            state (Any): The current state of the environment (not used).
            action (int): The action that was taken.
            reward (float): The reward received after taking the action.

        Returns:
            None
        """
        # Drop the oldest memory entry (end of the list), which carries the least weight
        del self.memory[action][-1]
        # Insert the most recent cost expectation at index 0
        self.memory[action].insert(0, self.cost[action])
        # Weights are proportional to item recency: if remember=3 and alpha_j=0.5,
        # then alpha_j_weights = [.5/1, .5/2, .5/3]
        alpha_j_weights = [self.alpha_j / (memory_idx + 1) for memory_idx in range(self.remember)]
        # Normalize the weights so they sum to one
        alpha_j_normalized = [a_j / sum(alpha_j_weights) for a_j in alpha_j_weights]
        # Weighted average of the remembered cost expectations
        c_hat = 0.0
        for memory_idx, a_j in enumerate(alpha_j_normalized):
            c_hat += a_j * self.memory[action][memory_idx]
        # Update the cost expectation of the action
        self.cost[action] = c_hat + (self.alpha_zero * reward)

    def create_memory(self) -> None:
        """Creates a memory of previous cost expectations, pre-filled with the
        initial knowledge.

        Returns:
            None
        """
        for i in range(len(self.cost)):
            for _ in range(self.remember):
                self.memory[i].append(self.cost[i])
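

# Illustrative usage sketch (not part of the original module; values are
# assumptions): with remember=3 and alpha_zero=0.5, the recency weights are
# proportional to [1/1, 1/2, 1/3] and normalized to sum to one, so the most
# recent remembered expectation dominates c_hat.
def _example_weighted_average():
    params = {kc.BETA: -1.0, kc.BETA_RANDOMNESS: 0.0,
              kc.ALPHA_ZERO: 0.5, kc.REMEMBER: 3}
    model = WeightedAverage(params, initial_knowledge=[-10.0, -12.0])
    # The memory for action 0 is pre-filled with -10.0, so c_hat = -10.0 and
    # cost[0] = -10.0 + 0.5 * (-20.0) = -20.0
    model.learn(state=None, action=0, reward=-20.0)
    assert abs(model.cost[0] - (-20.0)) < 1e-9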