Rewards¶
Understanding and customizing reward functions in EldenGym.
Default Reward Function¶
The default reward encourages defeating the boss while staying alive:
def default_reward(obs, info, terminated, truncated):
    reward = 0.0

    # Reward for damaging the boss
    if 'target_hp_delta' in info and info['target_hp_delta'] < 0:
        reward += abs(info['target_hp_delta']) / 10.0

    # Penalty for taking damage
    if 'player_hp_delta' in info and info['player_hp_delta'] < 0:
        reward += info['player_hp_delta'] / 10.0  # Delta is negative

    # Big bonus for defeating the boss
    if terminated and info.get('target_hp', 0) <= 0:
        reward += 100.0

    # Penalty for dying
    if terminated and info.get('player_hp', 0) <= 0:
        reward -= 50.0

    return reward
Custom Reward Functions¶
Signature¶
def my_reward_function(obs, info, terminated, truncated):
    """
    Args:
        obs: Current observation (np.ndarray or dict)
        info: Info dictionary with game state
        terminated: Whether the episode ended
        truncated: Whether the episode was truncated

    Returns:
        float: Reward value
    """
    reward = 0.0
    # Your logic here
    return reward
Usage¶
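Pass your function to the environment when you create it. A minimal sketch, using the reward_function keyword shown in the tracked-reward example later on this page:

import gymnasium as gym

env = gym.make("EldenGym-v0", reward_function=my_reward_function)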
Reward Design Examples¶
Aggressive Play¶
Encourage attacking:
def aggressive_reward(obs, info, terminated, truncated):
    reward = 0.0

    # Big reward for boss damage
    if 'target_hp_delta' in info and info['target_hp_delta'] < 0:
        reward += abs(info['target_hp_delta']) * 2.0  # 2x multiplier

    # Small penalty for taking damage
    if 'player_hp_delta' in info and info['player_hp_delta'] < 0:
        reward += info['player_hp_delta'] * 0.1  # Only 0.1x

    # Huge bonus for winning
    if terminated and info.get('target_hp', 0) <= 0:
        reward += 500.0

    return reward
Defensive Play¶
Encourage survival:
def defensive_reward(obs, info, terminated, truncated):
    reward = 0.0

    # Reward for staying alive (per step)
    reward += 0.1

    # Moderate reward for boss damage
    if 'target_hp_delta' in info and info['target_hp_delta'] < 0:
        reward += abs(info['target_hp_delta']) * 0.5

    # Big penalty for taking damage
    if 'player_hp_delta' in info and info['player_hp_delta'] < 0:
        reward += info['player_hp_delta'] * 5.0  # 5x penalty

    # Win bonus that scales with remaining HP
    if terminated and info.get('target_hp', 0) <= 0:
        hp_ratio = info['player_hp'] / info['player_max_hp']
        reward += 100.0 * hp_ratio

    return reward
Distance-Based¶
Encourage maintaining optimal distance:
def distance_reward(obs, info, terminated, truncated):
    reward = 0.0

    # Optimal distance: 5-10 units
    distance = info.get('distance', 0)
    if 5 <= distance <= 10:
        reward += 1.0   # Good positioning
    elif distance < 3:
        reward -= 0.5   # Too close
    elif distance > 15:
        reward -= 0.5   # Too far

    # Standard combat rewards
    if 'target_hp_delta' in info and info['target_hp_delta'] < 0:
        reward += abs(info['target_hp_delta']) / 10.0

    return reward
Time-Based¶
Encourage speed:
def speed_reward(obs, info, terminated, truncated):
    reward = 0.0

    # Small penalty every step (encourage speed)
    reward -= 0.01

    # Standard damage rewards
    if 'target_hp_delta' in info and info['target_hp_delta'] < 0:
        reward += abs(info['target_hp_delta']) / 10.0

    # Big bonus for a fast win, inversely proportional to step count
    if terminated and info.get('target_hp', 0) <= 0:
        steps = info.get('step_count', 1000)
        reward += 200.0 * (1000 / max(steps, 1))

    return reward
Shaped Reward¶
Dense rewards for learning:
def shaped_reward(obs, info, terminated, truncated):
    reward = 0.0

    # HP-based shaping: reward for HP advantage
    player_hp_ratio = info['player_hp'] / info['player_max_hp']
    target_hp_ratio = info['target_hp'] / info['target_max_hp']
    reward += (player_hp_ratio - target_hp_ratio) * 0.1

    # Distance shaping: closer = better
    distance = info.get('distance', 0)
    if distance > 0:
        reward += 1.0 / (distance + 1)

    # Combat rewards
    if 'target_hp_delta' in info and info['target_hp_delta'] < 0:
        reward += abs(info['target_hp_delta']) * 1.0
    if 'player_hp_delta' in info:
        reward += info['player_hp_delta'] * 2.0  # Negative when taking damage

    # Terminal rewards
    if terminated:
        if info.get('target_hp', 0) <= 0:
            reward += 100.0
        else:
            reward -= 50.0

    return reward
Available Info Fields¶
Use these in your reward function:
info['player_hp'] # Current player HP
info['player_max_hp'] # Max player HP
info['target_hp'] # Current boss HP
info['target_max_hp'] # Max boss HP
info['distance'] # Distance to boss
info['player_animation_id'] # Player animation ID
info['target_animation_id'] # Boss animation ID
info['step_count'] # Steps in current episode
# After step (not in reset):
info['player_hp_delta'] # HP change this step
info['target_hp_delta'] # Boss HP change this step
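The delta fields only appear after a step, so reward code should guard against missing keys; the built-in examples use `in` checks, and `dict.get` with a default works equally well. A minimal sketch:

def safe_reward(obs, info, terminated, truncated):
    # .get() falls back to 0.0 on the first call after reset(),
    # when the delta fields are not yet present
    target_delta = info.get('target_hp_delta', 0.0)
    player_delta = info.get('player_hp_delta', 0.0)

    reward = 0.0
    if target_delta < 0:
        reward += abs(target_delta) / 10.0
    if player_delta < 0:
        reward += player_delta / 10.0
    return reward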
Reward Scaling¶
Normalize Rewards¶
import gymnasium as gym
from gymnasium.wrappers import NormalizeReward

env = gym.make("EldenGym-v0")
env = NormalizeReward(env)  # Normalize rewards using running statistics
Clip Rewards¶
import gymnasium as gym
from gymnasium.wrappers import ClipReward

env = gym.make("EldenGym-v0")
env = ClipReward(env, min_reward=-1.0, max_reward=1.0)
Transform Rewards¶
import gymnasium as gym
import numpy as np
from gymnasium.wrappers import TransformReward

env = gym.make("EldenGym-v0")
env = TransformReward(env, lambda r: np.sign(r) * np.log(1 + abs(r)))  # Log-scale rewards
Debugging Rewards¶
Log Rewards¶
import matplotlib.pyplot as plt
import numpy as np

rewards = []
obs, info = env.reset()

for _ in range(1000):
    action = env.action_space.sample()
    obs, reward, terminated, truncated, info = env.step(action)
    rewards.append(reward)
    if terminated or truncated:
        break

# Analyze
plt.plot(rewards)
plt.title('Rewards over Episode')
plt.xlabel('Step')
plt.ylabel('Reward')
plt.show()

print(f"Total reward: {sum(rewards)}")
print(f"Mean reward: {np.mean(rewards)}")
print(f"Max reward: {max(rewards)}")
print(f"Min reward: {min(rewards)}")
Reward Components¶
Track individual components:
def tracked_reward(obs, info, terminated, truncated):
    components = {
        'damage_dealt': 0.0,
        'damage_taken': 0.0,
        'survival': 0.0,
        'terminal': 0.0,
    }

    if 'target_hp_delta' in info and info['target_hp_delta'] < 0:
        components['damage_dealt'] = abs(info['target_hp_delta']) / 10.0
    if 'player_hp_delta' in info and info['player_hp_delta'] < 0:
        components['damage_taken'] = info['player_hp_delta'] / 10.0

    components['survival'] = 0.01

    if terminated:
        if info.get('target_hp', 0) <= 0:
            components['terminal'] = 100.0
        else:
            components['terminal'] = -50.0

    # Log components (to TensorBoard, wandb, etc.)
    # ...

    return sum(components.values())

env = gym.make("EldenGym-v0", reward_function=tracked_reward)
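One way to fill in the logging step is to accumulate the per-step components and report the totals once per episode. A minimal sketch with hypothetical helpers log_components and report_and_reset (swap the print for a TensorBoard writer or wandb.log call if you use one):

from collections import defaultdict

episode_totals = defaultdict(float)  # Running totals for the current episode

def log_components(components):
    # Call this just before the return in tracked_reward
    for name, value in components.items():
        episode_totals[name] += value

def report_and_reset():
    # Call this when an episode ends; replace print with your logger
    print({name: round(total, 2) for name, total in episode_totals.items()})
    episode_totals.clear()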
Best Practices¶
- Start Simple - Begin with sparse rewards (win/loss only); see the sketch after this list
- Add Shaping Gradually - Introduce dense rewards step by step
- Balance Components - Ensure no single component dominates
- Test Thoroughly - Run a random policy and check the reward distribution
- Scale Appropriately - Keep rewards in a reasonable range (e.g. -1 to 1 or -100 to 100)
- Avoid Reward Hacking - Test for unintended behaviors
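For the first point, a sparse baseline only scores the episode outcome; a minimal sketch using the same info fields as the examples above:

def sparse_reward(obs, info, terminated, truncated):
    # No shaping: only the terminal outcome matters
    if terminated:
        if info.get('target_hp', 0) <= 0:
            return 1.0   # Boss defeated
        if info.get('player_hp', 0) <= 0:
            return -1.0  # Player died
    return 0.0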