% Conference paper (AAMAS 2021) introducing Masked Constrained Policy Optimization (MCPO).
% The citation key is kept as-is so existing citations of this entry still resolve.
@inproceedings{d6f5be4cc1f6417db0c015ac4765b69e,
  author    = {{Van Havermaet}, Stef and Khaluf, Yara and Simoens, Pieter},
  title     = {No more hand-tuning rewards: Masked constrained policy optimization for safe reinforcement learning},
  booktitle = {20th International Conference on Autonomous Agents and Multiagent Systems, AAMAS 2021},
  series    = {Proceedings of the International Joint Conference on Autonomous Agents and Multiagent Systems, AAMAS},
  publisher = {International Foundation for Autonomous Agents and Multiagent Systems (IFAAMAS)},
  pages     = {1332--1340},
  year      = {2021},
  language  = {English},
  keywords  = {Constrained Policy Optimization, Reward Engineering, Safe Reinforcement Learning},
  abstract  = {In safe Reinforcement Learning (RL), the agent attempts to find policies which maximize the expectation of accumulated rewards and guarantee its safety to remain above a given threshold. Hence, it is straightforward to formalize safe RL problems by both a reward function and a safety constraint. We define safety as the probability of survival in environments where taking risky actions could lead to early termination of the task. Although the optimization problem is already constrained by a safety threshold, reward signals related to unsafe terminal states influence the original maximization objective of the task. Selecting the appropriate value of these signals is often a time consuming and challenging reward engineering task, which requires expert knowledge of the domain. This paper presents a safe RL algorithm, called Masked Constrained Policy Optimization (MCPO), in which the learning process is constrained by safety and excludes the unsafe reward signals. We develop MCPO as an extension of gradient-based policy search methods, in which the updates of the policy and the expected reward models are masked. Our method benefits from having a high probability of satisfying the given constraints for every policy in the learning process. We validate the proposed algorithm in two continuous tasks. Our findings prove the proposed algorithm is able to neglect unsafe reward signals, and thereby resolving the desired safety-performance trade-off without having the need for hand-tuning rewards.},
  note      = {Conference date: 03-05-2021 through 07-05-2021},
}