Other formats:
BibTeX
LaTeX
RIS
@comment{Conference paper, record 2298218 (the citation key is the numeric record ID). The DOI is stored bare, without a resolver prefix; editor names are in "Last, First" form joined by "and"; acronyms in titles are brace-protected against style recasing.}
@inproceedings{2298218,
  author       = {Ajdarów, Michal and Brlej, Šimon and Novotný, Petr},
  title        = {Shielding in Resource-Constrained Goal {POMDPs}},
  editor       = {Williams, Brian and Chen, Yiling and Neville, Jennifer},
  booktitle    = {Proceedings of the 37th {AAAI} Conference on Artificial Intelligence},
  publisher    = {AAAI Press},
  address      = {Washington, DC, USA},
  location     = {Washington, DC, USA},
  year         = {2023},
  pages        = {14674--14682},
  isbn         = {978-1-57735-880-0},
  doi          = {10.1609/aaai.v37i12.26715},
  url          = {https://ojs.aaai.org/index.php/AAAI/article/view/26715},
  keywords     = {decision making; Markov decision processes; controller synthesis; resource constraints; shielding},
  howpublished = {elektronická verze "online"},
  language     = {eng},
}
TY - CONF ID - 2298218 AU - Ajdarów, Michal AU - Brlej, Šimon AU - Novotný, Petr PY - 2023 TI - Shielding in Resource-Constrained Goal POMDPs PB - AAAI Press CY - Washington, DC, USA SN - 9781577358800 KW - decision making KW - Markov decision processes KW - controller synthesis KW - resource constraints KW - shielding UR - https://ojs.aaai.org/index.php/AAAI/article/view/26715 N2 - We consider partially observable Markov decision processes (POMDPs) modeling an agent that needs a supply of a certain resource (e.g., electricity stored in batteries) to operate correctly. The resource is consumed by the agent's actions and can be replenished only in certain states. The agent aims to minimize the expected cost of reaching some goal while preventing resource exhaustion, a problem we call resource-constrained goal optimization (RSGO). We take a two-step approach to the RSGO problem. First, using formal methods techniques, we design an algorithm computing a shield for a given scenario: a procedure that observes the agent and prevents it from using actions that might eventually lead to resource exhaustion. Second, we augment the POMCP heuristic search algorithm for POMDP planning with our shields to obtain an algorithm solving the RSGO problem. We implement our algorithm and present experiments showing its applicability to benchmarks from the literature. ER -
AJDARÓW, Michal, Šimon BRLEJ and Petr NOVOTNÝ. Shielding in Resource-Constrained Goal POMDPs. Online. In Brian Williams, Yiling Chen and Jennifer Neville. \textit{Proceedings of the 37th AAAI Conference on Artificial Intelligence}. Washington, DC, USA: AAAI Press, 2023, p.~14674--14682. ISBN~978-1-57735-880-0. Available from: https://dx.doi.org/10.1609/aaai.v37i12.26715.
|