Other formats:
BibTeX
LaTeX
RIS
@article{1232717,
  author         = {Brázdil, Tomáš and Brožek, Václav and Chatterjee, Krishnendu and Forejt, Vojtěch and Kučera, Antonín},
  title          = {{Markov} Decision Processes with Multiple Long-Run Average Objectives},
  journal        = {Logical Methods in Computer Science},
  volume         = {10},
  number         = {1},
  pages          = {1--29},
  article_number = {1},
  year           = {2014},
  doi            = {10.2168/LMCS-10(1:13)2014},
  issn           = {1860-5974},
  url            = {http://www.lmcs-online.org/},
  language       = {eng},
  keywords       = {Markov decision processes; mean-payoff reward; multi-objective optimisation; formal verification},
}
TY  - JOUR
ID  - 1232717
AU  - Brázdil, Tomáš
AU  - Brožek, Václav
AU  - Chatterjee, Krishnendu
AU  - Forejt, Vojtěch
AU  - Kučera, Antonín
PY  - 2014
TI  - Markov Decision Processes with Multiple Long-Run Average Objectives
JF  - Logical Methods in Computer Science
VL  - 10
IS  - 1
SP  - 1
EP  - 29
PB  - Technical University of Braunschweig
SN  - 1860-5974
KW  - Markov decision processes
KW  - mean-payoff reward
KW  - multi-objective optimisation
KW  - formal verification
UR  - http://www.lmcs-online.org/
L2  - http://www.lmcs-online.org/
N2  - We study Markov decision processes (MDPs) with multiple limit-average (or mean-payoff) functions. We consider two different objectives, namely, expectation and satisfaction objectives. Given an MDP with k limit-average functions, in the expectation objective the goal is to maximize the expected limit-average value, and in the satisfaction objective the goal is to maximize the probability of runs such that the limit-average value stays above a given vector. We show that under the expectation objective, in contrast to the case of one limit-average function, both randomization and memory are necessary for strategies even for epsilon-approximation, and that finite-memory randomized strategies are sufficient for achieving Pareto optimal values. Under the satisfaction objective, in contrast to the case of one limit-average function, infinite memory is necessary for strategies achieving a specific value (i.e. randomized finite-memory strategies are not sufficient), whereas memoryless randomized strategies are sufficient for epsilon-approximation, for all epsilon>0. We further prove that the decision problems for both expectation and satisfaction objectives can be solved in polynomial time and the trade-off curve (Pareto curve) can be epsilon-approximated in time polynomial in the size of the MDP and 1/epsilon, and exponential in the number of limit-average functions, for all epsilon>0. Our analysis also reveals flaws in previous work for MDPs with multiple mean-payoff functions under the expectation objective, corrects the flaws, and allows us to obtain improved results.
ER  - 
BRÁZDIL, Tomáš, Václav BROŽEK, Krishnendu CHATTERJEE, Vojtěch FOREJT and Antonín KUČERA. Markov Decision Processes with Multiple Long-Run Average Objectives. \textit{Logical Methods in Computer Science}. Technical University of Braunschweig, 2014, vol.~10, No~1, p.~1-29. ISSN~1860-5974. Available from: https://dx.doi.org/10.2168/LMCS-10(1:13)2014.
|