Další formáty:
BibTeX
LaTeX
RIS
% Journal article in Information and Computation, vol. 222 (2013), pp. 121--138.
% pages and publisher are taken from the RIS record and formatted citation that accompany this entry.
% month = jan assumes the issue designation "January" is the cover month -- TODO confirm.
% article_location and article_number are non-standard exporter fields; styles ignore them,
% and they are kept only so that no exported data is lost.
@article{1076357,
  author           = {Brázdil, Tomáš and Brožek, Václav and Etessami, Kousha and Kučera, Antonín},
  title            = {Approximating the termination value of one-counter {MDPs} and stochastic games},
  journal          = {Information and Computation},
  volume           = {222},
  pages            = {121--138},
  month            = jan,
  year             = {2013},
  publisher        = {Elsevier Science},
  issn             = {0890-5401},
  doi              = {10.1016/j.ic.2012.01.008},
  keywords         = {Markov decision processes; one-counter automata},
  language         = {eng},
  article_location = {Netherlands},
  article_number   = {January},
}
TY  - JOUR
ID  - 1076357
AU  - Brázdil, Tomáš
AU  - Brožek, Václav
AU  - Etessami, Kousha
AU  - Kučera, Antonín
PY  - 2013
TI  - Approximating the termination value of one-counter MDPs and stochastic games
JF  - Information and Computation
VL  - 222
IS  - January
SP  - 121
EP  - 138
PB  - Elsevier Science
SN  - 0890-5401
KW  - Markov decision processes
KW  - one-counter automata
N2  - One-counter MDPs (OC-MDPs) and one-counter simple stochastic games (OC-SSGs) are 1-player, and 2-player turn-based zero-sum, stochastic games played on the transition graph of classic one-counter automata (equivalently, pushdown automata with a 1-letter stack alphabet). A key objective for the analysis and verification of these games is the termination objective, where the players aim to maximize (minimize, respectively) the probability of hitting counter value 0, starting at a given control state and given counter value. Recently, we studied qualitative decision problems ("is the optimal termination value equal to 1?") for OC-MDPs (and OC-SSGs) and showed them to be decidable in polynomial time (in NP intersection coNP, respectively). However, quantitative decision and approximation problems ("is the optimal termination value at least p", or "approximate the termination value within epsilon") are far more challenging. This is so in part because optimal strategies may not exist, and because even when they do exist they can have a highly non-trivial structure. It thus remained open even whether any of these quantitative termination problems are computable. In this paper we show that all quantitative approximation problems for the termination value for OC-MDPs and OC-SSGs are computable. Specifically, given an OC-SSG, and given epsilon>0, we can compute a value v that approximates the value of the OC-SSG termination game within additive error epsilon, and furthermore we can compute epsilon-optimal strategies for both players in the game. A key ingredient in our proofs is a subtle martingale, derived from solving certain linear programs that we can associate with a maximizing OC-MDP. An application of Azuma's inequality on these martingales yields a computable bound for the "wealth" at which a "rich person's strategy" becomes epsilon-optimal for OC-MDPs.
ER  -
BRÁZDIL, Tomáš, Václav BROŽEK, Kousha ETESSAMI a Antonín KUČERA. Approximating the termination value of one-counter MDPs and stochastic games. \textit{Information and Computation}. Netherlands: Elsevier Science, 2013, roč.~222, January, s.~121--138. ISSN~0890-5401. Dostupné z: https://dx.doi.org/10.1016/j.ic.2012.01.008.
|