@comment{Conference paper from ICITCS 2012 (see the note field). The term Q-values in the title is brace-protected so sentence-casing styles keep the capital Q, and the percent sign in the abstract is escaped so it cannot start a LaTeX comment if a style prints the abstract.}
@inproceedings{a4f59eb00baf4b2581dcdb4010e49f49,
  author    = {Sung, Yunsick and Ahn, Eunyoung and Cho, Kyungeun},
  title     = {Enhanced reinforcement learning by recursive updating of {Q-values} for reward propagation},
  booktitle = {IT Convergence and Security 2012},
  series    = {Lecture Notes in Electrical Engineering},
  pages     = {1003--1008},
  year      = {2013},
  isbn      = {9789400758599},
  doi       = {10.1007/978-94-007-5860-5_121},
  language  = {English},
  keywords  = {Propagation, Q-learning, Q-value, Terminal reward},
  abstract  = {In this paper, we propose a method to reduce the learning time of Q-learning by combining the method of updating even to Q-values of unexecuted actions with the method of adding a terminal reward to unvisited Q-values. To verify the method, its performance was compared to that of conventional Q-learning. The proposed approach showed the same performance as conventional Q-learning, with only 27 \% of the learning episodes required for conventional Q-learning. Accordingly, we verified that the proposed method reduced learning time by updating more Q-values in the early stage of learning and distributing a terminal reward to more Q-values.},
  note      = {International Conference on IT Convergence and Security, ICITCS 2012 ; Conference date: 05-12-2012 Through 07-12-2012},
}