@comment{Conference paper in the DASFAA 2019 proceedings. The citation key is kept unchanged so existing citations continue to resolve. Names are stored in Last, First form and the DOI is stored bare without LaTeX escapes. NOTE(review): address was changed from Germany to Cham to match the copyright holder named in the note field; confirm against the publisher record.}
@inproceedings{224e6444e42b4484978b1e5c8e5bbfcd,
  author    = {Hamandawana, Prince and Mativenga, Ronnie and Kwon, Se Jin and Chung, Tae Sun},
  title     = {{EPPADS}: An enhanced phase-based performance-aware dynamic scheduler for high job execution performance in large scale clusters},
  booktitle = {Database Systems for Advanced Applications - 24th International Conference, {DASFAA} 2019, Proceedings},
  editor    = {Yang, Jun and Natwichai, Juggapong and Tong, Yongxin and Gama, Joao and Li, Guoliang},
  series    = {Lecture Notes in Computer Science (including subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics)},
  publisher = {Springer},
  address   = {Cham},
  year      = {2019},
  pages     = {140--156},
  isbn      = {9783030185756},
  doi       = {10.1007/978-3-030-18576-3_9},
  language  = {English},
  keywords  = {Distributed processing, MapReduce, Scheduling},
  note      = {Publisher Copyright: {\textcopyright} Springer Nature Switzerland AG 2019.; 24th International Conference on Database Systems for Advanced Applications, DASFAA 2019 ; Conference date: 22-04-2019 Through 25-04-2019},
  abstract  = {The way in which jobs are scheduled is critical to achieve high job processing performance in large scale data clusters. Most existing scheduling mechanism employs a First-In First-Out, serialized approach encompassed with task straggler hunting techniques which launches speculative tasks after detecting slow tasks. This is often achieved through the instrumentation of processing nodes. Such node instrumentation incurs frequent communication overheads as the number of processing nodes increase. Moreover the sequential scheduling of job tasks and the straggler hunting approach fails to meet optimal performance as they increase job waiting time in queue and incurs delayed speculative execution of straggling tasks respectively. In this paper we propose an Enhanced Phase based Performance Aware Dynamic Scheduler (EPPADS), which schedules job tasks without additional instrumentation modules. EPPADS uses a two staged scheduling approach, that is, the slow start phase (SSP) and accelerate phase (AccP). The SSP schedules the initial task in the queue in the normal FIFO way and records the initial execution times of the processing nodes. The AccP uses the initial execution times to compute the processing nodes task distribution ratio of the remaining tasks and schedules them using a single scheduling I/O. We implement EPPADS scheduler in Hadoop{\textquoteright}s MapReduce framework. Our evaluation shows that EPPADS can achieve a performance improvement on FIFO scheduler of 30\%. Compared with existing Dynamic scheduling approach which uses node instrumentation, EPPADS achieves a better performance of 22\%.},
}