@inproceedings{8db9e9e6b8974df6a546438411aadafa,
  author    = {Kang, Minkoo and Yang, Gyeongsik and Yoo, Yeonho and Yoo, Chuck},
  title     = {{TensorExpress}: In-Network Communication Scheduling for Distributed Deep Learning},
  booktitle = {Proceedings - 2020 {IEEE} 13th International Conference on Cloud Computing, {CLOUD} 2020},
  series    = {IEEE International Conference on Cloud Computing, CLOUD},
  publisher = {IEEE Computer Society},
  address   = {United States},
  pages     = {25--27},
  year      = {2020},
  month     = oct,
  doi       = {10.1109/CLOUD49709.2020.00014},
  language  = {English},
  keywords  = {Communication scheduling, Distributed deep learning, In-network delay, P4, Parameter server architecture},
  abstract  = {TensorExpress provides in-network communication scheduling for distributed deep learning (DDL). In cloud-based DDL, parameter communication over a network is a key bottleneck. Previous studies proposed tensor packet reordering approaches to reduce network blocking time. However, network contention still exists in DDL. TensorExpress mitigates network contention and reduces overall training time. It schedules tensor packets in-network using P4, a switch programming language. TensorExpress improves latency and network blocking time up to 2.5 and 2.44 times, respectively.},
  note      = {Publisher Copyright: {\textcopyright} 2020 IEEE.; 13th IEEE International Conference on Cloud Computing, CLOUD 2020 ; Conference date: 18-10-2020 Through 24-10-2020},
}