2019
Nadig, D.; Ramamurthy, B.; Bockelman, B.; Swanson, D.
APRIL: An Application-Aware, Predictive and Intelligent Load Balancing Solution for Data-Intensive Science Proceedings Article
In: IEEE INFOCOM 2019 - IEEE Conference on Computer Communications, pp. 1909–1917, 2019.
Abstract | BibTeX | Tags: Big Data, Correlation, Deep learning, Load management, Load modeling, Predictive models, Servers | Links:
% Conference paper in the IEEE INFOCOM 2019 proceedings.
% Acronyms are braced so sentence-casing styles keep their capitals, and
% percent signs in the abstract are escaped so they cannot start a TeX comment.
@inproceedings{nadig_april:_2019,
  title      = {{APRIL}: An Application-Aware, Predictive and Intelligent Load Balancing Solution for Data-Intensive Science},
  author     = {Nadig, D. and Ramamurthy, B. and Bockelman, B. and Swanson, D.},
  booktitle  = {{IEEE} {INFOCOM} 2019 - {IEEE} Conference on Computer Communications},
  pages      = {1909--1917},
  year       = {2019},
  date       = {2019-04-01},
  doi        = {10.1109/INFOCOM.2019.8737537},
  url        = {https://deepaknadig.com/wp-content/uploads/2021/09/Nadig-et-al.-2019-APRIL-An-Application-Aware-Predictive-and-Intell.pdf},
  urldate    = {2019-04-01},
  abstract   = {In this paper, we propose an application-aware intelligent load balancing system for high-throughput, distributed computing, and data-intensive science workflows. We leverage emerging deep learning techniques for time-series modeling to develop an application-aware predictive analytics system for accurately forecasting GridFTP connection loads. Our solution integrates with a major U.S. CMS Tier-2 site; we use a real dataset representing 670 million GridFTP transfer connections measured over 18 months to drive our predictive analytics solution. First, we perform extensive analysis on this dataset and use the connection loads as an example to study the temporal dependencies between various user-roles and workflow memberships. We use the analysis to motivate the design of a gated recurrent unit (GRU) based deep recurrent neural network (RNN) for modeling long-term temporal dependencies and predicting connection loads. We develop a novel application-aware, predictive and intelligent load balancer, APRIL, that effectively integrates application metadata and load forecast information to maximize server utilization. We conduct extensive experiments to evaluate the performance of our deep RNN predictive analytics system and compare it with other approaches such as ARIMA and multi-layer perceptron (MLP) predictors. The results show that our forecasting model, depending on the user-role, performs between 5.88\%--92.6\% better than the alternatives. We also demonstrate the effectiveness of APRIL by comparing it with the load balancing capabilities of an existing production Linux Virtual Server (LVS) cluster. Our approach improves server utilization, on an average, between 0.5 to 11 times, when compared with its LVS counterpart.},
  keywords   = {Big Data, Correlation, Deep learning, Load management, Load modeling, Predictive models, Servers},
  pubstate   = {published},
  tppubtype  = {inproceedings}
}
2018
Nadig, D.; Ramamurthy, B.; Bockelman, B.; Swanson, D.
Large Data Transfer Predictability and Forecasting using Application-Aware SDN Proceedings Article
In: 2018 IEEE International Conference on Advanced Networks and Telecommunications Systems (ANTS), pp. 1–6, 2018.
Abstract | BibTeX | Tags: Aggregates, Analytical models, Data analysis, Data models, Data transfer, Forecasting, Predictive models | Links:
% Conference paper in the 2018 IEEE ANTS proceedings.
% Acronyms are braced so sentence-casing styles keep their capitals, and
% percent signs in the abstract are escaped so they cannot start a TeX comment.
@inproceedings{nadig_large_2018,
  title      = {Large Data Transfer Predictability and Forecasting using Application-Aware {SDN}},
  author     = {Nadig, D. and Ramamurthy, B. and Bockelman, B. and Swanson, D.},
  booktitle  = {2018 {IEEE} International Conference on Advanced Networks and Telecommunications Systems ({ANTS})},
  pages      = {1--6},
  year       = {2018},
  date       = {2018-12-01},
  doi        = {10.1109/ANTS.2018.8710165},
  url        = {https://deepaknadig.com/wp-content/uploads/2021/09/Nadig-et-al.-2018-Large-Data-Transfer-Predictability-and-Forecasting.pdf},
  urldate    = {2018-12-01},
  abstract   = {Network management for applications that rely on large-scale data transfers is challenging due to the volatility and the dynamic nature of the access traffic patterns. Predictive analytics and forecasting play an important role in providing effective resource allocation strategies for large data transfers. We propose a predictive analytics solution for large data transfers using an application-aware software defined networking (SDN) approach. We perform extensive exploratory data analysis to characterize the GridFTP connection transfers dataset and present various strategies for its use with statistical forecasting models. We develop a univariate autoregressive integrated moving average (ARIMA) based prediction framework for forecasting GridFTP connection transfers. Our prediction model tightly integrates with an application-aware SDN solution to preemptively drive network management decisions for GridFTP resource allocation at a U.S. CMS Tier-2 site. Further, our framework has a mean absolute percentage error (MAPE) ranging from 6\% to 10\% when applied to make rolling forecasts.},
  keywords   = {Aggregates, Analytical models, Data analysis, Data models, Data transfer, Forecasting, Predictive models},
  pubstate   = {published},
  tppubtype  = {inproceedings}
}