% Conference paper in the DEXA 2022 proceedings (Springer, LNCS series).
% Notes for maintainers:
%   - Percent signs in the abstract are escaped so the field stays safe when a
%     style typesets it (a raw percent sign would start a LaTeX comment).
%   - address holds the publisher's city, not a country or the conference venue.
%   - The LNCS volume number is not recorded here; add a volume field once verified.
@inproceedings{0f0ae87f92db4b529d6d162e418c695f,
  author    = {Spitzer, Eva-Maria and Krauss, Oliver and St{\"o}ckl, Andreas},
  title     = {Accurately Predicting User Registration in Highly Unbalanced Real-World Datasets from Online News Portals},
  editor    = {Strauss, Christine and Cuzzocrea, Alfredo and Kotsis, Gabriele and Khalil, Ismail and Tjoa, {A Min}},
  booktitle = {Database and Expert Systems Applications - 33rd International Conference, {DEXA} 2022, Proceedings},
  series    = {Lecture Notes in Computer Science},
  publisher = {Springer},
  address   = {Cham},
  year      = {2022},
  month     = jul,
  day       = {29},
  pages     = {302--315},
  doi       = {10.1007/978-3-031-12423-5_23},
  isbn      = {9783031124228},
  language  = {English},
  keywords  = {Imbalanced data, Label prediction, Lead scoring},
  abstract  = {Getting visitors to register is a crucial factor in marketing for online news portals. Current approaches are rule-based by awarding points for specific actions [3]. Finding efficient rules can be challenging and depends on the specific task. Registration is generally rare compared to regular visitors, leading to highly imbalanced data. We analyze different supervised learning classification algorithms under consideration of the data imbalance. As case study, we use anonymized real-world data from an Austrian newspaper outlet containing the visitor{\textquoteright}s session behavior with around 0.1\% registrations over all visits. We identify an ensemble approach combining the Balanced Random Forest Classifier and the RUSBoost Classifier correctly identifying 76\% of registrations over five independent data sets.},
  note      = {Publisher Copyright: {\textcopyright} 2022, The Author(s), under exclusive license to Springer Nature Switzerland AG.},
}