% Conference paper (KDD 2022) on the propensity model in extreme multi-label
% classification. `address` holds the publisher location, not the conference
% venue; conference name and dates are kept in `note`. `day` and `language`
% are non-standard fields that classic styles ignore.
@inproceedings{afe32cabbc884152a02df130825f4431,
  author    = {Schultheis, Erik and Wydmuch, Marek and Babbar, Rohit and Dembczynski, Krzysztof},
  title     = {On Missing Labels, Long-tails and Propensities in Extreme Multi-label Classification},
  booktitle = {{KDD} 2022 - Proceedings of the 28th {ACM} {SIGKDD} Conference on Knowledge Discovery and Data Mining},
  series    = {Proceedings of the {ACM} {SIGKDD} International Conference on Knowledge Discovery and Data Mining},
  pages     = {1547--1557},
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  year      = {2022},
  month     = aug,
  day       = {14},
  doi       = {10.1145/3534678.3539466},
  language  = {English},
  keywords  = {extreme classification, long-tail labels, missing labels, multi-label classification, propensity model, recommendation},
  abstract  = {The propensity model introduced by Jain et al. has become a standard approach for dealing with missing and long-tail labels in extreme multi-label classification (XMLC). In this paper, we critically revise this approach showing that despite its theoretical soundness, its application in contemporary XMLC works is debatable. We exhaustively discuss the flaws of the propensity-based approach, and present several recipes, some of them related to solutions used in search engines and recommender systems, that we believe constitute promising alternatives to be followed in XMLC.},
  note      = {28th ACM SIGKDD Conference on Knowledge Discovery and Data Mining, KDD 2022 ; Conference date: 14-08-2022 Through 18-08-2022},
}