% Conference paper introducing Universal NER (UNER), NAACL 2024, Long Papers volume.
% The proceedings title lives in booktitle (with the volume label appended) rather
% than in series, so standard styles print the venue correctly. Braces in the title
% and booktitle protect proper names and acronyms from sentence-casing styles.
% Names use the "Last, First" form, with given names left unbraced so that
% abbreviating styles can produce every initial.
@inproceedings{4e0444f84e0849df9a1c73bdb88e0cca,
title = "{Universal NER}: A Gold-Standard Multilingual Named Entity Recognition Benchmark",
abstract = "We introduce Universal NER (UNER), an open, community-driven project to develop gold-standard NER benchmarks in many languages. The overarching goal of UNER is to provide high-quality, cross-lingually consistent annotations to facilitate and standardize multilingual NER research. UNER v1 contains 19 datasets annotated with named entities in a cross-lingual consistent schema across 13 diverse languages. In this paper, we detail the dataset creation and composition of UNER; we also provide initial modeling baselines on both in-language and cross-lingual learning settings. We will release the data, code, and fitted models to the public.",
keywords = "cs.CL",
author = "Mayhew, Stephen and Blevins, Terra and Liu, Shuheng and {\v S}uppa, Marek and Gonen, Hila and Imperial, Joseph Marvin and Karlsson, B{\"o}rje F. and Lin, Peiqin and Ljube{\v s}i{\'c}, Nikola and Miranda, L. J. and Plank, Barbara and Riabi, Arij and Pinter, Yuval",
year = "2024",
month = jun,
day = "21",
language = "English",
booktitle = "Proceedings of the 2024 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, {NAACL} 2024 (Long Papers)",
publisher = "Association for Computational Linguistics (ACL)",
pages = "4322--4337",
editor = "Duh, Kevin and Gomez, Helena and Bethard, Steven",
note = "2024 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, NAACL 2024 ; Conference date: 16-06-2024 Through 21-06-2024",
}