% Conference paper in a Springer "Lecture Notes in Networks and Systems" volume.
% The citation key is the exporter-generated identifier and is kept unchanged so
% existing \cite commands continue to resolve.
% NOTE(review): the LNNS volume number is not recorded here; add a volume field
% once it has been confirmed against the publisher's page.
% NOTE(review): address is set to the publisher's city (Cham), based on the
% copyright note naming Springer Nature Switzerland AG; confirm against the book.
@inproceedings{db1f864674d547fd83f835e83a16bb01,
  author    = {B{\"o}ss, Leopold and Keil, Stephan and St{\"o}ckl, Andreas},
  title     = {Extraction of Entities from Stock Analyst Reports via Language Models},
  booktitle = {Intelligent Systems and Applications - Proceedings of the 2025 Intelligent Systems Conference {IntelliSys}},
  editor    = {Arai, Kohei},
  series    = {Lecture Notes in Networks and Systems},
  pages     = {100--114},
  publisher = {Springer},
  address   = {Cham},
  year      = {2025},
  month     = aug,
  doi       = {10.1007/978-3-032-00071-2_6},
  isbn      = {9783032000705},
  language  = {English},
  keywords  = {Entity extraction, Financial data, Language models},
  abstract  = {This work investigates the possibility of extracting structured data from financial analyses by stock analysts with the help of large language models. For this purpose, experts compiled and annotated a dataset of 346 documents, including 425 analyses varying in layout and format from analysts from 14 banks. The conceived method represents a 3-stage extraction process that enables a detailed format definition and prompting strategy. The first two steps represent basic classification tasks, while the third step performs entity extraction. The simplistic nature of the first steps causes different models to execute them with equal performance. Thus, the evaluation focuses on utilizing different models for the extraction step, specifically employing OpenAI's GPT-4o and GPT-4o Mini. The evaluation shows that the conceived method improves F1 scores from 0.763 to 0.972 for GPT-4o Mini and from 0.856 to 0.997 for GPT-4o compared to a generic single-step extraction. These results indicate that the multi-step approach may help in improving the effectiveness of lightweight models in sophisticated tasks.},
  note      = {Publisher Copyright: {\textcopyright} The Author(s), under exclusive license to Springer Nature Switzerland AG 2025.},
}