klinger.bib
@misc{resendiz2025llmbasedaffectivetextgeneration,
title = {{LLM}-based Affective Text Generation Quality Based on Different Quantization Values},
author = {Menchaca Resendiz, Yarik and Klinger, Roman},
year = {2025},
eprint = {2501.19317},
archiveprefix = {arXiv},
primaryclass = {cs.CL},
url = {https://arxiv.org/abs/2501.19317},
internaltype = {preprint}
}
@inproceedings{greschner-klinger-2025-fearful,
title = {Fearful Falcons and Angry Llamas: Emotion Category
Annotations of Arguments by Humans and {LLM}s},
author = {Greschner, Lynn and Klinger, Roman},
editor = {H{\"a}m{\"a}l{\"a}inen, Mika and {\"O}hman, Emily
and Bizzoni, Yuri and Miyagawa, So and Alnajjar,
Khalid},
booktitle = {Proceedings of the 5th International Conference on
Natural Language Processing for Digital Humanities},
month = may,
year = {2025},
address = {Albuquerque, USA},
publisher = {Association for Computational Linguistics},
url = {https://aclanthology.org/2025.nlp4dh-1.52/},
pages = {628--646},
isbn = {979-8-89176-234-3},
abstract = {Arguments evoke emotions, influencing the effect of
the argument itself. Not only the emotional
intensity but also the category influences the
argument's effects, for instance, the willingness to
adapt stances. While binary emotionality has been
studied in argumentative texts, there is no work on
discrete emotion categories (e.g.,
{\textquoteleft}anger{\textquoteright}) in such data. To fill this
gap, we crowdsource subjective annotations of
emotion categories in a German argument corpus and
evaluate automatic LLM-based labeling
methods. Specifically, we compare three prompting
strategies (zero-shot, one-shot, chain-of-thought)
on three large instruction-tuned language models
(Falcon-7b-instruct, Llama-3.1-8B-instruct,
GPT-4o-mini). We further vary the definition of the
output space to be binary (is there emotionality in
the argument?), closed-domain (which emotion from a
given label set is in the argument?), or open-domain
(which emotion is in the argument?). We find that
emotion categories enhance the prediction of
emotionality in arguments, emphasizing the need for
discrete emotion annotations in arguments. Across
all prompt settings and models, automatic
predictions show a high recall but low precision for
predicting anger and fear, indicating a strong bias
toward negative emotions.},
eprint = {2412.15993},
archiveprefix = {arXiv},
primaryclass = {cs.CL},
internaltype = {conferenceproc}
}
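
The entry above compares zero-shot, one-shot, and chain-of-thought prompting for
emotion category annotation, with binary, closed-domain, and open-domain output
spaces. Below is a minimal sketch of the closed-domain, zero-shot setting with
GPT-4o-mini (one of the three models compared); the label set and prompt wording
are assumptions, since the entry does not specify them:

  from openai import OpenAI

  client = OpenAI()

  # Assumed label set; the paper's closed-domain label set may differ.
  LABELS = ["anger", "fear", "joy", "sadness", "disgust", "surprise"]

  def classify_emotion(argument: str) -> str:
      """Zero-shot, closed-domain labeling: pick one emotion from a fixed set."""
      prompt = (
          "Which emotion does the following argument evoke? "
          f"Answer with exactly one of: {', '.join(LABELS)}.\n\n"
          f"Argument: {argument}"
      )
      response = client.chat.completions.create(
          model="gpt-4o-mini",  # named in the abstract; the Falcon/Llama variants run locally
          messages=[{"role": "user", "content": prompt}],
      )
      return response.choices[0].message.content.strip().lower()

The binary and open-domain variants differ only in the answer instruction: a
yes/no question about emotionality, or a free-form emotion word, respectively.
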
@inproceedings{menchaca-resendiz-klinger-2025-mopo,
title = {{MOPO}: Multi-Objective Prompt Optimization for Affective Text Generation},
author = {Menchaca Resendiz, Yarik and
Klinger, Roman},
editor = {Rambow, Owen and
Wanner, Leo and
Apidianaki, Marianna and
Al-Khalifa, Hend and
Di Eugenio, Barbara and
Schockaert, Steven},
booktitle = {Proceedings of the 31st International Conference on Computational Linguistics},
month = jan,
year = {2025},
address = {Abu Dhabi, UAE},
publisher = {Association for Computational Linguistics},
url = {https://aclanthology.org/2025.coling-main.375/},
pages = {5588--5606},
eprint = {2412.12948},
archiveprefix = {arXiv},
primaryclass = {cs.CL},
internaltype = {conferenceproc},
abstract = {How emotions are expressed depends on the context and domain. On X (formerly Twitter), for instance, an author might simply use the hashtag {\#}anger, while in a news headline, emotions are typically written in a more polite, indirect manner. To enable conditional text generation models to create emotionally connotated texts that fit a domain, users need to have access to a parameter that allows them to choose the appropriate way to express an emotion. To achieve this, we introduce MOPO, a Multi-Objective Prompt Optimization methodology. MOPO optimizes prompts according to multiple objectives (which correspond here to the output probabilities assigned by emotion classifiers trained for different domains). In contrast to single objective optimization, MOPO outputs a set of prompts, each with a different weighting of the multiple objectives. Users can then choose the most appropriate prompt for their context. We evaluate MOPO using three objectives, determined by various domain-specific emotion classifiers. MOPO improves performance by up to 15 pp across all objectives with a minimal loss (1{--}2 pp) for any single objective compared to single-objective optimization. These minor performance losses are offset by a broader generalization across multiple objectives {--} which is not possible with single-objective optimization. Additionally, MOPO reduces computational requirements by simultaneously optimizing for multiple objectives, eliminating separate optimization procedures for each objective.}
}
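
MOPO, per the abstract above, scores candidate prompts with several
domain-specific emotion classifiers and returns a set of prompts rather than a
single winner. Below is a toy sketch of that selection idea using Pareto
dominance; the scores are invented, and whether MOPO filters by dominance or by
weighted scalarization internally is not stated in the abstract, so this
illustrates multi-objective prompt selection in general, not the paper's exact
procedure:

  from typing import Sequence

  def dominates(a: Sequence[float], b: Sequence[float]) -> bool:
      """True if a is at least as good as b everywhere and strictly better somewhere."""
      return all(x >= y for x, y in zip(a, b)) and any(x > y for x, y in zip(a, b))

  def prompt_set(candidates: dict[str, tuple[float, ...]]) -> dict[str, tuple[float, ...]]:
      """Keep prompts whose per-domain scores no other prompt dominates."""
      return {
          p: s
          for p, s in candidates.items()
          if not any(dominates(s2, s) for p2, s2 in candidates.items() if p2 != p)
      }

  # Invented scores: probability that text generated from each prompt is labeled
  # with the target emotion by three domain classifiers (e.g. tweets, headlines,
  # formal prose).
  candidates = {
      "Write a tweet expressing anger about the topic": (0.91, 0.42, 0.38),
      "Write a news headline conveying anger about the topic": (0.40, 0.88, 0.61),
      "Describe the topic in a formally angry register": (0.55, 0.70, 0.74),
  }
  print(prompt_set(candidates))  # each prompt wins one domain, so all three survive

Each surviving prompt represents a different trade-off across domains, matching
the abstract's point that users then pick the prompt that fits their context.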