@comment{klinger.bib}
@techreport{klinger_final_2026,
  title        = {Final {Project} {Report}: "{Structured} {Multi}-{Domain} {Emotion} {Analysis} from {Text}" ({SEAT}) and "{Computational} {Event} {Analysis} based on {Appraisal} {Theories} for {Emotion} {Analysis}" ({CEAT})},
  copyright    = {Creative Commons Attribution 4.0 International},
  shorttitle   = {Final {Project} {Report}},
  url          = {https://zenodo.org/doi/10.5281/zenodo.20070182},
  doi          = {10.5281/zenodo.20070182},
  abstract     = {Final report for the DFG project 667374.},
  language     = {en},
  urldate      = {2026-05-08},
  institution  = {Zenodo},
  author       = {Klinger, Roman},
  month        = may,
  year         = {2026},
  internaltype = {preprint},
  pdf          = {https://www.romanklinger.de/publications/Final-Project-Report-SEAT-CEAT.pdf}
}
@article{Yadav2026,
  author  = {Yadav, Itisha and Schindler, Sirko and Peters, Diana and Klinger, Roman},
  title   = {External Knowledge Integration in Large Language Models: A Survey on Methods, Challenges, and Future Directions},
  journal = {Semantic Web Journal},
  year    = {2026},
  note    = {accepted},
  url     = {https://www.semantic-web-journal.net/content/external-knowledge-integration-large-language-models-survey-methods-challenges-and-future-0}
}
@proceedings{wassa-2026-1,
  title        = {The Proceedings for the 15th Workshop on Computational Approaches to Subjectivity, Sentiment {\&} Social Media Analysis ({WASSA} 2026)},
  editor       = {Barnes, Jeremy and Barriere, Valentin and De Clercq, Orph{\'e}e and Klinger, Roman and Nouri, C{\'e}lia and Nozza, Debora and Singh, Pranaydeep},
  month        = mar,
  year         = {2026},
  address      = {Rabat, Morocco},
  publisher    = {Association for Computational Linguistics},
  url          = {https://aclanthology.org/2026.wassa-1.0/},
  isbn         = {979-8-89176-378-4},
  internaltype = {edited}
}
@inproceedings{greschner-etal-2026-trust,
  title         = {Trust Me, {I} Can Convince You: The Contextualized Argument Appraisal Framework and the {C}ont{A}rg{A} Corpus},
  author        = {Greschner, Lynn and Weber, Sabine and Klinger, Roman},
  editor        = {Piperidis, Stelios and Bel, N{\'u}ria and van den Heuvel, Henk and Ide, Nancy and Krek, Simon and Toral, Antonio},
  booktitle     = {International Conference on Language Resources and Evaluation},
  volume        = {main},
  month         = may,
  year          = {2026},
  address       = {Palma de Mallorca, Spain},
  publisher     = {ELRA Language Resource Association},
  url           = {https://aclanthology.org/2026.lrec-main.659/},
  pages         = {8327--8346},
  abstract      = {Emotions that somebody develops based on an argument do not only depend on the argument itself - they are also influenced by a subjective evaluation of the argument{'}s potential impact on the self. For instance, an argument to ban plastic bottles might cause fear of losing a job for a bottle industry worker, which lowers the convincingness {--} presumably independent of its content. While binary emotionality of arguments has been studied, such cognitive appraisal models have only been proposed in other subtasks of emotion analysis, but not in the context of arguments and their convincingness. To fill this research gap, we propose the Contextualized Argument Appraisal Framework to model the interplay between the sender, receiver, and argument. We adapt established appraisal models from psychology to argument mining, including argument pleasantness, familiarity, response urgency, and expected effort, as well as convincingness variables. To evaluate the framework and pave the way for computational modeling, we develop a novel role-playing-based annotation setup, mimicking real-world exposure to arguments. Participants disclose their emotion, explain the main cause, the argument appraisal, and the perceived convincingness. To consider the subjective nature of such annotations, we also collect demographic data and personality traits of both the participants and ask them to disclose the same variables for their perception of the argument sender. The analysis of the resulting corpus of 4000 annotations reveals that convincingness is positively correlated with positive emotions (e.g., trust) and negatively correlated with negative emotions (e.g., anger). The appraisal variables particularly point to the importance of the annotator{'}s familiarity with the argument.},
  doi           = {10.63317/484rpnvebop5},
  internaltype  = {conferenceproc},
  eprint        = {2509.17844},
  archiveprefix = {arXiv},
  primaryclass  = {cs.CL},
  pdf           = {https://www.romanklinger.de/publications/GreschnerWeberKlinger2026LREC.pdf}
}
@inproceedings{menchaca-resendiz-klinger-2026-parl,
  title         = {{PARL}: Prompt-based Agents for Reinforcement Learning},
  author        = {Menchaca Resendiz, Yarik and Klinger, Roman},
  editor        = {Piperidis, Stelios and Bel, N{\'u}ria and van den Heuvel, Henk and Ide, Nancy and Krek, Simon and Toral, Antonio},
  booktitle     = {International Conference on Language Resources and Evaluation},
  volume        = {main},
  month         = may,
  year          = {2026},
  address       = {Palma de Mallorca, Spain},
  publisher     = {ELRA Language Resource Association},
  url           = {https://aclanthology.org/2026.lrec-main.488/},
  doi           = {10.63317/3z4zqifrngk9},
  pages         = {6166--6184},
  abstract      = {Large language models (LLMs) have demonstrated high performance on tasks expressed in natural language, particularly in zero- or few-shot settings. These are typically framed as supervised (e.g., classification) or unsupervised (e.g., clustering) problems. However, limited work evaluates LLMs as agents in reinforcement learning (RL) tasks (e.g., playing games), where learning occurs through interaction with an environment and a reward system. While prior work focused on representing tasks that rely on a language representation, we study structured, non-linguistic reasoning {--} such as interpreting positions in a grid world. We therefore introduce PARL (Prompt-based Agent for Reinforcement Learning), a method that uses LLMs as RL agents through prompting, without any fine-tuning. PARL encodes actions, states, and rewards in the prompt, enabling the model to learn through trial-and-error interaction. We evaluate PARL on three standard RL tasks that do not entirely rely on natural language. We show that it can match or outperform traditional RL agents in simple environments by leveraging pretrained knowledge. However, we identify performance limitations in tasks that require complex mathematical operations or decoding states and actions.},
  internaltype  = {conferenceproc},
  eprint        = {2510.21306},
  archiveprefix = {arXiv},
  primaryclass  = {cs.CL},
  pdf           = {https://www.romanklinger.de/publications/MenchacaResendizKlinger2026LREC.pdf}
}
@inproceedings{greschner-etal-2026-categorical,
  title         = {Categorical Emotions or Appraisals - Which Emotion Model Explains Argument Convincingness Better?},
  author        = {Greschner, Lynn and Bauer, Meike and Weber, Sabine and Klinger, Roman},
  editor        = {Piperidis, Stelios and Bel, N{\'u}ria and van den Heuvel, Henk and Ide, Nancy and Krek, Simon and Toral, Antonio},
  booktitle     = {International Conference on Language Resources and Evaluation},
  volume        = {main},
  month         = may,
  year          = {2026},
  address       = {Palma de Mallorca, Spain},
  publisher     = {ELRA Language Resource Association},
  url           = {https://aclanthology.org/2026.lrec-main.649/},
  pages         = {8190--8203},
  abstract      = {The convincingness of an argument does not only depend on its structure (logos), the person who makes the argument (ethos), but also on the emotion that it causes in the recipient (pathos). While the overall intensity and categorical values of emotions in arguments have received considerable attention in the research community, we argue that the emotion an argument evokes in a recipient is subjective. It depends on the recipient{'}s goals, standards, prior knowledge, and stance. Appraisal theories lend themselves as a link between the subjective cognitive assessment of events and emotions. They have been used in event-centric emotion analysis, but their suitability for assessing argument convincingness remains unexplored. In this paper, we evaluate whether appraisal theories are suitable for emotion analysis in arguments by considering subjective cognitive evaluations of the importance and impact of an argument on its receiver. Based on the annotations in the recently published ContArgA corpus, we perform zero-shot prompting experiments to evaluate the importance of gold-annotated and predicted emotions and appraisals for the assessment of the subjective convincingness labels. We find that, while categorical emotion information does improve convincingness prediction, the improvement is more pronounced with appraisals. This work presents the first systematic comparison between emotion models for convincingness prediction, demonstrating the advantage of appraisals, providing insights for theoretical and practical applications in computational argumentation.},
  doi           = {10.63317/3vrvrgvtnvhn},
  internaltype  = {conferenceproc},
  eprint        = {2511.07162},
  archiveprefix = {arXiv},
  primaryclass  = {cs.CL},
  pdf           = {https://www.romanklinger.de/publications/GreschnerBauerWeberKlinger2026LREC.pdf}
}
@inproceedings{ronningstad-etal-2026-entity,
  title        = {Entity-Level Sentiment Analysis with Sentence Relevance Detection},
  author       = {R{\o}nningstad, Egil and Klinger, Roman and {\O}vrelid, Lilja and Velldal, Erik},
  editor       = {Piperidis, Stelios and Bel, N{\'u}ria and van den Heuvel, Henk and Ide, Nancy and Krek, Simon and Toral, Antonio},
  booktitle    = {International Conference on Language Resources and Evaluation},
  volume       = {main},
  month        = may,
  year         = {2026},
  address      = {Palma de Mallorca, Spain},
  publisher    = {ELRA Language Resource Association},
  url          = {https://aclanthology.org/2026.lrec-main.638/},
  pages        = {8040--8055},
  abstract     = {The task of entity-level sentiment analysis (Elsa) is to extract sentiment scores for a given entity (such as person names or organization names) from a text. Elsa is a challenging task and involves processing of longer documents, where several entities may be mentioned with varying importance for the final score aggregation. Fine-tuning encoder-based Transformers (such as BERT) constitutes the state of the art for sentiment predictions, however, these models are still limited by their restricted input lengths. Decoder-only models so far still underperform on the task. We approach the context limitation by learning to extract segments that are relevant for the sentiment prediction for a given entity, without preprocessing by chunking and aggregation. For decoder models, we explore fine-tuning these through supervised fine-tuning and pairwise comparison, a method borrowed from reward modeling for preference optimization. Both methods perform well and set a new standard for the Elsa task. We further show that pairwise classification is faster, simpler, and shows less variance than the more common direct supervision for this task.},
  doi          = {10.63317/35ideqx4jk89},
  internaltype = {conferenceproc},
  pdf          = {https://www.romanklinger.de/publications/RønningstadKlingerVelldalØvrelid2026LREC.pdf}
}
@inproceedings{schaefer-klinger-2026-disambiguation,
  title         = {Disambiguation of Emotion Annotations by Contextualizing Events in Plausible Narratives},
  author        = {Schaefer, Johannes and Klinger, Roman},
  editor        = {Piperidis, Stelios and Bel, N{\'u}ria and van den Heuvel, Henk and Ide, Nancy and Krek, Simon and Toral, Antonio},
  booktitle     = {International Conference on Language Resources and Evaluation},
  volume        = {main},
  month         = may,
  year          = {2026},
  address       = {Palma de Mallorca, Spain},
  publisher     = {ELRA Language Resource Association},
  url           = {https://aclanthology.org/2026.lrec-main.757/},
  pages         = {9635--9656},
  abstract      = {Ambiguity in emotion analysis stems both from potentially missing information and the subjectivity of interpreting a text. The latter did receive substantial attention, but can we fill missing information to resolve ambiguity? We address this question by developing a method to automatically generate reasonable contexts for an otherwise ambiguous classification instance. These generated contexts may act as illustrations of potential interpretations by different readers, as they can fill missing information with their individual world knowledge. This task to generate plausible narratives is a challenging one: We combine techniques from short story generation to achieve coherent narratives. The resulting dataset of Emotional BackStories, EBS, allows for the first comprehensive and systematic examination of contextualized emotion analysis. We conduct automatic and human annotation and find that the generated contextual narratives do indeed clarify the interpretation of specific emotions. Particularly relief and sadness benefit from our approach, while joy does not require the additional context we provide.},
  doi           = {10.63317/2agma6tpnh8h},
  internaltype  = {conferenceproc},
  eprint        = {2508.09954},
  archiveprefix = {arXiv},
  primaryclass  = {cs.CL},
  pdf           = {https://www.romanklinger.de/publications/SchaeferKlinger2026LREC.pdf}
}
@inproceedings{weber-etal-2026-less,
  title        = {Less Is More? The Role of Demographic Author Information in Emotion Classification of Ambiguous Text},
  author       = {Weber, Sabine and Greschner, Lynn and Klinger, Roman},
  editor       = {Piperidis, Stelios and Bel, N{\'u}ria and van den Heuvel, Henk and Ide, Nancy and Krek, Simon and Toral, Antonio},
  booktitle    = {International Conference on Language Resources and Evaluation},
  volume       = {main},
  month        = may,
  year         = {2026},
  address      = {Palma de Mallorca, Spain},
  publisher    = {ELRA Language Resource Association},
  url          = {https://aclanthology.org/2026.lrec-main.646/},
  pages        = {8147--8161},
  abstract     = {Emotion annotation in text is a challenging task that often yields low inter-annotator agreement. Missing context, differences in world knowledge and extra-linguistic factors such as the author{'}s identity influence how emotions are perceived. When the text does not provide sufficient information, details about the author may help resolve ambiguity. We test the hypothesis that providing annotators with demographic information reduces disagreement in emotion annotation. We compare one group of annotators who sees each text alongside demographic information about its author, with a group who sees only the text. We find in our study with 500 annotators and 250 texts that displaying demographic information about the author of the text does not improve agreement between annotators, nor does it improve agreement with the gold label. The only exception are cases where the emotion polarity (positive or negative) is unclear. We also find that annotators perform overall better at identifying the correct emotion label when it aligns with gender stereotypes. Zero-shot prompting experiments with large language models do resemble the human annotation experimental results. Our findings suggest that providing demographic information is not a straightforward remedy for ambiguity in emotion annotation and careful consideration is needed when incorporating such data.},
  doi          = {10.63317/2cw8tpo82h55},
  internaltype = {conferenceproc},
  pdf          = {https://www.romanklinger.de/publications/WeberGreschnerKlinger2026LREC.pdf}
}
@inproceedings{schafer-etal-2026-appraisal,
  title        = {Appraisal Trajectories in Narratives Reveal Distinct Patterns of Emotion Evocation},
  author       = {Sch{\"a}fer, Johannes and Wagner, Janne and Klinger, Roman},
  editor       = {Barnes, Jeremy and Barriere, Valentin and De Clercq, Orph{\'e}e and Klinger, Roman and Nouri, C{\'e}lia and Nozza, Debora and Singh, Pranaydeep},
  booktitle    = {The Proceedings for the 15th Workshop on Computational Approaches to Subjectivity, Sentiment {\&} Social Media Analysis ({WASSA} 2026)},
  month        = mar,
  year         = {2026},
  address      = {Rabat, Morocco},
  publisher    = {Association for Computational Linguistics},
  url          = {https://aclanthology.org/2026.wassa-1.7/},
  pages        = {73--82},
  isbn         = {979-8-89176-378-4},
  abstract     = {Understanding emotion responses relies on reconstructing how individuals appraise events. While prior work has studied emotion trajectories and inherent correlations with appraisals, it has considered appraisals only in a snapshot analysis. However, because appraisal is a complex, sequential process, we argue that it should be analyzed based on how it unfolds throughout a narrative. In this study, we investigate whether trajectories of appraisals are distinctive for different emotions in five-event stories {--} narratives where each of five sentences describes an event. We employ zero-shot prompting with a large language model to predict appraisals on sub-sequences of a narrative. We find that this approach is effective in identifying relevant appraisals in narratives, without prior knowledge of the evoked emotion, enabling a comprehensive analysis of appraisal trajectories. Furthermore, we are the first to quantitatively identify typical patterns of appraisal trajectories that distinguish emotions. For example, a rising trajectory for self-responsibility indicates trust, while a falling trajectory suggests anger.},
  internaltype = {workshop},
  pdf          = {https://www.romanklinger.de/publications/SchaeferWagnerKlingerWASSA2026.pdf}
}
@inproceedings{weber-etal-2026-says,
  title        = {Says Who? Argument Convincingness and Reader Stance Are Correlated with Perceived Author Personality},
  author       = {Weber, Sabine and Greschner, Lynn and Klinger, Roman},
  editor       = {Barnes, Jeremy and Barriere, Valentin and De Clercq, Orph{\'e}e and Klinger, Roman and Nouri, C{\'e}lia and Nozza, Debora and Singh, Pranaydeep},
  booktitle    = {The Proceedings for the 15th Workshop on Computational Approaches to Subjectivity, Sentiment {\&} Social Media Analysis ({WASSA} 2026)},
  month        = mar,
  year         = {2026},
  address      = {Rabat, Morocco},
  publisher    = {Association for Computational Linguistics},
  url          = {https://aclanthology.org/2026.wassa-1.20/},
  pages        = {265--277},
  isbn         = {979-8-89176-378-4},
  abstract     = {Alongside its literal meaning, text also carries implicit social signals: information that is used by the reader to assign the author of the text a specific identity or make assumptions about the author{'}s character. The reader creates a mental image of the author which influences the interpretation of the presented information. This is especially relevant for argumentative text, where the credibility of the information might depend on who provides it. We therefore focus on the question: How do readers of an argument imagine its author? Using the ContArgA corpus, we study arguments annotated for convincingness and perceived author properties (level of education and Big Five personality traits). We find that annotators perceive an author to be similar to themselves when they agree with the stance of the argument. We also find that the envisioned personality traits and education level of the author are statistically significantly correlated with the argument{'}s convincingness. We conduct experiments with four generative LLMs and a RoBERTa-based regression model showing that LLMs do not replicate the annotators judgments. Argument convincingness can however provide a useful signal for modeling perceived author personality when it is explicitly used during training.},
  internaltype = {workshop},
  pdf          = {https://www.romanklinger.de/publications/WeberGreschnerKlinger_WASSA2026.pdf}
}
@inproceedings{chen-etal-2026-emotionally,
  title         = {Emotionally Charged, Logically Blurred: {AI}-driven Emotional Framing Impairs Human Fallacy Detection},
  author        = {Chen, Yanran and Greschner, Lynn and Klinger, Roman and Klenk, Michael and Eger, Steffen},
  editor        = {Demberg, Vera and Inui, Kentaro and Marquez, Llu{\'i}s},
  booktitle     = {Proceedings of the 19th Conference of the {E}uropean Chapter of the {A}ssociation for {C}omputational {L}inguistics (Volume 1: Long Papers)},
  month         = mar,
  year          = {2026},
  address       = {Rabat, Morocco},
  publisher     = {Association for Computational Linguistics},
  url           = {https://aclanthology.org/2026.eacl-long.316/},
  pages         = {6709--6732},
  isbn          = {979-8-89176-380-7},
  abstract      = {Logical fallacies are common in public communication and can mislead audiences; fallacious arguments may still appear convincing despite lacking soundness, because convincingness is inherently subjective. We present the first computational study of how emotional framing interacts with fallacies and convincingness, using large language models (LLMs) to systematically change emotional appeals in fallacious arguments. We benchmark eight LLMs on injecting emotional appeal into fallacious arguments while preserving their logical structures, then use the best models to generate stimuli for a human study. Our results show that LLM-driven emotional framing reduces human fallacy detection in F1 by 14.5{\%} on average. Humans perform better in fallacy detection when perceiving enjoyment than fear or sadness, and these three emotions also correlate with significantly higher convincingness compared to neutral or other emotion states. Our work has implications for AI-driven emotional manipulation in the context of fallacious argumentation.},
  eprint        = {2510.09695},
  archiveprefix = {arXiv},
  primaryclass  = {cs.CL},
  internaltype  = {conferenceproc}
}
@article{velutharambath2026deceptiondetectedcrosslinguisticstudy,
  title         = {What if Deception Cannot be Detected? A Cross-Linguistic Study on the Limits of Deception Detection from Text},
  author        = {Velutharambath, Aswathy and Sassenberg, Kai and Klinger, Roman},
  journal       = {Computational Linguistics},
  year          = {2026},
  note          = {in print},
  eprint        = {2505.13147},
  archiveprefix = {arXiv},
  primaryclass  = {cs.CL},
  url           = {https://arxiv.org/abs/2505.13147},
  internaltype  = {journal},
  doi           = {10.1162/coli.a.614}
}