@comment{klinger.bib}
@inproceedings{Troiano2024,
  title         = {Dealing with Controversy: An Emotion and Coping
                   Strategy Corpus Based on Role Playing},
  author        = {Troiano, Enrica and Labat, Sofie and Stranisci,
                   Marco Antonio and Damiano, Rossana and Patti,
                   Viviana and Klinger, Roman},
  booktitle     = {Findings of the Association for Computational
                   Linguistics: EMNLP 2024},
  year          = {2024},
  publisher     = {Association for Computational Linguistics},
  note          = {accepted},
  internaltype  = {conferenceproc},
  pdf           = {https://www.romanklinger.de/publications/TroianoLabatStranisciDamianoPattiKlinger_EMNLP-Findings2024.pdf},
  eprint        = {2409.19025},
  archiveprefix = {arXiv},
  primaryclass  = {cs.CL},
  url           = {https://arxiv.org/abs/2409.19025}
}
@inproceedings{Velutharambath2024b,
  title         = {How Entangled is Factuality and Deception in
                   {German}?},
  author        = {Velutharambath, Aswathy and Wuehrl, Amelie and
                   Klinger, Roman},
  booktitle     = {Findings of the Association for Computational
                   Linguistics: EMNLP 2024},
  year          = {2024},
  publisher     = {Association for Computational Linguistics},
  note          = {accepted},
  internaltype  = {conferenceproc},
  pdf           = {https://www.romanklinger.de/publications/VelutharambathWuehrlKlinger-EMNLP-Findings2024.pdf},
  eprint        = {2409.20165},
  archiveprefix = {arXiv},
  primaryclass  = {cs.CL},
  url           = {https://arxiv.org/abs/2409.20165}
}
@misc{HofmannSindermannKlinger2024,
  title         = {Prompt-based Personality Profiling: Reinforcement Learning for Relevance Filtering},
  author        = {Hofmann, Jan and Sindermann, Cornelia and Klinger, Roman},
  year          = {2024},
  eprint        = {2409.04122},
  archiveprefix = {arXiv},
  primaryclass  = {cs.CL},
  url           = {https://arxiv.org/abs/2409.04122},
  internaltype  = {preprint}
}
@proceedings{wassa-2024-approaches,
  title        = {Proceedings of the 14th Workshop on Computational Approaches to Subjectivity, Sentiment, {\&} Social Media Analysis},
  editor       = {De Clercq, Orph{\'e}e and Barriere, Valentin and Barnes, Jeremy and Klinger, Roman and Sedoc, Jo{\~a}o and Tafreshi, Shabnam},
  month        = aug,
  year         = {2024},
  address      = {Bangkok, Thailand},
  publisher    = {Association for Computational Linguistics},
  url          = {https://aclanthology.org/2024.wassa-1.0},
  internaltype = {edited}
}
@inproceedings{Wuehrl2024,
  title        = {{IMS}{\_}medic{ALY} at {\#}{SMM}4{H} 2024: Detecting Impacts of Outdoor Spaces on Social Anxiety with Data Augmented Ensembling},
  author       = {Wuehrl, Amelie and Greschner, Lynn and Menchaca Resendiz, Yarik and Klinger, Roman},
  editor       = {Xu, Dongfang and Gonzalez-Hernandez, Graciela},
  booktitle    = {Proceedings of The 9th Social Media Mining for Health Research and Applications (SMM4H 2024) Workshop and Shared Tasks},
  month        = aug,
  year         = {2024},
  address      = {Bangkok, Thailand},
  publisher    = {Association for Computational Linguistics},
  url          = {https://aclanthology.org/2024.smm4h-1.19},
  pages        = {83--87},
  abstract     = {Many individuals affected by Social Anxiety Disorder turn to social media platforms to share their experiences and seek advice. This includes discussing the potential benefits of engaging with outdoor environments. As part of {\#}SMM4H 2024, Shared Task 3 focuses on classifying the effects of outdoor spaces on social anxiety symptoms in Reddit posts. In our contribution to the task, we explore the effectiveness of domain-specific models (trained on social media data {--} SocBERT) against general domain models (trained on diverse datasets {--} BERT, RoBERTa, GPT-3.5) in predicting the sentiment related to outdoor spaces. Further, we assess the benefits of augmenting sparse human-labeled data with synthetic training instances and evaluate the complementary strengths of domain-specific and general classifiers using an ensemble model. Our results show that (1) fine-tuning small, domain-specific models generally outperforms large general language models in most cases. Only one large language model (GPT-4) exhibits performance comparable to the fine-tuned models (52{\%} F1). Further, we find that (2) synthetic data does improve the performance of fine-tuned models in some cases, and (3) models do not appear to complement each other in our ensemble setup.},
  internaltype = {workshop}
}
@inproceedings{Schaefer2024,
  title        = {Hierarchical Adversarial Correction to Mitigate Identity Term Bias in Toxicity Detection},
  author       = {Sch{\"a}fer, Johannes and Heid, Ulrich and Klinger, Roman},
  editor       = {De Clercq, Orph{\'e}e and Barriere, Valentin and Barnes, Jeremy and Klinger, Roman and Sedoc, Jo{\~a}o and Tafreshi, Shabnam},
  booktitle    = {Proceedings of the 14th Workshop on Computational Approaches to Subjectivity, Sentiment, {\&} Social Media Analysis},
  month        = aug,
  year         = {2024},
  address      = {Bangkok, Thailand},
  publisher    = {Association for Computational Linguistics},
  url          = {https://aclanthology.org/2024.wassa-1.4},
  pdf          = {https://www.romanklinger.de/publications/SchaeferHeidKlingerWASSA2024.pdf},
  pages        = {35--51},
  abstract     = {Corpora that are the fundament for toxicity detection contain such expressions typically directed against a target individual or group, e.g., people of a specific gender or ethnicity. Prior work has shown that the target identity mention can constitute a confounding variable. As an example, a model might learn that Christians are always mentioned in the context of hate speech. This misguided focus can lead to a limited generalization to newly emerging targets that are not found in the training data. In this paper, we hypothesize and subsequently show that this issue can be mitigated by considering targets on different levels of specificity. We distinguish levels of (1) the existence of a target, (2) a class (e.g., that the target is a religious group), or (3) a specific target group (e.g., Christians or Muslims). We define a target label hierarchy based on these three levels and then exploit this hierarchy in an adversarial correction for the lowest level (i.e. (3)) while maintaining some basic target features. This approach does not lower the toxicity detection performance but increases the generalization to targets not being available at training time.},
  internaltype = {workshop}
}
@inproceedings{Ronningstad2024,
  title         = {Entity-Level Sentiment: More than the Sum of Its
                   Parts},
  author        = {R{\o}nningstad, Egil and Klinger, Roman and Velldal,
                   Erik and {\O}vrelid, Lilja},
  editor        = {De Clercq, Orph{\'e}e and Barriere, Valentin and
                   Barnes, Jeremy and Klinger, Roman and Sedoc,
                   Jo{\~a}o and Tafreshi, Shabnam},
  booktitle     = {Proceedings of the 14th Workshop on Computational
                   Approaches to Subjectivity, Sentiment, {\&} Social
                   Media Analysis},
  month         = aug,
  year          = {2024},
  address       = {Bangkok, Thailand},
  publisher     = {Association for Computational Linguistics},
  url           = {https://aclanthology.org/2024.wassa-1.8},
  pages         = {84--96},
  abstract      = {In sentiment analysis of longer texts, there may be
                   a variety of topics discussed, of entities
                   mentioned, and of sentiments expressed regarding
                   each entity. We find a lack of studies exploring how
                   such texts express their sentiment towards each
                   entity of interest, and how these sentiments can be
                   modelled. In order to better understand how
                   sentiment regarding persons and organizations (each
                   entity in our scope) is expressed in longer texts,
                   we have collected a dataset of expert annotations
                   where the overall sentiment regarding each entity is
                   identified, together with the sentence-level
                   sentiment for these entities separately. We show
                   that the reader{'}s perceived sentiment regarding an
                   entity often differs from an arithmetic aggregation
                   of sentiments at the sentence level. Only 70{\%} of
                   the positive and 55{\%} of the negative entities
                   receive a correct overall sentiment label when we
                   aggregate the (human-annotated) sentiment labels for
                   the sentences where the entity is mentioned. Our
                   dataset reveals the complexity of entity-specific
                   sentiment in longer texts, and allows for more
                   precise modelling and evaluation of such sentiment
                   expressions.},
  internaltype  = {workshop},
  archiveprefix = {arXiv},
  eprint        = {2407.03916},
  pdf           = {https://www.romanklinger.de/publications/R%C3%B8nningstadKlingerVelldal%C3%98vrelid_WASSA2024.pdf}
}
@inproceedings{bagdon-etal-2024-expert,
  title        = {{``}You are an expert annotator{''}: Automatic Best{--}Worst-Scaling Annotations for Emotion Intensity Modeling},
  author       = {Bagdon, Christopher and
                  Karmalkar, Prathamesh and
                  Gurulingappa, Harsha and
                  Klinger, Roman},
  editor       = {Duh, Kevin and
                  Gomez, Helena and
                  Bethard, Steven},
  booktitle    = {Proceedings of the 2024 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 1: Long Papers)},
  month        = jun,
  year         = {2024},
  address      = {Mexico City, Mexico},
  publisher    = {Association for Computational Linguistics},
  url          = {https://aclanthology.org/2024.naacl-long.439},
  pages        = {7917--7929},
  abstract     = {Labeling corpora constitutes a bottleneck to create models for new tasks or domains. Large language models mitigate the issue with automatic corpus labeling methods, particularly for categorical annotations. Some NLP tasks such as emotion intensity prediction, however, require text regression, but there is no work on automating annotations for continuous label assignments. Regression is considered more challenging than classification: The fact that humans perform worse when tasked to choose values from a rating scale lead to comparative annotation methods, including best{--}worst scaling. This raises the question if large language model-based annotation methods show similar patterns, namely that they perform worse on rating scale annotation tasks than on comparative annotation tasks. To study this, we automate emotion intensity predictions and compare direct rating scale predictions, pairwise comparisons and best{--}worst scaling. We find that the latter shows the highest reliability. A transformer regressor fine-tuned on these data performs nearly on par with a model trained on the original manual annotations.},
  internaltype = {conferenceproc},
  pdf          = {https://www.romanklinger.de/publications/BagdonNAACL2024.pdf}
}
@inproceedings{Wuehrl2024b,
  title         = {Understanding Fine-grained Distortions in Reports of Scientific Findings},
  author        = {Wuehrl, Amelie and
                   Wright, Dustin and
                   Klinger, Roman and
                   Augenstein, Isabelle},
  editor        = {Ku, Lun-Wei and
                   Martins, Andre and
                   Srikumar, Vivek},
  booktitle     = {Findings of the Association for Computational Linguistics: ACL 2024},
  month         = aug,
  year          = {2024},
  address       = {Bangkok, Thailand and virtual meeting},
  publisher     = {Association for Computational Linguistics},
  url           = {https://aclanthology.org/2024.findings-acl.369},
  pages         = {6175--6191},
  abstract      = {Distorted science communication harms individuals and society as it can lead to unhealthy behavior change and decrease trust in scientific institutions. Given the rapidly increasing volume of science communication in recent years, a fine-grained understanding of how findings from scientific publications are reported to the general public, and methods to detect distortions from the original work automatically, are crucial. Prior work focused on individual aspects of distortions or worked with unpaired data. In this work, we make three foundational contributions towards addressing this problem: (1) annotating 1,600 instances of scientific findings from academic papers paired with corresponding findings as reported in news articles and tweets wrt. four characteristics: causality, certainty, generality and sensationalism; (2) establishing baselines for automatically detecting these characteristics; and (3) analyzing the prevalence of changes in these characteristics in both human-annotated and large-scale unlabeled data. Our results show that scientific findings frequently undergo subtle distortions when reported. Tweets distort findings more often than science news reports. Detecting fine-grained distortions automatically poses a challenging task. In our experiments, fine-tuned task-specific models consistently outperform few-shot LLM prompting.},
  pdf           = {https://www.romanklinger.de/publications/WuehrlEtAlACLFindings2024.pdf},
  archiveprefix = {arXiv},
  eprint        = {2402.12431},
  internaltype  = {conferenceproc}
}
@inproceedings{Wemmer2024,
  title        = {{E}mo{P}rogress: Cumulated Emotion Progression Analysis in Dreams and Customer Service Dialogues},
  author       = {Wemmer, Eileen and Labat, Sofie and Klinger, Roman},
  editor       = {Calzolari, Nicoletta and Kan, Min-Yen and Hoste, Veronique and Lenci, Alessandro and Sakti, Sakriani and Xue, Nianwen},
  booktitle    = {Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)},
  month        = may,
  year         = {2024},
  address      = {Torino, Italy},
  publisher    = {ELRA and ICCL},
  url          = {https://aclanthology.org/2024.lrec-main.503},
  pages        = {5660--5677},
  pdf          = {https://www.romanklinger.de/publications/WemmerLabatKlingerLRECCOLING2024.pdf},
  internaltype = {conferenceproc}
}
@inproceedings{Velutharambath2024,
  title         = {Can Factual Statements Be Deceptive? The {D}e{F}a{B}el Corpus of Belief-based Deception},
  author        = {Velutharambath, Aswathy and W{\"u}hrl, Amelie and Klinger, Roman},
  editor        = {Calzolari, Nicoletta and Kan, Min-Yen and Hoste, Veronique and Lenci, Alessandro and Sakti, Sakriani and Xue, Nianwen},
  booktitle     = {Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)},
  month         = may,
  year          = {2024},
  address       = {Torino, Italy},
  publisher     = {ELRA and ICCL},
  url           = {https://aclanthology.org/2024.lrec-main.243},
  pages         = {2708--2723},
  internaltype  = {conferenceproc},
  pdf           = {https://www.romanklinger.de/publications/VelutharambathWuehrlKlinger-LREC-COLING2024.pdf},
  eprint        = {2403.10185},
  archiveprefix = {arXiv},
  primaryclass  = {cs.CL}
}
@inproceedings{Barreiss20242,
  author       = {Barei\ss{}, Patrick and Klinger, Roman and Barnes,
                  Jeremy},
  title        = {English Prompts are Better for {NLI}-based Zero-Shot
                  Emotion Classification than Target-Language Prompts},
  year         = {2024},
  isbn         = {9798400701726},
  publisher    = {Association for Computing Machinery},
  address      = {New York, NY, USA},
  url          = {https://doi.org/10.1145/3589335.3651902},
  doi          = {10.1145/3589335.3651902},
  abstract     = {Emotion classification in text is a challenging task
                  due to the processes involved when interpreting a
                  textual description of a potential emotion
                  stimulus. In addition, the set of emotion categories
                  is highly domain-specific. For instance, literature
                  analysis might require the use of aesthetic emotions
                  (e.g., finding something beautiful), and social
                  media analysis could benefit from fine-grained sets
                  (e.g., separating anger from annoyance) than only
                  those that represent basic categories as they have
                  been proposed by Paul Ekman (anger, disgust, fear,
                  joy, surprise, sadness). This renders the task an
                  interesting field for zero-shot classifications, in
                  which the label set is not known at model
                  development time. Unfortunately, most resources for
                  emotion analysis are English, and therefore, most
                  studies on emotion analysis have been performed in
                  English, including those that involve prompting
                  language models for text labels. This leaves us with
                  a research gap that we address in this paper: In
                  which language should we prompt for emotion labels
                  on non-English texts? This is particularly of
                  interest when we have access to a multilingual large
                  language model, because we could request labels with
                  English prompts even for non-English data. Our
                  experiments with natural language inference-based
                  language models show that it is consistently better
                  to use English prompts even if the data is in a
                  different language.},
  booktitle    = {Companion Proceedings of the ACM on Web Conference
                  2024},
  pages        = {1318--1326},
  numpages     = {9},
  location     = {Singapore, Singapore},
  series       = {WWW '24},
  internaltype = {workshop}
}
@inproceedings{wegge-klinger-2024-topic,
  title         = {Topic Bias in Emotion Classification},
  author        = {Wegge, Maximilian and Klinger, Roman},
  editor        = {van der Goot, Rob and Bak, JinYeong and M{\"u}ller-Eberstein, Max and Xu, Wei and Ritter, Alan and Baldwin, Tim},
  booktitle     = {Proceedings of the Ninth Workshop on Noisy and User-generated Text (W-NUT 2024)},
  month         = mar,
  year          = {2024},
  address       = {San {\.G}iljan, Malta},
  publisher     = {Association for Computational Linguistics},
  url           = {https://aclanthology.org/2024.wnut-1.9},
  pages         = {89--103},
  abstract      = {Emotion corpora are typically sampled based on keyword/hashtag search or by asking study participants to generate textual instances. In any case, these corpora are not uniform samples representing the entirety of a domain. We hypothesize that this practice of data acquision leads to unrealistic correlations between overrepresented topics in these corpora that harm the generalizability of models. Such topic bias could lead to wrong predictions for instances like {``}I organized the service for my aunt{'}s funeral.{''} when funeral events are overpresented for instances labeled with sadness, despite the emotion of pride being more appropriate here. In this paper, we study this topic bias both from the data and the modeling perspective. We first label a set of emotion corpora automatically via topic modeling and show that emotions in fact correlate with specific topics. Further, we see that emotion classifiers are confounded by such topics. Finally, we show that the established debiasing method of adversarial correction via gradient reversal mitigates the issue. Our work points out issues with existing emotion corpora and that more representative resources are required for fair evaluation of models predicting affective concepts from text.},
  internaltype  = {workshop},
  pdf           = {https://www.romanklinger.de/publications/WeggeKlinger2024.pdf},
  eprint        = {2312.09043},
  archiveprefix = {arXiv},
  primaryclass  = {cs.CL}
}
@inproceedings{wuehrl-etal-2024-makes,
  title         = {What Makes Medical Claims (Un)Verifiable? Analyzing
                   Entity and Relation Properties for Fact
                   Verification},
  author        = {W{\"u}hrl, Amelie and Menchaca Resendiz, Yarik and
                   Grimminger, Lara and Klinger, Roman},
  editor        = {Graham, Yvette and Purver, Matthew},
  booktitle     = {Proceedings of the 18th Conference of the European
                   Chapter of the Association for Computational
                   Linguistics (Volume 1: Long Papers)},
  month         = mar,
  year          = {2024},
  address       = {St. Julian{'}s, Malta},
  publisher     = {Association for Computational Linguistics},
  url           = {https://aclanthology.org/2024.eacl-long.124},
  pages         = {2046--2058},
  abstract      = {Verifying biomedical claims fails if no evidence can
                   be discovered. In these cases, the fact-checking
                   verdict remains unknown and the claim is
                   unverifiable. To improve this situation, we have to
                   understand if there are any claim properties that
                   impact its verifiability. In this work we assume
                   that entities and relations define the core
                   variables in a biomedical claim{'}s anatomy and
                   analyze if their properties help us to differentiate
                   verifiable from unverifiable claims. In a study with
                   trained annotation experts we prompt them to find
                   evidence for biomedical claims, and observe how they
                   refine search queries for their evidence
                   search. This leads to the first corpus for
                   scientific fact verification annotated with
                   subject{--}relation{--}object triplets, evidence
                   documents, and fact-checking verdicts (the BEAR-FACT
                   corpus). We find (1) that discovering evidence for
                   negated claims (e.g., X{--}does-not-cause{--}Y) is
                   particularly challenging. Further, we see that
                   annotators process queries mostly by adding
                   constraints to the search and by normalizing
                   entities to canonical names. (2) We compare our
                   in-house annotations with a small crowdsourcing
                   setting where we employ both medical experts and
                   laypeople. We find that domain expertise does not
                   have a substantial effect on the reliability of
                   annotations. Finally, (3), we demonstrate that it is
                   possible to reliably estimate the success of
                   evidence retrieval purely from the claim text
                   (.82F$_1$), whereas identifying unverifiable claims
                   proves more challenging (.27F$_1$)},
  pdf           = {https://www.romanklinger.de/publications/Wuehrl-etal-2024-EACL.pdf},
  eprint        = {2402.01360},
  archiveprefix = {arXiv},
  primaryclass  = {cs.CL},
  internaltype  = {conferenceproc}
}