@comment{klinger.bib}

@inproceedings{Wegge2022,
  title = {Experiencer-Specific Emotion and Appraisal
                  Prediction},
  author = {Maximilian Wegge and Enrica Troiano and Laura
                  Oberl{\"a}nder and Roman Klinger},
  booktitle = {Proceedings of the Fifth Workshop on Natural
                  Language Processing and Computational Social
                  Science},
  month = dec,
  year = {2022},
  address = {Abu Dhabi, United Arab Emirates and online},
  publisher = {Association for Computational Linguistics},
  note = {accepted},
  pdf = {https://www.romanklinger.de/publications/WeggeTroianoOberlaenderKlinger.pdf},
  url = {https://arxiv.org/abs/2210.12078},
  internaltype = {workshop}
}
@inproceedings{Wuehrl2022,
  title = {Entity-based Claim Representation Improves
                  Fact-Checking of Medical Content in Tweets},
  author = {W{\"u}hrl, Amelie and Klinger, Roman},
  booktitle = {Proceedings of the 9th Workshop on Argument Mining},
  month = oct,
  year = {2022},
  address = {Online and in Gyeongju, Republic of Korea},
  publisher = {International Conference on Computational
                  Linguistics},
  url = {https://aclanthology.org/2022.argmining-1.18},
  pdf = {https://www.romanklinger.de/publications/WuehrlKlinger_Argmining2022.pdf},
  eprint = {2209.07834},
  archiveprefix = {arXiv},
  pages = {187--198},
  abstract = {False medical information on social media poses harm
                  to people{'}s health. While the need for biomedical
                  fact-checking has been recognized in recent years,
                  user-generated medical content has received
                  comparably little attention. At the same time,
                  models for other text genres might not be reusable,
                  because the claims they have been trained with are
                  substantially different. For instance, claims in the
                  SciFact dataset are short and focused: {``}Side
                  effects associated with antidepressants increases
                  risk of stroke{''}. In contrast, social media holds
                  naturally-occurring claims, often embedded in
                  additional context: {``}{`}If you take antidepressants
                  like SSRIs, you could be at risk of a condition
                  called serotonin syndrome{'} Serotonin syndrome
                  nearly killed me in 2010. Had symptoms of stroke and
                  seizure.{''} This showcases the mismatch between
                  real-world medical claims and the input that
                  existing fact-checking systems expect. To make
                  user-generated content checkable by existing models,
                  we propose to reformulate the social-media input in
                  such a way that the resulting claim mimics the claim
                  characteristics in established datasets. To
                  accomplish this, our method condenses the claim with
                  the help of relational entity information and either
                  compiles the claim out of an entity-relation-entity
                  triple or extracts the shortest phrase that contains
                  these elements. We show that the reformulated input
                  improves the performance of various fact-checking
                  models as opposed to checking the tweet text in its
                  entirety.},
  internaltype = {workshop},
  note = {###run###}
}
@inproceedings{Plazadelarco2022,
  title = {Natural Language Inference Prompts for Zero-shot
                  Emotion Classification in Text across Corpora},
  author = {Plaza-del-Arco, Flor Miriam and
                  Mart{\'\i}n-Valdivia, Mar{\'\i}a-Teresa and Klinger,
                  Roman},
  booktitle = {Proceedings of the 29th International Conference on
                  Computational Linguistics},
  month = oct,
  year = {2022},
  address = {Gyeongju, Republic of Korea},
  publisher = {International Committee on Computational
                  Linguistics},
  url = {https://aclanthology.org/2022.coling-1.592},
  pdf = {https://www.romanklinger.de/publications/PlazaDelArcoMartinValdiviaKlinger.pdf},
  eprint = {2209.06701},
  archiveprefix = {arXiv},
  pages = {6805--6817},
  abstract = {Within textual emotion classification, the set of
                  relevant labels depends on the domain and
                  application scenario and might not be known at the
                  time of model development. This conflicts with the
                  classical paradigm of supervised learning in which
                  the labels need to be predefined. A solution to
                  obtain a model with a flexible set of labels is to
                  use the paradigm of zero-shot learning as a natural
                  language inference task, which in addition adds the
                  advantage of not needing any labeled training
                  data. This raises the question how to prompt a
                  natural language inference model for zero-shot
                  learning emotion classification. Options for prompt
                  formulations include the emotion name anger alone or
                  the statement {``}This text expresses
                  anger{''}. With this paper, we analyze how sensitive
                  a natural language inference-based
                  zero-shot-learning classifier is to such changes to
                  the prompt under consideration of the corpus: How
                  carefully does the prompt need to be selected? We
                  perform experiments on an established set of emotion
                  datasets presenting different language registers
                  according to different sources (tweets, events,
                  blogs) with three natural language inference models
                  and show that indeed the choice of a particular
                  prompt formulation needs to fit to the corpus. We
                  show that this challenge can be tackled with
                  combinations of multiple prompts. Such ensemble is
                  more robust across corpora than individual prompts
                  and shows nearly the same performance as the
                  individual best prompt for a particular corpus.},
  internaltype = {conf}
}
@inproceedings{mohr-whrl-klinger:2022:LREC,
  author = {Mohr, Isabelle and W{\"u}hrl, Amelie and Klinger, Roman},
  title = {CoVERT: A Corpus of Fact-checked Biomedical COVID-19 Tweets},
  booktitle = {Proceedings of the Language Resources and Evaluation Conference},
  month = jun,
  year = {2022},
  address = {Marseille, France},
  publisher = {European Language Resources Association},
  pages = {244--257},
  abstract = {During the first two years of the COVID-19 pandemic, large volumes of biomedical information concerning this new disease have been published on social media. Some of this information can pose a real danger, particularly when false information is shared, for instance recommendations how to treat diseases without professional medical advice. Therefore, automatic fact-checking resources and systems developed specifically for medical domain are crucial. While existing fact-checking resources cover COVID-19 related information in news or quantify the amount of misinformation in tweets, there is no dataset providing fact-checked COVID-19 related Twitter posts with detailed annotations for biomedical entities, relations and relevant evidence. We contribute CoVERT, a fact-checked corpus of tweets with a focus on the domain of biomedicine and COVID-19 related (mis)information. The corpus consists of 300 tweets, each annotated with named entities and relations. We employ a novel crowdsourcing methodology to annotate all tweets with fact-checking labels and supporting evidence, which crowdworkers search for online. This methodology results in substantial inter-annotator agreement. Furthermore, we use the retrieved evidence extracts as part of a fact-checking pipeline, finding that the real-world evidence is more useful than the knowledge directly available in pretrained language models.},
  url = {https://aclanthology.org/2022.lrec-1.26},
  internaltype = {conf},
  pdf = {http://www.lrec-conf.org/proceedings/lrec2022/pdf/2022.lrec-1.26.pdf}
}
@inproceedings{whrl-klinger:2022:LREC,
  author = {W{\"u}hrl, Amelie and Klinger, Roman},
  title = {Recovering Patient Journeys: A Corpus of Biomedical Entities and Relations on Twitter (BEAR)},
  booktitle = {Proceedings of the Language Resources and Evaluation Conference},
  month = jun,
  year = {2022},
  address = {Marseille, France},
  publisher = {European Language Resources Association},
  pages = {4439--4450},
  abstract = {Text mining and information extraction for the medical domain has focused on scientific text generated by researchers. However, their access to individual patient experiences or patient-doctor interactions is limited. On social media, doctors, patients and their relatives also discuss medical information. Individual information provided by laypeople complements the knowledge available in scientific text. It reflects the patient's journey making the value of this type of data twofold: It offers direct access to people's perspectives, and it might cover information that is not available elsewhere, including self-treatment or self-diagnose. Named entity recognition and relation extraction are methods to structure information that is available in unstructured text. However, existing medical social media corpora focused on a comparably small set of entities and relations. In contrast, we provide rich annotation layers to model patients' experiences in detail. The corpus consists of medical tweets annotated with a fine-grained set of medical entities and relations between them, namely 14 entity (incl. environmental factors, diagnostics, biochemical processes, patients' quality-of-life descriptions, pathogens, medical conditions, and treatments) and 20 relation classes (incl. prevents, influences, interactions, causes). The dataset consists of 2,100 tweets with approx. 6,000 entities and 2,200 relations.},
  url = {https://aclanthology.org/2022.lrec-1.472},
  internaltype = {conf},
  pdf = {http://www.lrec-conf.org/proceedings/lrec2022/pdf/2022.lrec-1.472.pdf}
}
@inproceedings{troiano-EtAl:2022:LREC,
  author = {Troiano, Enrica and Oberlaender, Laura Ana Maria and Wegge, Maximilian and Klinger, Roman},
  title = {x-enVENT: A Corpus of Event Descriptions with Experiencer-specific Emotion and Appraisal Annotations},
  booktitle = {Proceedings of the Language Resources and Evaluation Conference},
  month = jun,
  year = {2022},
  address = {Marseille, France},
  publisher = {European Language Resources Association},
  pages = {1365--1375},
  abstract = {Emotion classification is often formulated as the task to categorize texts into a predefined set of emotion classes. So far, this task has been the recognition of the emotion of writers and readers, as well as that of entities mentioned in the text. We argue that a classification setup for emotion analysis should be performed in an integrated manner, including the different semantic roles that participate in an emotion episode. Based on appraisal theories in psychology, which treat emotions as reactions to events, we compile an English corpus of written event descriptions. The descriptions depict emotion-eliciting circumstances, and they contain mentions of people who responded emotionally. We annotate all experiencers, including the original author, with the emotions they likely felt. In addition, we link them to the event they found salient (which can be different for different experiencers in a text) by annotating event properties, or appraisals (e.g., the perceived event undesirability, the uncertainty of its outcome). Our analysis reveals patterns in the co-occurrence of people{'}s emotions in interaction. Hence, this richly-annotated resource provides useful data to study emotions and event evaluations from the perspective of different roles, and it enables the development of experiencer-specific emotion and appraisal classification systems.},
  url = {https://aclanthology.org/2022.lrec-1.146},
  internaltype = {conf},
  pdf = {http://www.lrec-conf.org/proceedings/lrec2022/pdf/2022.lrec-1.146.pdf}
}
@inproceedings{Sabbatino2022,
  author = {Sabbatino, Valentino and Troiano, Enrica and
                  Schweitzer, Antje and Klinger, Roman},
  title = {{``}splink{''} is happy and {``}phrouth{''} is scary: Emotion Intensity Analysis for Nonsense Words},
  booktitle = {Proceedings of the 12th Workshop on Computational Approaches to Subjectivity, Sentiment {\&} Social Media Analysis},
  month = may,
  year = {2022},
  address = {Dublin, Ireland},
  publisher = {Association for Computational Linguistics},
  pages = {37--50},
  url = {https://aclanthology.org/2022.wassa-1.4},
  internaltype = {workshop}
}
@inproceedings{Kadikis2022,
  author = {Kadi{\c{k}}is, Em{\=\i}ls and Srivastav, Vaibhav and
                  Klinger, Roman},
  title = {Embarrassingly Simple Performance Prediction for Abductive Natural Language Inference},
  booktitle = {Proceedings of the 2022 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies},
  month = jul,
  year = {2022},
  address = {Seattle, United States},
  publisher = {Association for Computational Linguistics},
  pages = {6031--6037},
  url = {https://aclanthology.org/2022.naacl-main.441},
  abstract = {The task of natural language inference (NLI), to decide if a hypothesis entails or contradicts a premise, received considerable attention in recent years. All competitive systems build on top of contextualized representations and make use of transformer architectures for learning an NLI model. When somebody is faced with a particular NLI task, they need to select the best model that is available. This is a time-consuming and resource-intense endeavour. To solve this practical problem, we propose a simple method for predicting the performance without actually fine-tuning the model. We do this by testing how well the pre-trained models perform on the aNLI task when just comparing sentence embeddings with cosine similarity to what kind of performance is achieved when training a classifier on top of these embeddings. We show that the accuracy of the cosine similarity approach correlates strongly with the accuracy of the classification approach with a Pearson correlation coefficient of 0.65. Since the similarity is orders of magnitude faster to compute on a given dataset (less than a minute vs. hours), our method can lead to significant time savings in the process of model selection.},
  internaltype = {conf}
}
@inproceedings{Kreuter2022,
  author = {Kreuter, Anne and Sassenberg, Kai and Klinger, Roman},
  title = {Items from Psychometric Tests as Training Data for Personality Profiling Models of {T}witter Users},
  booktitle = {Proceedings of the 12th Workshop on Computational Approaches to Subjectivity, Sentiment {\&} Social Media Analysis},
  month = may,
  year = {2022},
  address = {Dublin, Ireland},
  publisher = {Association for Computational Linguistics},
  pages = {315--323},
  url = {https://aclanthology.org/2022.wassa-1.35},
  internaltype = {workshop}
}
@inproceedings{Khlyzova2022,
  author = {Khlyzova, Anna and Silberer, Carina and Klinger, Roman},
  title = {On the Complementarity of Images and Text for the Expression of Emotions in Social Media},
  booktitle = {Proceedings of the 12th Workshop on Computational Approaches to Subjectivity, Sentiment {\&} Social Media Analysis},
  month = may,
  year = {2022},
  address = {Dublin, Ireland},
  publisher = {Association for Computational Linguistics},
  pages = {1--15},
  url = {https://aclanthology.org/2022.wassa-1.1},
  internaltype = {workshop}
}
@inproceedings{Papay2022,
  title = {Constraining Linear-chain {CRF}s to Regular Languages},
  author = {Sean Papay and Roman Klinger and Sebastian Pad{\'o}},
  booktitle = {International Conference on Learning Representations},
  year = {2022},
  url = {https://openreview.net/forum?id=jbrgwbv8nD},
  eprint = {2106.07306},
  archiveprefix = {arXiv},
  internaltype = {conf}
}
@article{troiano2022theories,
  title = {From theories on styles to their transfer in text:
                  Bridging the gap with a hierarchical survey},
  doi = {10.1017/S1351324922000407},
  journal = {Natural Language Engineering},
  publisher = {Cambridge University Press},
  author = {Troiano, Enrica and Velutharambath, Aswathy and
                  Klinger, Roman},
  year = {2022},
  pages = {1--60},
  url = {https://arxiv.org/abs/2110.15871},
  internaltype = {journal}
}
@proceedings{wassa-2022-approaches,
  editor = {Barnes, Jeremy and De Clercq, Orph{\'e}e and
                  Barriere, Valentin and Tafreshi, Shabnam and
                  Alqahtani, Sawsan and Sedoc, Jo{\~a}o and
                  Klinger, Roman and Balahur, Alexandra},
  title = {Proceedings of the 12th Workshop on Computational Approaches to Subjectivity, Sentiment {\&} Social Media Analysis},
  month = may,
  year = {2022},
  address = {Dublin, Ireland},
  publisher = {Association for Computational Linguistics},
  url = {https://aclanthology.org/2022.wassa-1.0},
  internaltype = {edited}
}