klinger.bib

@inproceedings{Wegge2022,
  author        = {Wegge, Maximilian and Troiano, Enrica and Oberlaender, Laura Ana Maria and Klinger, Roman},
  title         = {Experiencer-Specific Emotion and Appraisal Prediction},
  booktitle     = {Proceedings of the Fifth Workshop on Natural Language Processing and Computational Social Science (NLP+CSS)},
  year          = {2022},
  month         = nov,
  address       = {Abu Dhabi, UAE},
  publisher     = {Association for Computational Linguistics},
  pages         = {25--32},
  url           = {https://aclanthology.org/2022.nlpcss-1.3},
  archiveprefix = {arXiv},
  eprint        = {2210.12078},
  internaltype  = {workshop},
  abstract      = {Emotion classification in NLP assigns emotions to
    texts, such as sentences or paragraphs. With texts
    like {``}I felt guilty when he cried{''}, focusing
    on the sentence level disregards the standpoint of
    each participant in the situation: the writer
    ({``}I{''}) and the other entity ({``}he{''}) could
    in fact have different affective states. The
    emotions of different entities have been considered
    only partially in emotion semantic role labeling, a
    task that relates semantic roles to emotion cue
    words. Proposing a related task, we narrow the focus
    on the experiencers of events, and assign an emotion
    (if any holds) to each of them. To this end, we
    represent each emotion both categorically and with
    appraisal variables, as a psychological access to
    explaining why a person develops a particular
    emotion. On an event description corpus, our
    experiencer-aware models of emotions and appraisals
    outperform the experiencer-agnostic baselines,
    showing that disregarding event participants is an
    oversimplification for the emotion detection task.}
}

@inproceedings{Wuehrl2022,
  title = {Entity-based Claim Representation Improves
                  Fact-Checking of Medical Content in Tweets},
  author = {W{\"u}hrl, Amelie and Klinger, Roman},
  booktitle = {Proceedings of the 9th Workshop on Argument Mining},
  month = oct,
  year = {2022},
  address = {Online and in Gyeongju, Republic of Korea},
  publisher = {International Conference on Computational
                  Linguistics},
  url = {https://aclanthology.org/2022.argmining-1.18},
  pdf = {https://www.romanklinger.de/publications/WuehrlKlinger_Argmining2022.pdf},
  archiveprefix = {arXiv},
  eprint = {2209.07834},
  pages = {187--198},
  abstract = {False medical information on social media poses harm
                  to people{'}s health. While the need for biomedical
                  fact-checking has been recognized in recent years,
                  user-generated medical content has received
                  comparably little attention. At the same time,
                  models for other text genres might not be reusable,
                  because the claims they have been trained with are
                  substantially different. For instance, claims in the
                  SciFact dataset are short and focused: {``}Side
                  effects associated with antidepressants increases
                  risk of stroke{''}. In contrast, social media holds
                  naturally-occurring claims, often embedded in
                  additional context: {``}{`}If you take antidepressants
                  like SSRIs, you could be at risk of a condition
                  called serotonin syndrome{'} Serotonin syndrome
                  nearly killed me in 2010. Had symptoms of stroke and
                  seizure.{''} This showcases the mismatch between
                  real-world medical claims and the input that
                  existing fact-checking systems expect. To make
                  user-generated content checkable by existing models,
                  we propose to reformulate the social-media input in
                  such a way that the resulting claim mimics the claim
                  characteristics in established datasets. To
                  accomplish this, our method condenses the claim with
                  the help of relational entity information and either
                  compiles the claim out of an entity-relation-entity
                  triple or extracts the shortest phrase that contains
                  these elements. We show that the reformulated input
                  improves the performance of various fact-checking
                  models as opposed to checking the tweet text in its
                  entirety.},
  internaltype = {workshop},
  note = {###run###}
}

@inproceedings{Plazadelarco2022,
  author        = {Plaza-del-Arco, Flor Miriam and Mart{\'\i}n-Valdivia, Mar{\'\i}a-Teresa and Klinger, Roman},
  title         = {Natural Language Inference Prompts for Zero-shot Emotion Classification in Text across Corpora},
  booktitle     = {Proceedings of the 29th International Conference on Computational Linguistics},
  year          = {2022},
  month         = oct,
  address       = {Gyeongju, Republic of Korea},
  publisher     = {International Committee on Computational Linguistics},
  pages         = {6805--6817},
  url           = {https://aclanthology.org/2022.coling-1.592},
  pdf           = {https://www.romanklinger.de/publications/PlazaDelArcoMartinValdiviaKlinger.pdf},
  archiveprefix = {arXiv},
  eprint        = {2209.06701},
  internaltype  = {conferenceproc},
  abstract      = {Within textual emotion classification, the set of
    relevant labels depends on the domain and
    application scenario and might not be known at the
    time of model development. This conflicts with the
    classical paradigm of supervised learning in which
    the labels need to be predefined. A solution to
    obtain a model with a flexible set of labels is to
    use the paradigm of zero-shot learning as a natural
    language inference task, which in addition adds the
    advantage of not needing any labeled training
    data. This raises the question how to prompt a
    natural language inference model for zero-shot
    learning emotion classification. Options for prompt
    formulations include the emotion name anger alone or
    the statement {``}This text expresses
    anger{''}. With this paper, we analyze how sensitive
    a natural language inference-based
    zero-shot-learning classifier is to such changes to
    the prompt under consideration of the corpus: How
    carefully does the prompt need to be selected? We
    perform experiments on an established set of emotion
    datasets presenting different language registers
    according to different sources (tweets, events,
    blogs) with three natural language inference models
    and show that indeed the choice of a particular
    prompt formulation needs to fit to the corpus. We
    show that this challenge can be tackled with
    combinations of multiple prompts. Such ensemble is
    more robust across corpora than individual prompts
    and shows nearly the same performance as the
    individual best prompt for a particular corpus.}
}

@inproceedings{mohr-whrl-klinger:2022:LREC,
  author = {Mohr, Isabelle and W{\"u}hrl, Amelie and Klinger,
                  Roman},
  title = {{CoVERT}: A Corpus of Fact-checked Biomedical {COVID-19}
                  Tweets},
  booktitle = {Proceedings of the Language Resources and Evaluation
                  Conference},
  month = jun,
  year = {2022},
  address = {Marseille, France},
  publisher = {European Language Resources Association},
  pages = {244--257},
  abstract = {During the first two years of the COVID-19 pandemic,
                  large volumes of biomedical information concerning
                  this new disease have been published on social
                  media. Some of this information can pose a real
                  danger, particularly when false information is
                  shared, for instance recommendations how to treat
                  diseases without professional medical
                  advice. Therefore, automatic fact-checking resources
                  and systems developed specifically for medical
                  domain are crucial. While existing fact-checking
                  resources cover COVID-19 related information in news
                  or quantify the amount of misinformation in tweets,
                  there is no dataset providing fact-checked COVID-19
                  related Twitter posts with detailed annotations for
                  biomedical entities, relations and relevant
                  evidence. We contribute CoVERT, a fact-checked
                  corpus of tweets with a focus on the domain of
                  biomedicine and COVID-19 related
                  (mis)information. The corpus consists of 300 tweets,
                  each annotated with named entities and relations. We
                  employ a novel crowdsourcing methodology to annotate
                  all tweets with fact-checking labels and supporting
                  evidence, which crowdworkers search for online. This
                  methodology results in substantial inter-annotator
                  agreement. Furthermore, we use the retrieved
                  evidence extracts as part of a fact-checking
                  pipeline, finding that the real-world evidence is
                  more useful than the knowledge directly available in
                  pretrained language models.},
  url = {https://aclanthology.org/2022.lrec-1.26},
  internaltype = {conferenceproc},
  pdf = {http://www.lrec-conf.org/proceedings/lrec2022/pdf/2022.lrec-1.26.pdf},
  archiveprefix = {arXiv},
  eprint = {2204.12164}
}

@inproceedings{whrl-klinger:2022:LREC,
  author = {W{\"u}hrl, Amelie and Klinger, Roman},
  title = {Recovering Patient Journeys: A Corpus of Biomedical
                  Entities and Relations on {Twitter} ({BEAR})},
  booktitle = {Proceedings of the Language Resources and Evaluation
                  Conference},
  month = jun,
  year = {2022},
  address = {Marseille, France},
  publisher = {European Language Resources Association},
  pages = {4439--4450},
  abstract = {Text mining and information extraction for the
                  medical domain has focused on scientific text
                  generated by researchers. However, their access to
                  individual patient experiences or patient-doctor
                  interactions is limited. On social media, doctors,
                  patients and their relatives also discuss medical
                  information. Individual information provided by
                  laypeople complements the knowledge available in
                  scientific text. It reflects the patient's journey
                  making the value of this type of data twofold: It
                  offers direct access to people's perspectives, and
                  it might cover information that is not available
                  elsewhere, including self-treatment or
                  self-diagnose. Named entity recognition and relation
                  extraction are methods to structure information that
                  is available in unstructured text. However, existing
                  medical social media corpora focused on a comparably
                  small set of entities and relations. In contrast, we
                  provide rich annotation layers to model patients'
                  experiences in detail. The corpus consists of
                  medical tweets annotated with a fine-grained set of
                  medical entities and relations between them, namely
                  14 entity (incl. environmental factors, diagnostics,
                  biochemical processes, patients' quality-of-life
                  descriptions, pathogens, medical conditions, and
                  treatments) and 20 relation classes (incl. prevents,
                  influences, interactions, causes). The dataset
                  consists of 2,100 tweets with approx. 6,000 entities
                  and 2,200 relations.},
  url = {https://aclanthology.org/2022.lrec-1.472},
  internaltype = {conferenceproc},
  pdf = {http://www.lrec-conf.org/proceedings/lrec2022/pdf/2022.lrec-1.472.pdf},
  archiveprefix = {arXiv},
  eprint = {2204.09952}
}

@inproceedings{troiano-EtAl:2022:LREC,
  author = {Troiano, Enrica and Oberlaender, Laura Ana Maria and Wegge, Maximilian and Klinger, Roman},
  title = {{x-enVENT}: A Corpus of Event Descriptions with Experiencer-specific Emotion and Appraisal Annotations},
  booktitle = {Proceedings of the Language Resources and Evaluation Conference},
  month = jun,
  year = {2022},
  address = {Marseille, France},
  publisher = {European Language Resources Association},
  pages = {1365--1375},
  abstract = {Emotion classification is often formulated as the task to categorize texts into a predefined set of emotion classes. So far, this task has been the recognition of the emotion of writers and readers, as well as that of entities mentioned in the text. We argue that a classification setup for emotion analysis should be performed in an integrated manner, including the different semantic roles that participate in an emotion episode. Based on appraisal theories in psychology, which treat emotions as reactions to events, we compile an English corpus of written event descriptions. The descriptions depict emotion-eliciting circumstances, and they contain mentions of people who responded emotionally. We annotate all experiencers, including the original author, with the emotions they likely felt. In addition, we link them to the event they found salient (which can be different for different experiencers in a text) by annotating event properties, or appraisals (e.g., the perceived event undesirability, the uncertainty of its outcome). Our analysis reveals patterns in the co-occurrence of people's emotions in interaction. Hence, this richly-annotated resource provides useful data to study emotions and event evaluations from the perspective of different roles, and it enables the development of experiencer-specific emotion and appraisal classification systems.},
  url = {https://aclanthology.org/2022.lrec-1.146},
  internaltype = {conferenceproc},
  pdf = {http://www.lrec-conf.org/proceedings/lrec2022/pdf/2022.lrec-1.146.pdf},
  archiveprefix = {arXiv},
  eprint = {2203.10909}
}

@inproceedings{Sabbatino2022,
  author        = {Sabbatino, Valentino and Troiano, Enrica and Schweitzer, Antje and Klinger, Roman},
  title         = {{``}splink{''} is happy and {``}phrouth{''} is scary: Emotion Intensity Analysis for Nonsense Words},
  booktitle     = {Proceedings of the 12th Workshop on Computational Approaches to Subjectivity, Sentiment {\&} Social Media Analysis},
  year          = {2022},
  month         = may,
  address       = {Dublin, Ireland},
  publisher     = {Association for Computational Linguistics},
  pages         = {37--50},
  url           = {https://aclanthology.org/2022.wassa-1.4},
  archiveprefix = {arXiv},
  eprint        = {2202.12132},
  internaltype  = {workshop}
}

@inproceedings{Kadikis2022,
  author        = {Kadi{\c{k}}is, Em{\=\i}ls and Srivastav, Vaibhav and Klinger, Roman},
  title         = {Embarrassingly Simple Performance Prediction for Abductive Natural Language Inference},
  booktitle     = {Proceedings of the 2022 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies},
  year          = {2022},
  month         = jul,
  address       = {Seattle, United States},
  publisher     = {Association for Computational Linguistics},
  pages         = {6031--6037},
  url           = {https://aclanthology.org/2022.naacl-main.441},
  archiveprefix = {arXiv},
  eprint        = {2202.10408},
  internaltype  = {conferenceproc},
  abstract      = {The task of natural language inference (NLI), to
    decide if a hypothesis entails or contradicts a
    premise, received considerable attention in recent
    years. All competitive systems build on top of
    contextualized representations and make use of
    transformer architectures for learning an NLI
    model. When somebody is faced with a particular NLI
    task, they need to select the best model that is
    available. This is a time-consuming and
    resource-intense endeavour. To solve this practical
    problem, we propose a simple method for predicting
    the performance without actually fine-tuning the
    model. We do this by testing how well the
    pre-trained models perform on the aNLI task when
    just comparing sentence embeddings with cosine
    similarity to what kind of performance is achieved
    when training a classifier on top of these
    embeddings. We show that the accuracy of the cosine
    similarity approach correlates strongly with the
    accuracy of the classification approach with a
    Pearson correlation coefficient of 0.65. Since the
    similarity is orders of magnitude faster to compute
    on a given dataset (less than a minute vs. hours),
    our method can lead to significant time savings in
    the process of model selection.}
}

@inproceedings{Kreuter2022,
  author        = {Kreuter, Anne and Sassenberg, Kai and Klinger, Roman},
  title         = {Items from Psychometric Tests as Training Data for Personality Profiling Models of {T}witter Users},
  booktitle     = {Proceedings of the 12th Workshop on Computational Approaches to Subjectivity, Sentiment {\&} Social Media Analysis},
  year          = {2022},
  month         = may,
  address       = {Dublin, Ireland},
  publisher     = {Association for Computational Linguistics},
  pages         = {315--323},
  url           = {https://aclanthology.org/2022.wassa-1.35},
  archiveprefix = {arXiv},
  eprint        = {2202.10415},
  internaltype  = {workshop}
}

@inproceedings{Khlyzova2022,
  author        = {Khlyzova, Anna and Silberer, Carina and Klinger, Roman},
  title         = {On the Complementarity of Images and Text for the Expression of Emotions in Social Media},
  booktitle     = {Proceedings of the 12th Workshop on Computational Approaches to Subjectivity, Sentiment {\&} Social Media Analysis},
  year          = {2022},
  month         = may,
  address       = {Dublin, Ireland},
  publisher     = {Association for Computational Linguistics},
  pages         = {1--15},
  url           = {https://aclanthology.org/2022.wassa-1.1},
  archiveprefix = {arXiv},
  eprint        = {2202.07427},
  internaltype  = {workshop}
}

@inproceedings{Papay2022,
  title = {Constraining Linear-chain {CRF}s to Regular
                  Languages},
  author = {Papay, Sean and Klinger, Roman and Pado, Sebastian},
  booktitle = {International Conference on Learning
                  Representations},
  year = {2022},
  url = {https://openreview.net/forum?id=jbrgwbv8nD},
  archiveprefix = {arXiv},
  eprint = {2106.07306},
  internaltype = {conferenceproc}
}

@article{troiano2022theories,
  title = {From theories on styles to their transfer in text:
                  Bridging the gap with a hierarchical survey},
  doi = {10.1017/S1351324922000407},
  journal = {Natural Language Engineering},
  publisher = {Cambridge University Press},
  author = {Troiano, Enrica and Velutharambath, Aswathy and
                  Klinger, Roman},
  year = {2022},
  pages = {1--60},
  archiveprefix = {arXiv},
  eprint = {2110.15871},
  internaltype = {journal}
}

@proceedings{wassa-2022-approaches,
  editor       = {Barnes, Jeremy and De Clercq, Orph{\'e}e and Barriere, Valentin and Tafreshi, Shabnam and
    Alqahtani, Sawsan and Sedoc, Jo{\~a}o and Klinger, Roman and Balahur, Alexandra},
  title        = {Proceedings of the 12th Workshop on Computational Approaches to Subjectivity,
    Sentiment {\&} Social Media Analysis},
  year         = {2022},
  month        = may,
  address      = {Dublin, Ireland},
  publisher    = {Association for Computational Linguistics},
  url          = {https://aclanthology.org/2022.wassa-1.0},
  internaltype = {edited}
}