Kennedy, B., Golazizian, P., Trager, J., Atari, M., Hoover, J., Davani, A. M., & Dehghani, M. (2022). The (Moral) Language of Hate. PsyArXiv. https://doi.org/10.31234/osf.io/eqp34
@misc{kennedy_golazizian_trager_atari_hoover_davani_dehghani_2022,
  title     = {The ({Moral}) Language of Hate},
  url       = {https://psyarxiv.com/eqp34},
  doi       = {10.31234/osf.io/eqp34},
  publisher = {PsyArXiv},
  author    = {Kennedy, Brendan and Golazizian, Preni and Trager, Jackson and Atari, Mohammad and Hoover, Joseph and Davani, Aida M. and Dehghani, Morteza},
  year      = {2022},
  month     = sep,
  note      = {Under review at PNAS Nexus}
}
Davani, A. M., Yeh, L., Atari, M., Kennedy, B., Portillo-Wightman, G., Gonzalez, E., Delong, N., Bhatia, R., Mirinjian, A., Ren, X., & Dehghani, M. (2019). Reporting the Unreported: Event Extraction for Analyzing the Local Representation of Hate Crimes. Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing and 9th International Joint Conference on Natural Language Processing (EMNLP-IJCNLP).
@inproceedings{davani2019reporting,
  title     = {Reporting the Unreported: Event Extraction for Analyzing the Local Representation of Hate Crimes},
  author    = {Davani, Aida Mostafazadeh and Yeh, Leigh and Atari, Mohammad and Kennedy, Brendan and Portillo-Wightman, Gwenyth and Gonzalez, Elaine and Delong, Natalie and Bhatia, Rhea and Mirinjian, Arineh and Ren, Xiang and Dehghani, Morteza},
  booktitle = {Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing ({EMNLP-IJCNLP})},
  year      = {2019}
}
Official reports of hate crimes in the US are under-reported relative to the actual number of such incidents. Further, despite statistical approximations, there are no official reports from a large number of US cities regarding incidents of hate. Here, we first demonstrate that event extraction and multi-instance learning, applied to a corpus of local news articles, can be used to predict instances of hate crime. We then use the trained model to detect incidents of hate in cities for which the FBI lacks statistics. Lastly, we train models on predicting homicide and kidnapping, compare the predictions to FBI reports, and establish that incidents of hate are indeed under-reported, compared to other types of crimes, in local press.
Kennedy, B., Ashokkumar, A., Boyd, R., & Dehghani, M. (2022). Text Analysis for Psychology: Methods, Principles, and Practices. In M. Dehghani & R. Boyd (Eds.), The Handbook of Language Analysis in Psychology. Guilford Press. https://doi.org/10.31234/osf.io/h2b8t
@incollection{kennedy2020text,
  title     = {Text Analysis for Psychology: Methods, Principles, and Practices},
  author    = {Kennedy, Brendan and Ashokkumar, Ashwini and Boyd, Ryan and Dehghani, Morteza},
  booktitle = {The Handbook of Language Analysis in Psychology},
  publisher = {Guilford Press},
  year      = {2022},
  doi       = {10.31234/osf.io/h2b8t},
  editor    = {Dehghani, Morteza and Boyd, Ryan}
}
Kennedy, B., Atari, M., Mostafazadeh Davani, A., Yeh, L., Omrani, A., Kim, Y., Coombs Jr., K., Havaldar, S., Portillo-Wightman, G., Gonzalez, E., Hoover, J., Azatian, A., Cardenas, G., Hussain, A., Lara, A., Omary, A., Park, C., Wang, X., Wijaya, C., … Dehghani, M. (2022). Introducing the Gab Hate Corpus: Defining and applying hate-based rhetoric to social media posts at scale. Language Resources and Evaluation, 1(16), 79–108. https://doi.org/10.1007/s10579-021-09569-x
@article{kennedy2022introducing,
  title         = {Introducing the {Gab Hate Corpus}: Defining and applying hate-based rhetoric to social media posts at scale},
  volume        = {1},
  number        = {16},
  doi           = {10.1007/s10579-021-09569-x},
  pages         = {79--108},
  publisher     = {Springer},
  journal       = {Language Resources and Evaluation},
  author        = {Kennedy, Brendan and Atari, Mohammad and Mostafazadeh Davani, Aida and Yeh, Leigh and Omrani, Ali and Kim, Yehsong and Coombs Jr., Kris and Havaldar, Shreya and Portillo-Wightman, Gwenyth and Gonzalez, Elaine and Hoover, Joe and Azatian, Aida and Cardenas, Gabriel and Hussain, Alyzeh and Lara, Austin and Omary, Adam and Park, Christina and Wang, Xin and Wijaya, Clarisa and Zhang, Yong and Meyerowitz, Beth and Dehghani, Morteza},
  year          = {2022},
  month         = feb,
  internal-note = {NOTE(review): volume=1/number=16 looks swapped or wrong for LRE 2022 -- verify against the published article}
}
The growing prominence of online hate speech is a threat to a safe and just society. This endangering phenomenon requires collaboration across the sciences in order to generate evidence-based knowledge of, and policies for, the dissemination of hatred in online spaces. To foster such collaborations, here we present the Gab Hate Corpus (GHC), consisting of 27,665 posts from the social network service gab.ai, each annotated by a minimum of three trained annotators. Annotators were trained to label posts according to a coding typology derived from a synthesis of hate speech definitions across legal, computational, psychological, and sociological research. We detail the development of the corpus, describe the resulting distributions of hate-based rhetoric, target group, and rhetorical framing labels, and establish baseline classification performance for each using standard natural language processing methods. The GHC, which is the largest theoretically-justified, annotated corpus of hate speech to date, provides opportunities for training and evaluating hate speech classifiers and for scientific inquiries into the linguistic and network components of hate speech.
Kennedy, B., Atari, M., Mostafazadeh Davani, A., Hoover, J., Omrani, A., Graham, J., & Dehghani, M. (2021). Moral Concerns are Differentially Observable in Language. Cognition, 212, 104696. https://doi.org/10.31234/osf.io/uqmty
@article{kennedy2021moral,
  title   = {Moral Concerns are Differentially Observable in Language},
  url     = {https://psyarxiv.com/uqmty},
  doi     = {10.31234/osf.io/uqmty},
  journal = {Cognition},
  author  = {Kennedy, Brendan and Atari, Mohammad and Mostafazadeh Davani, Aida and Hoover, Joseph and Omrani, Ali and Graham, Jesse and Dehghani, Morteza},
  year    = {2021},
  volume  = {212},
  pages   = {104696}
}
Language is a psychologically rich medium for human expression and communication. While language usage has been shown to be a window into various aspects of people’s social worlds, including their personality traits and everyday environment, its correspondence to people’s moral concerns has yet to be considered. Here, we examine the relationship between language usage and the moral concerns of Care, Fairness, Loyalty, Authority, and Purity as conceptualized by Moral Foundations Theory. We collected Facebook status updates (N = 107,798) from English-speaking participants (n = 2,691) along with their responses on the Moral Foundations Questionnaire. Overall, results suggested that self-reported moral concerns may be traced in language usage, though the magnitude of this effect varied considerably among moral concerns. Across a diverse selection of Natural Language Processing methods, Fairness concerns were consistently least correlated with language usage whereas Purity concerns were found to be the most traceable. In exploratory follow-up analyses, each moral concern was found to be differentially related to distinct patterns of relational, emotional, and social language. Our results are the first to relate individual differences in moral concerns to language usage, and to uncover the signatures of moral concerns in language.
Garten, J., Kennedy, B., Hoover, J., Sagae, K., & Dehghani, M. (2019). Incorporating Demographic Embeddings Into Language Understanding. Cognitive Science, 43(1). https://doi.org/10.1111/cogs.12701
@article{garten2019demographic,
  title   = {Incorporating Demographic Embeddings Into Language Understanding},
  journal = {Cognitive Science},
  year    = {2019},
  volume  = {43},
  number  = {1},
  author  = {Garten, Justin and Kennedy, Brendan and Hoover, Joseph and Sagae, Kenji and Dehghani, Morteza},
  doi     = {10.1111/cogs.12701}
}
Meaning depends on context. This applies in obvious cases like deictics or sarcasm as well as more subtle situations like framing or persuasion. One key aspect of this is the identity of the participants in an interaction. Our interpretation of an utterance shifts based on a variety of factors, including personal history, background knowledge, and our relationship to the source. While obviously an incomplete model of individual differences, demographic factors provide a useful starting point and allow us to capture some of this variance. However, the relevance of specific demographic factors varies between situations—where age might be the key factor in one context, ideology might dominate in another. To address this challenge, we introduce a method for combining demographics and context into situated demographic embeddings—mapping representations into a continuous geometric space appropriate for the given domain, showing the resulting representations to be functional and interpretable. We further demonstrate how to make use of related external data so as to apply this approach in low‐resource situations. Finally, we show how these representations can be incorporated to improve modeling of real‐world natural language understanding tasks, improving model performance and helping with issues of data sparsity.
Hoover, J., Portillo-Wightman, G., Yeh, L., Havaldar, S., Davani, A. M., Lin, Y., Kennedy, B., Atari, M., Kamel, Z., Mendlen, M., Moreno, G., Park, C., Chang, T. E., Chin, J., Leong, C., Leung, J. Y., Mirinjian, A., & Dehghani, M. (2020). Moral Foundations Twitter Corpus: A Collection of 35k Tweets Annotated for Moral Sentiment. Social Psychological and Personality Science, 11(8), 1057–1071.
@article{hoover2020mftc,
  title   = {Moral Foundations Twitter Corpus: A Collection of 35k Tweets Annotated for Moral Sentiment},
  journal = {Social Psychological and Personality Science},
  year    = {2020},
  author  = {Hoover, J. and Portillo-Wightman, G. and Yeh, L. and Havaldar, S. and Davani, A. M. and Lin, Y. and Kennedy, B. and Atari, M. and Kamel, Z. and Mendlen, M. and Moreno, G. and Park, C. and Chang, T. E. and Chin, J. and Leong, C. and Leung, J. Y. and Mirinjian, A. and Dehghani, M.},
  volume  = {11},
  number  = {8},
  pages   = {1057--1071}
}
Research has shown that accounting for moral sentiment in natural language can yield insight into a variety of on- and off-line phenomena such as message diffusion, protest dynamics, and social distancing. However, measuring moral sentiment in natural language is challenging, and the difficulty of this task is exacerbated by the limited availability of annotated data. To address this issue, we introduce the Moral Foundations Twitter Corpus, a collection of 35,108 tweets that have been curated from seven distinct domains of discourse and hand annotated by at least three trained annotators for 10 categories of moral sentiment. To facilitate investigations of annotator response dynamics, we also provide psychological and demographic metadata for each annotator. Finally, we report moral sentiment classification baselines for this corpus using a range of popular methodologies.
Mostafazadeh Davani, A., Atari, M., Kennedy, B., Havaldar, S., & Dehghani, M. (2020). Hatred is in the Eye of the Annotator: Hate Speech Classifiers Learn Human-Like Social Stereotypes. 42nd Annual Conference of the Cognitive Science Society (CogSci).
@inproceedings{mostafazadeh2020hatred,
  author        = {Mostafazadeh Davani, Aida and Atari, Mohammad and Kennedy, Brendan and Havaldar, Shreya and Dehghani, Morteza},
  title         = {Hatred is in the Eye of the Annotator: Hate Speech Classifiers Learn Human-Like Social Stereotypes},
  booktitle     = {Proceedings of the 42nd Annual Conference of the Cognitive Science Society (CogSci)},
  year          = {2020},
  internal-note = {NOTE(review): citation key duplicated later in this file for the same paper -- deduplicate before compiling}
}
Social stereotypes impact individuals’ judgement about different social groups. One area where such stereotyping has a critical impact is in hate speech detection, in which human annotations of text are used to train machine learning models. Such models are likely to be biased in the same ways that humans are biased in their judgments of social groups. In this research, we investigate the effect of stereotypes of social groups on the performance of expert annotators in a large corpus of annotated hate speech. We also examine the effect of these stereotypes on unintended bias of hate speech classifiers. To this end, we show how language-encoded stereotypes, associated with social groups, lead to disagreements in identifying hate speech. Lastly, we analyze how inconsistencies in annotations propagate to a supervised classifier when human-generated labels are used to train a hate speech detection model.
Atari, M., Davani, A. M., Kogon, D., Kennedy, B., Ani Saxena, N., Anderson, I., & Dehghani, M. (2022). Morally homogeneous networks and radicalism. Social Psychological and Personality Science, 13(6), 999–1009.
@article{atari2022morally,
  title     = {Morally homogeneous networks and radicalism},
  author    = {Atari, Mohammad and Davani, Aida Mostafazadeh and Kogon, Drew and Kennedy, Brendan and Saxena, Nripsuta Ani and Anderson, Ian and Dehghani, Morteza},
  journal   = {Social Psychological and Personality Science},
  volume    = {13},
  number    = {6},
  pages     = {999--1009},
  year      = {2022},
  publisher = {SAGE Publications}
}
Atari, M., Reimer, N. K., Graham, J., Hoover, J., Kennedy, B., Davani, A. M., Karimi-Malekabadi, F., Birjandi, S., & Dehghani, M. (2022). Pathogens are linked to human moral systems across time and space. Current Research in Ecological and Social Psychology, 3, 100060. https://doi.org/https://doi.org/10.1016/j.cresp.2022.100060
@article{ATARI2022100060,
  title    = {Pathogens are linked to human moral systems across time and space},
  journal  = {Current Research in Ecological and Social Psychology},
  volume   = {3},
  pages    = {100060},
  year     = {2022},
  issn     = {2666-6227},
  doi      = {10.1016/j.cresp.2022.100060},
  url      = {https://www.sciencedirect.com/science/article/pii/S2666622722000272},
  author   = {Atari, Mohammad and Reimer, Nils K. and Graham, Jesse and Hoover, Joe and Kennedy, Brendan and Davani, Aida Mostafazadeh and Karimi-Malekabadi, Farzan and Birjandi, Shirin and Dehghani, Morteza},
  keywords = {Morality, Culture, Pathogen avoidance, Infectious diseases, Moral foundations theory, Behavioral immune system}
}
Infectious diseases have been an impending threat to the survival of individuals and groups throughout our evolutionary history. As a result, humans have developed psychological pathogen-avoidance mechanisms and groups have developed societal norms that respond to the presence of disease-causing microorganisms in the environment. In this work, we demonstrate that morality plays a central role in the cultural and psychological architectures that help humans avoid pathogens. We present a collection of studies which together provide an integrated understanding of the socio-ecological and psychological impacts of pathogens on human morality. Specifically, in Studies 1 (2,834 U.S. counties) and 2 (67 nations), we show that regional variation in pathogen prevalence is consistently related to aggregate moral Purity. In Study 3, we use computational linguistic methods to show that pathogen-related words co-occur with Purity words across multiple languages. In Studies 4 (n = 513) and 5 (n = 334), we used surveys and social psychological experimentation to show that pathogen-avoidance attitudes are correlated with Purity. Finally, in Study 6, we found that historical prevalence of pathogens is linked to Care, Loyalty, and Purity. We argue that particular adaptive moral systems are developed and maintained in response to the threat of pathogen occurrence in the environment. We draw on multiple methods to establish connections between pathogens and moral codes in multiple languages, experimentally induced situations, individual differences, U.S. counties, 67 countries, and historical periods over the last century.
Explainability & Bias in NLP
Mostafazadeh Davani, A., Atari, M., Kennedy, B., Havaldar, S., & Dehghani, M. (2020). Hatred is in the Eye of the Annotator: Hate Speech Classifiers Learn Human-Like Social Stereotypes. Proceedings of the 42nd Annual Conference of the Cognitive Science Society.
@inproceedings{mostafazadeh2020hatred,
  author        = {Mostafazadeh Davani, Aida and Atari, Mohammad and Kennedy, Brendan and Havaldar, Shreya and Dehghani, Morteza},
  title         = {Hatred is in the Eye of the Annotator: Hate Speech Classifiers Learn Human-Like Social Stereotypes},
  booktitle     = {Proceedings of the 42nd Annual Conference of the Cognitive Science Society},
  year          = {2020},
  internal-note = {NOTE(review): citation key duplicated earlier in this file for the same paper -- deduplicate before compiling}
}
Social stereotypes impact individuals’ judgement about different social groups. One area where such stereotyping has a critical impact is in hate speech detection, in which human annotations of text are used to train machine learning models. Such models are likely to be biased in the same ways that humans are biased in their judgments of social groups. In this research, we investigate the effect of stereotypes of social groups on the performance of expert annotators in a large corpus of annotated hate speech. We also examine the effect of these stereotypes on unintended bias of hate speech classifiers. To this end, we show how language-encoded stereotypes, associated with social groups, lead to disagreements in identifying hate speech. Lastly, we analyze how inconsistencies in annotations propagate to a supervised classifier when human-generated labels are used to train a hate speech detection model.
Omrani, A., Kennedy, B., Atari, M., & Dehghani, M. (2022). Social-Group-Agnostic Word Embedding Debiasing via the Stereotype Content Model. In arXiv preprint arXiv:2210.05831.
@misc{omrani2022social,
  title      = {Social-Group-Agnostic Word Embedding Debiasing via the {Stereotype Content Model}},
  author     = {Omrani, Ali and Kennedy, Brendan and Atari, Mohammad and Dehghani, Morteza},
  eprint     = {2210.05831},
  eprinttype = {arXiv},
  year       = {2022},
  note       = {Under Review at EACL 2023}
}
Mostafazadeh Davani, A., Omrani, A., Kennedy, B., Atari, M., Ren, X., & Dehghani, M. (2021). Fair Hate Speech Detection through Evaluation of Social Group Counterfactuals. The Fifth Workshop on Online Abuse and Harms.
@inproceedings{davani2020fair,
  author    = {Mostafazadeh Davani, Aida and Omrani, Ali and Kennedy, Brendan and Atari, Mohammad and Ren, Xiang and Dehghani, Morteza},
  author+an = {3=highlight},
  title     = {{Fair Hate Speech Detection through Evaluation of Social Group Counterfactuals}},
  booktitle = {The Fifth Workshop on Online Abuse and Harms},
  year      = {2021}
}
Jin, X., Barbieri, F., Kennedy, B., Davani, A. M., Neves, L., & Ren, X. (2021). On Transferability of Bias Mitigation Effects in Language Model Fine-Tuning. Proceedings of the 2021 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, 3770–3783.
@inproceedings{jin2021transferability,
  author    = {Jin, Xisen and Barbieri, Francesco and Kennedy, Brendan and Davani, Aida Mostafazadeh and Neves, Leonardo and Ren, Xiang},
  title     = {On Transferability of Bias Mitigation Effects in Language Model Fine-Tuning},
  booktitle = {Proceedings of the 2021 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies},
  year      = {2021},
  pages     = {3770--3783}
}
Fine-tuned language models have been shown to exhibit biases against protected groups in a host of modeling tasks such as text classification and coreference resolution.
Previous works focus on detecting these biases, reducing bias in data representations, and using auxiliary training objectives to mitigate bias during fine-tuning. Although these techniques achieve bias reduction for the task and domain at hand, the effects of bias mitigation may not directly transfer to new tasks, requiring additional data collection and customized annotation of sensitive attributes, and re-evaluation of appropriate fairness metrics. We explore the feasibility and benefits of upstream bias mitigation (UBM) for reducing bias on downstream tasks, by first applying bias mitigation to an upstream model through fine-tuning and subsequently using it for downstream fine-tuning. We find, in extensive experiments across hate speech detection, toxicity detection, occupation prediction, and coreference resolution tasks over various bias factors, that the effects of UBM are indeed transferable to new downstream tasks or domains via fine-tuning, creating less biased downstream models than directly fine-tuning on the downstream task or transferring from a vanilla upstream model. Though challenges remain, we show that UBM promises more efficient and accessible bias mitigation in LM fine-tuning.
Kennedy*, B., Jin*, X., Davani, A. M., Dehghani, M., & Ren, X. (2020). Contextualizing Hate Speech Classifiers with Post-hoc Explanation. Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics, 5435–5442.
@inproceedings{kennedy2020contextualizing,
  title     = {Contextualizing Hate Speech Classifiers with Post-hoc Explanation},
  author    = {Kennedy, Brendan and Jin, Xisen and Davani, Aida Mostafazadeh and Dehghani, Morteza and Ren, Xiang},
  booktitle = {Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics},
  pages     = {5435--5442},
  year      = {2020},
  note      = {Brendan Kennedy and Xisen Jin contributed equally}
}
Hate speech classifiers trained on imbalanced datasets struggle to determine if group identifiers like "gay" or "black" are used in offensive or prejudiced ways. Such biases manifest in false positives when these identifiers are present, due to models’ inability to learn the contexts which constitute a hateful usage of identifiers. We extract post-hoc explanations from fine-tuned BERT classifiers to detect bias towards identity terms. Then, we propose a novel regularization technique based on these explanations that encourages models to learn from the context of group identifiers in addition to the identifiers themselves. Our approach improved over baselines in limiting false positives on out-of-domain data while maintaining or improving in-domain performance.
Garten*, J., Kennedy*, B., Sagae, K., & Dehghani, M. (2019). Measuring the Importance of Context when Modeling Language Comprehension. Behavior Research Methods, 51(2), 480–492.
@article{garten2019measuring,
  title   = {Measuring the Importance of Context when Modeling Language Comprehension},
  journal = {Behavior Research Methods},
  year    = {2019},
  volume  = {51},
  number  = {2},
  pages   = {480--492},
  author  = {Garten, J. and Kennedy, B. and Sagae, K. and Dehghani, M.},
  note    = {J. Garten and B. Kennedy contributed equally}
}
It is widely accepted that language requires context in order to function as communication between speakers and listeners. As listeners, we make use of background knowledge — about the speaker, about entities and concepts, about previous utterances — in order to infer the speaker’s intended meaning. But even if there is consensus that these sources of information are a necessary component of linguistic communication, it is another matter entirely to provide a thorough, quantitative accounting for context’s interaction with language. When does context matter? What kinds of context matter in which kinds of domains? The empirical investigation of these questions is inhibited by a number of factors: the challenge of quantifying language, the boundless combinations of domains and types of context to be measured, and the challenge of selecting and applying a given construct to natural language data. In response to these factors, we introduce and demonstrate a methodological framework for testing the importance of contextual information in inferring speaker intentions from text. We apply Long Short-term Memory (LSTM) networks, a standard for representing language in its natural, sequential state, and conduct a set of experiments for predicting the persuasive intentions of speakers in political debates using different combinations of text and background information about the speaker. We show, in our modeling and discussion, that the proposed framework is suitable for empirically evaluating the manner and magnitude of context’s relevance for any number of domains and constructs.
Kennedy, B., Reimer, N. K., & Dehghani, M. (2021). Explaining Explainability: Interpretable machine learning for the behavioral sciences. PsyArXiv. https://doi.org/10.31234/osf.io/9h6qr
@misc{kennedy_reimer_dehghani_2021,
  title     = {Explaining Explainability: Interpretable machine learning for the behavioral sciences},
  url       = {https://psyarxiv.com/9h6qr},
  doi       = {10.31234/osf.io/9h6qr},
  publisher = {PsyArXiv},
  author    = {Kennedy, Brendan and Reimer, Nils K. and Dehghani, Morteza},
  year      = {2021},
  month     = aug
}
Kennedy, B., Omrani, A., & Dehghani, M. (2022). Exemplar-based Explanations of Speaker-Language Relationships.
@misc{kennedy_omrani_dehghani_2022,
  author = {Kennedy, Brendan and Omrani, Ali and Dehghani, Morteza},
  title  = {Exemplar-based Explanations of Speaker-Language Relationships},
  month  = apr,
  year   = {2022}
}
Text Annotation and Data Curation
Kennedy, B., Atari, M., Mostafazadeh Davani, A., Yeh, L., Omrani, A., Kim, Y., Coombs Jr., K., Havaldar, S., Portillo-Wightman, G., Gonzalez, E., Hoover, J., Azatian, A., Cardenas, G., Hussain, A., Lara, A., Omary, A., Park, C., Wang, X., Wijaya, C., … Dehghani, M. (2022). Introducing the Gab Hate Corpus: Defining and applying hate-based rhetoric to social media posts at scale. Language Resources and Evaluation, 1(16), 79–108. https://doi.org/10.1007/s10579-021-09569-x
@article{kennedy2022introducing,
  title         = {Introducing the {Gab Hate Corpus}: Defining and applying hate-based rhetoric to social media posts at scale},
  volume        = {1},
  number        = {16},
  doi           = {10.1007/s10579-021-09569-x},
  pages         = {79--108},
  publisher     = {Springer},
  journal       = {Language Resources and Evaluation},
  author        = {Kennedy, Brendan and Atari, Mohammad and Mostafazadeh Davani, Aida and Yeh, Leigh and Omrani, Ali and Kim, Yehsong and Coombs Jr., Kris and Havaldar, Shreya and Portillo-Wightman, Gwenyth and Gonzalez, Elaine and Hoover, Joe and Azatian, Aida and Cardenas, Gabriel and Hussain, Alyzeh and Lara, Austin and Omary, Adam and Park, Christina and Wang, Xin and Wijaya, Clarisa and Zhang, Yong and Meyerowitz, Beth and Dehghani, Morteza},
  year          = {2022},
  month         = feb,
  internal-note = {NOTE(review): citation key duplicated earlier in this file for the same paper -- deduplicate before compiling; also verify volume/number (LRE 2022 is likely vol. 56, no. 1)}
}
The growing prominence of online hate speech is a threat to a safe and just society. This endangering phenomenon requires collaboration across the sciences in order to generate evidence-based knowledge of, and policies for, the dissemination of hatred in online spaces. To foster such collaborations, here we present the Gab Hate Corpus (GHC), consisting of 27,665 posts from the social network service gab.ai, each annotated by a minimum of three trained annotators. Annotators were trained to label posts according to a coding typology derived from a synthesis of hate speech definitions across legal, computational, psychological, and sociological research. We detail the development of the corpus, describe the resulting distributions of hate-based rhetoric, target group, and rhetorical framing labels, and establish baseline classification performance for each using standard natural language processing methods. The GHC, which is the largest theoretically-justified, annotated corpus of hate speech to date, provides opportunities for training and evaluating hate speech classifiers and for scientific inquiries into the linguistic and network components of hate speech.
Hoover, J., Portillo-Wightman, G., Yeh, L., Havaldar, S., Davani, A. M., Lin, Y., Kennedy, B., Atari, M., Kamel, Z., Mendlen, M., Moreno, G., Park, C., Chang, T. E., Chin, J., Leong, C., Leung, J. Y., Mirinjian, A., & Dehghani, M. (2020). Moral Foundations Twitter Corpus: A Collection of 35k Tweets Annotated for Moral Sentiment. Social Psychological and Personality Science, 11(8), 1057–1071.
@article{hoover2020mftc,
  title         = {Moral Foundations Twitter Corpus: A Collection of 35k Tweets Annotated for Moral Sentiment},
  journal       = {Social Psychological and Personality Science},
  year          = {2020},
  author        = {Hoover, J. and Portillo-Wightman, G. and Yeh, L. and Havaldar, S. and Davani, A. M. and Lin, Y. and Kennedy, B. and Atari, M. and Kamel, Z. and Mendlen, M. and Moreno, G. and Park, C. and Chang, T. E. and Chin, J. and Leong, C. and Leung, J. Y. and Mirinjian, A. and Dehghani, M.},
  volume        = {11},
  number        = {8},
  pages         = {1057--1071},
  internal-note = {NOTE(review): citation key duplicated earlier in this file for the same paper -- deduplicate before compiling}
}
Research has shown that accounting for moral sentiment in natural language can yield insight into a variety of on- and off-line phenomena such as message diffusion, protest dynamics, and social distancing. However, measuring moral sentiment in natural language is challenging, and the difficulty of this task is exacerbated by the limited availability of annotated data. To address this issue, we introduce the Moral Foundations Twitter Corpus, a collection of 35,108 tweets that have been curated from seven distinct domains of discourse and hand annotated by at least three trained annotators for 10 categories of moral sentiment. To facilitate investigations of annotator response dynamics, we also provide psychological and demographic metadata for each annotator. Finally, we report moral sentiment classification baselines for this corpus using a range of popular methodologies.
Miscellaneous Published Works
Courtland, M., Davani, A., Reyes, M., Yeh, L., Leung, J., Kennedy, B., Dehghani, M., & Zevin, J. (2019). Modeling performance differences on cognitive tests using LSTMs and skip-thought vectors trained on reported media consumption. Proceedings of the Third Workshop on Natural Language Processing and Computational Social Science, 47–53.
@inproceedings{courtland2019modeling,
  title     = {Modeling performance differences on cognitive tests using LSTMs and skip-thought vectors trained on reported media consumption},
  author    = {Courtland, Maury and Davani, Aida and Reyes, Melissa and Yeh, Leigh and Leung, Jun and Kennedy, Brendan and Dehghani, Morteza and Zevin, Jason},
  booktitle = {Proceedings of the Third Workshop on Natural Language Processing and Computational Social Science},
  pages     = {47--53},
  year      = {2019}
}
Courtland, M., Davani, A., Reyes, M., Yeh, L., Leung, J., Kennedy, B., Dehghani, M., & Zevin, J. (2019). Subtle differences in language experience moderate performance on language-based cognitive tests. Proceedings of the 41st Annual Conference of the Cognitive Science Society, Austin, Texas. Cognitive Science Society.
@inproceedings{courtland2019subtle,
  title        = {Subtle differences in language experience moderate performance on language-based cognitive tests},
  author       = {Courtland, Maury and Davani, Aida and Reyes, Melissa and Yeh, Leigh and Leung, Jun and Kennedy, Brendan and Dehghani, Morteza and Zevin, Jason},
  booktitle    = {Proceedings of the 41st Annual Conference of the Cognitive Science Society},
  organization = {Cognitive Science Society},
  address      = {Austin, Texas},
  year         = {2019}
}
Hossain, K. S. M. T., Harutyunyan, H., Ning, Y., Kennedy, B., Ramakrishnan, N., & Galstyan, A. (2022). Identifying geopolitical event precursors using attention-based LSTMs. Frontiers in Artificial Intelligence, 5. https://doi.org/10.3389/frai.2022.893875
@article{10.3389/frai.2022.893875,
  title   = {Identifying geopolitical event precursors using attention-based LSTMs},
  author  = {Hossain, K. S. M. Tozammel and Harutyunyan, Hrayr and Ning, Yue and Kennedy, Brendan and Ramakrishnan, Naren and Galstyan, Aram},
  journal = {Frontiers in Artificial Intelligence},
  volume  = {5},
  year    = {2022},
  doi     = {10.3389/frai.2022.893875},
  url     = {https://www.frontiersin.org/articles/10.3389/frai.2022.893875},
  issn    = {2624-8212}
}
Forecasting societal events such as civil unrest, mass protests, and violent conflicts is a challenging problem with several important real-world applications in planning and policy making. While traditional forecasting approaches have typically relied on historical time series for generating such forecasts, recent research has focused on using open source surrogate data for more accurate and timely forecasts. Furthermore, leveraging such data can also help to identify precursors of those events that can be used to gain insights into the generated forecasts. The key challenge is to develop a unified framework for forecasting and precursor identification that can deal with missing historical data. Other challenges include sufficient flexibility in handling different types of events and providing interpretable representations of identified precursors. Although existing methods exhibit promising performance for predictive modeling in event detection, these models do not adequately address the above challenges. Here, we propose a unified framework based on an attention-based long short-term memory (LSTM) model to simultaneously forecast events with sequential text datasets as well as identify precursors at different granularity such as documents and document excerpts. The key idea is to leverage word context in sequential and time-stamped documents such as news articles and blogs for learning a rich set of precursors. We validate the proposed framework by conducting extensive experiments with two real-world datasets—military action and violent conflicts in the Middle East and mass protests in Latin America. Our results show that overall, the proposed approach generates more accurate forecasts compared to the existing state-of-the-art methods, while at the same time producing a rich set of precursors for the forecasted events.
Kennedy, B., Ashokkumar, A., Boyd, R., & Dehghani, M. (2022). Text Analysis for Psychology: Methods, Principles, and Practices. In M. Dehghani & R. Boyd (Eds.), The Handbook of Language Analysis in Psychology. Guilford Press. https://doi.org/10.31234/osf.io/h2b8t
@incollection{kennedy2020text,
  title         = {Text Analysis for Psychology: Methods, Principles, and Practices},
  author        = {Kennedy, Brendan and Ashokkumar, Ashwini and Boyd, Ryan and Dehghani, Morteza},
  booktitle     = {The Handbook of Language Analysis in Psychology},
  publisher     = {Guilford Press},
  year          = {2022},
  doi           = {10.31234/osf.io/h2b8t},
  editor        = {Dehghani, Morteza and Boyd, Ryan},
  internal-note = {NOTE(review): citation key duplicated earlier in this file for the same chapter -- deduplicate before compiling}
}
Reimer, N. K., Atari, M., Karimi-Malekabadi, F., Trager, J., Kennedy, B., Graham, J., & Dehghani, M. (2022). Moral values predict county-level COVID-19 vaccination rates in the United States. American Psychologist, 77(6), 743.
@article{reimer2022moral,
  title     = {Moral values predict county-level {COVID-19} vaccination rates in the {United States}},
  author    = {Reimer, Nils Karl and Atari, Mohammad and Karimi-Malekabadi, Farzan and Trager, Jackson and Kennedy, Brendan and Graham, Jesse and Dehghani, Morteza},
  journal   = {American Psychologist},
  volume    = {77},
  number    = {6},
  pages     = {743},
  year      = {2022},
  author+an = {5=highlight},
  publisher = {American Psychological Association}
}
Hoover, J., Atari, M., Davani, A. M., Kennedy, B., Portillo-Wightman, G., Yeh, L., & Dehghani, M. (2021). Investigating the role of group-based morality in extreme behavioral expressions of prejudice. Nature Communications, 12(1), 1–13.
@article{hoover2021investigating,
  author    = {Hoover, Joe and Atari, Mohammad and Davani, Aida Mostafazadeh and Kennedy, Brendan and Portillo-Wightman, Gwenyth and Yeh, Leigh and Dehghani, Morteza},
  title     = {Investigating the role of group-based morality in extreme behavioral expressions of prejudice},
  journal   = {Nature Communications},
  year      = {2021},
  volume    = {12},
  number    = {1},
  pages     = {1--13},
  publisher = {Nature Publishing Group}
}