matous-volf committed b0ede6c (verified, parent f411f04): docs: add a readme
---
language:
  - "en"
license: "cc-by-nc-4.0"
library_name: "transformers"
pipeline_tag: text-classification
tags:
  - "text"
  - "politics"
  - "political"
  - "leaning"
  - "bias"
  - "politicalness"
base_model: "microsoft/deberta-v3-large"
datasets:
  - "mlburnham/dem_rep_party_platform_topics"
  - "cajcodes/political-bias"
  - "JyotiNayak/political_ideologies"
  - "Jacobvs/PoliticalTweets"
widget:
  - example_title: "Taxes 1"
    text: "The government should raise taxes on the rich so it can give more money to the homeless."
    output:
      - label: left
        score: 1.00
      - label: center
        score: 0.00
      - label: right
        score: 0.00
  - example_title: "Taxes 2"
    text: "The government should cut taxes because it is not using them efficiently anyway."
    output:
      - label: left
        score: 0.00
      - label: center
        score: 0.00
      - label: right
        score: 1.00
  - example_title: "Abortion 1"
    text: "Opting for abortion is an inalienable right of every individual."
    output:
      - label: left
        score: 1.00
      - label: center
        score: 0.00
      - label: right
        score: 0.00
  - example_title: "Abortion 2"
    text: "Terminating a pregnancy is equivalent to committing homicide."
    output:
      - label: left
        score: 0.42
      - label: center
        score: 0.00
      - label: right
        score: 0.58
  - example_title: "Immigration 1"
    text: "Mass detention of undocumented persons is an unjust practice that disproportionately harms vulnerable populations and must end."
    output:
      - label: left
        score: 1.00
      - label: center
        score: 0.00
      - label: right
        score: 0.00
  - example_title: "Immigration 2"
    text: "Immigration must be strictly controlled to protect national security, as it increases the risk of terrorism."
    output:
      - label: left
        score: 0.00
      - label: center
        score: 0.00
      - label: right
        score: 1.00
model-index:
  - name: "political-leaning-deberta-large"
    results:
      - task:
          type: "text-classification"
          name: "text political leaning classification"
        dataset:
          type: "-"
          name: "Article bias prediction"
        metrics:
          - type: "f1"
            value: 89
            name: "F1 score"
            args:
              average: "weighted"
        source:
          name: "the paper"
          url: "https://github.com/matous-volf/political-leaning-prediction/blob/main/paper.pdf"
      - task:
          type: "text-classification"
          name: "text political leaning classification"
        dataset:
          type: "-"
          name: "BIGNEWSBLN"
        metrics:
          - type: "f1"
            value: 88.6
            name: "F1 score"
            args:
              average: "weighted"
        source:
          name: "the paper"
          url: "https://github.com/matous-volf/political-leaning-prediction/blob/main/paper.pdf"
      - task:
          type: "text-classification"
          name: "text political leaning classification"
        dataset:
          type: "-"
          name: "CommonCrawl news articles"
        metrics:
          - type: "f1"
            value: 88.9
            name: "F1 score"
            args:
              average: "weighted"
        source:
          name: "the paper"
          url: "https://github.com/matous-volf/political-leaning-prediction/blob/main/paper.pdf"
      - task:
          type: "text-classification"
          name: "text political leaning classification"
        dataset:
          type: "-"
          name: "Dem., rep. party platform topics"
        metrics:
          - type: "f1"
            value: 85.6
            name: "F1 score"
            args:
              average: "weighted"
        source:
          name: "the paper"
          url: "https://github.com/matous-volf/political-leaning-prediction/blob/main/paper.pdf"
      - task:
          type: "text-classification"
          name: "text political leaning classification"
        dataset:
          type: "cajcodes/political-bias"
          name: "GPT-4 political bias"
        metrics:
          - type: "f1"
            value: 86.9
            name: "F1 score"
            args:
              average: "weighted"
        source:
          name: "the paper"
          url: "https://github.com/matous-volf/political-leaning-prediction/blob/main/paper.pdf"
      - task:
          type: "text-classification"
          name: "text political leaning classification"
        dataset:
          type: "JyotiNayak/political_ideologies"
          name: "GPT-4 political ideologies"
        metrics:
          - type: "f1"
            value: 99.6
            name: "F1 score"
            args:
              average: "weighted"
        source:
          name: "the paper"
          url: "https://github.com/matous-volf/political-leaning-prediction/blob/main/paper.pdf"
      - task:
          type: "text-classification"
          name: "text political leaning classification"
        dataset:
          type: "-"
          name: "Media political stance"
        metrics:
          - type: "f1"
            value: 93.1
            name: "F1 score"
            args:
              average: "weighted"
        source:
          name: "the paper"
          url: "https://github.com/matous-volf/political-leaning-prediction/blob/main/paper.pdf"
      - task:
          type: "text-classification"
          name: "text political leaning classification"
        dataset:
          type: "-"
          name: "Political podcasts"
        metrics:
          - type: "f1"
            value: 99.8
            name: "F1 score"
            args:
              average: "weighted"
        source:
          name: "the paper"
          url: "https://github.com/matous-volf/political-leaning-prediction/blob/main/paper.pdf"
      - task:
          type: "text-classification"
          name: "text political leaning classification"
        dataset:
          type: "Jacobvs/PoliticalTweets"
          name: "Political tweets"
        metrics:
          - type: "f1"
            value: 82.1
            name: "F1 score"
            args:
              average: "weighted"
        source:
          name: "the paper"
          url: "https://github.com/matous-volf/political-leaning-prediction/blob/main/paper.pdf"
      - task:
          type: "text-classification"
          name: "text political leaning classification"
        dataset:
          type: "-"
          name: "Qbias"
        metrics:
          - type: "f1"
            value: 57.9
            name: "F1 score"
            args:
              average: "weighted"
        source:
          name: "the paper"
          url: "https://github.com/matous-volf/political-leaning-prediction/blob/main/paper.pdf"
---

# Text political leaning classifier based on DeBERTa V3 large

This model classifies text by its political leaning into three classes: left, center and right. It has been trained on
news articles, social network posts and LLM-generated political statements. The training data comes from the context of
the United States, so the left class is mostly defined by liberal ideology and Democratic Party views, while the right
class is closely tied to conservative and Republican views.

The model is part of the research presented in the paper
[Predicting political leaning and politicalness of text using transformer models](https://github.com/matous-volf/political-leaning-prediction/blob/main/paper.pdf).
The paper covers predicting political leaning as well as politicalness, a binary label indicating whether a text is
about politics at all. We have benchmarked the existing models for politicalness and shown that one of them,
[Political DEBATE](https://huggingface.co/mlburnham/Political_DEBATE_large_v1.0), achieves an \\(F_1\\) score of over
90 %. This makes it suitable for filtering out non-political texts in front of a political leaning classifier like this
one. We recommend doing so if the input to this model is not guaranteed to be about politics.
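
The recommended filtering step amounts to a simple gate: only run the leaning classifier on texts that a politicalness model accepts. A minimal sketch with hypothetical stand-in predictors (the real setup would plug in Political DEBATE and this model):

```python
from typing import Callable, Optional


def classify_leaning_if_political(
    text: str,
    is_political: Callable[[str], bool],
    predict_leaning: Callable[[str], str],
) -> Optional[str]:
    """Run the leaning classifier only on texts judged to be political."""
    if not is_political(text):
        return None  # non-political input: no leaning prediction
    return predict_leaning(text)


# Hypothetical stand-ins for the actual models, for illustration only.
def stub_is_political(text: str) -> bool:
    return "tax" in text.lower()


def stub_leaning(text: str) -> str:
    return "left"


print(classify_leaning_if_political(
    "The government should raise taxes.", stub_is_political, stub_leaning
))  # left
print(classify_leaning_if_political(
    "I love hiking.", stub_is_political, stub_leaning
))  # None
```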

Our paper addresses the challenge of automatically classifying text according to political leaning and politicalness
using transformer models. We compose a comprehensive overview of existing datasets and models for these tasks, finding
that current approaches create siloed solutions that perform poorly on out-of-distribution texts. To address this
limitation, we compile a diverse dataset by combining 12 datasets for political leaning classification and creating a
new dataset for politicalness by extending 18 existing datasets with the appropriate label. Through extensive
benchmarking with leave-one-in and leave-one-out methodologies, we evaluate the performance of existing models and train
new ones with enhanced generalization capabilities.

Alongside the paper, we release the complete
[source code and results](https://github.com/matous-volf/political-leaning-prediction). This model is deployed in
a [demo web app](https://political-leaning.matousvolf.cz).
A [second, smaller model](https://huggingface.co/matous-volf/political-leaning-politics) has also been produced.

## Usage

The model outputs 0 for the left, 1 for the center and 2 for the right leaning. The score of the predicted class is
between \\(\frac{1}{3}\\) and 1.
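
The lower bound of \\(\frac{1}{3}\\) follows from taking a softmax over three logits: the largest of three probabilities summing to 1 can never fall below one third. A minimal pure-Python illustration (not part of the model's API):

```python
import math


def softmax(logits):
    """Numerically stable softmax over a list of logits."""
    m = max(logits)
    exps = [math.exp(x - m) for x in logits]
    total = sum(exps)
    return [e / total for e in exps]


# Even with three near-identical logits, the winning class's
# probability only barely exceeds 1/3 -- it can never drop below it.
probs = softmax([0.01, 0.0, 0.0])
assert abs(sum(probs) - 1.0) < 1e-9
assert max(probs) >= 1 / 3
print(max(probs))
```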

To use the model, you can either utilize the high-level Hugging Face
[pipeline](https://huggingface.co/docs/transformers/main_classes/pipelines):

```py
from transformers import pipeline

pipe = pipeline(
    "text-classification",
    model="matous-volf/political-leaning-deberta-large",
    tokenizer="microsoft/deberta-v3-large",
)

text = "The government should raise taxes on the rich so it can give more money to the homeless."

output = pipe(text)
print(output)
```

Or load it [directly](https://huggingface.co/docs/transformers/en/models):

```py
from torch import argmax
from torch.nn.functional import softmax
from transformers import AutoModelForSequenceClassification, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("microsoft/deberta-v3-large")
model = AutoModelForSequenceClassification.from_pretrained("matous-volf/political-leaning-deberta-large")

text = "The government should cut taxes because it is not using them efficiently anyway."

tokens = tokenizer(text, return_tensors="pt")
output = model(**tokens)
logits = output.logits

political_leaning = argmax(logits, dim=1).item()
probabilities = softmax(logits, dim=1)
score = probabilities[0, political_leaning].item()
print(political_leaning, score)
```

## Evaluation

The following table displays the performance of the model on the test sets (15 %) of the datasets used for training.

| dataset                          | accuracy | \\(F_1\\) score |
|:---------------------------------|:---------|:----------------|
| Article bias prediction          | 89       | 89              |
| BIGNEWSBLN                       | 88.6     | 88.6            |
| CommonCrawl news articles        | 88.9     | 88.9            |
| Dem., rep. party platform topics | 85.5     | 85.6            |
| GPT-4 political bias             | 87       | 86.9            |
| GPT-4 political ideologies       | 99.6     | 99.6            |
| Media political stance           | 91.6     | 93.1            |
| Political podcasts               | 99.8     | 99.8            |
| Political tweets                 | 82.1     | 82.1            |
| Qbias                            | 58       | 57.9            |
| **average**                      | **87**   | **87.2**        |
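
The reported \\(F_1\\) scores are weighted averages over the three classes: each class's \\(F_1\\) is weighted by its share of the test set. A minimal pure-Python sketch of the metric on hypothetical labels (a real evaluation would use a library such as scikit-learn):

```python
def weighted_f1(y_true, y_pred, labels=(0, 1, 2)):
    """Per-class F1 averaged with weights proportional to class support."""
    total = len(y_true)
    score = 0.0
    for label in labels:
        tp = sum(1 for t, p in zip(y_true, y_pred) if t == label and p == label)
        fp = sum(1 for t, p in zip(y_true, y_pred) if t != label and p == label)
        fn = sum(1 for t, p in zip(y_true, y_pred) if t == label and p != label)
        support = tp + fn
        precision = tp / (tp + fp) if tp + fp else 0.0
        recall = tp / support if support else 0.0
        f1 = 2 * precision * recall / (precision + recall) if precision + recall else 0.0
        score += f1 * support / total
    return score


# Hypothetical labels: 0 = left, 1 = center, 2 = right.
y_true = [0, 0, 1, 2, 2, 2]
y_pred = [0, 1, 1, 2, 2, 0]
print(round(weighted_f1(y_true, y_pred), 3))  # 0.678
```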

The following is an example of a confusion matrix, obtained by evaluating the model on a test set from the CommonCrawl
news articles dataset.

<img src="confusion_matrix.svg" alt="a confusion matrix example" height="350"/>

The complete results of all our measurements are available in the source code repository.

## Training

This model is based on [DeBERTa V3 large](https://huggingface.co/microsoft/deberta-v3-large). All the datasets used for
fine-tuning are listed in the paper, along with a detailed description of the preprocessing, training and evaluation
methodology. In summary, we manually tuned the hyperparameters in a setup designed to maximize performance on unseen
types of text (out-of-distribution), increasing the model's generalization abilities. In this setup, we left one dataset
at a time out of the training sample and used it as the validation set. We then took the resulting optimal
hyperparameters and trained this model on all the available datasets.
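
The leave-one-out procedure described above can be sketched as a loop over the dataset names, holding one out for validation in each round (the names below are illustrative, not the full list from the paper):

```python
def leave_one_out_splits(names):
    """Yield (train, validation) splits, holding out one dataset per round."""
    for held_out in names:
        train = [name for name in names if name != held_out]
        yield train, held_out


# Illustrative subset of dataset names.
datasets = ["article_bias_prediction", "bignewsbln", "qbias"]

for train, validation in leave_one_out_splits(datasets):
    print(f"train on {train}, validate on {validation}")
```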

## Authors

- Matous Volf ([[email protected]](mailto:[email protected])),
  [DELTA – High school of computer science and economics](https://www.delta-skola.cz), Pardubice, Czechia
- Jakub Simko ([[email protected]](mailto:[email protected])),
  [Kempelen Institute of Intelligent Technologies](https://kinit.sk), Bratislava, Slovakia

## Citation

### BibTeX

```bibtex
@article{volf-simko-2025-political-leaning,
  title = {Predicting political leaning and politicalness of text using transformer models},
  author = {Volf, Matous and Simko, Jakub},
  year = 2025,
  institution = {DELTA – High school of computer science and economics, Pardubice, Czechia; Kempelen Institute of Intelligent Technologies, Bratislava, Slovakia}
}
```
356
+
357
+ ### APA
358
+
359
+ Volf, M. and Simko, J. (2025). Predicting political leaning and politicalness of text using transformer models. DELTA –
360
+ High school of computer science and economics, Pardubice, Czechia; Kempelen Institute of Intelligent Technologies,
361
+ Bratislava, Slovakia.