han7ter committed on
Commit
25d2d36
·
1 Parent(s): bc36b2e
Files changed (2) hide show
  1. app.py +6 -1
  2. requirements.txt +2 -1
app.py CHANGED
@@ -1,5 +1,6 @@
1
  import datasets
2
  import evaluate
 
3
  import pandas as pd
4
  import numpy as np
5
  from datasets import Dataset
@@ -7,6 +8,8 @@ from sklearn.model_selection import train_test_split
7
  from transformers import (AutoTokenizer, AutoModelForSequenceClassification,
8
  TrainingArguments, Trainer)
9
 
 
 
10
  model_name = "cointegrated/rubert-tiny2"
11
 
12
  # Login using e.g. `huggingface-cli login` to access this dataset
@@ -19,7 +22,7 @@ train = Dataset.from_pandas(train)
19
  test = Dataset.from_pandas(test)
20
 
21
  # Выполняем предобработку текста
22
- tokenizer = AutoTokenizer.from_pretrained(model_name, max_len=512)
23
 
24
  def tokenize_function(examples):
25
  return tokenizer(examples['text'], padding='max_length', truncation=True)
@@ -32,6 +35,8 @@ model = AutoModelForSequenceClassification.from_pretrained(
32
  model_name,
33
  num_labels=4)
34
 
 
 
35
  # Задаем параметры обучения
36
  training_args = TrainingArguments(
37
  output_dir='test_trainer_log',
 
1
  import datasets
2
  import evaluate
3
+ import os
4
  import pandas as pd
5
  import numpy as np
6
  from datasets import Dataset
 
8
  from transformers import (AutoTokenizer, AutoModelForSequenceClassification,
9
  TrainingArguments, Trainer)
10
 
11
+ os.environ["CUDA_VISIBLE_DEVICES"] = ""
12
+
13
  model_name = "cointegrated/rubert-tiny2"
14
 
15
  # Login using e.g. `huggingface-cli login` to access this dataset
 
22
  test = Dataset.from_pandas(test)
23
 
24
  # Выполняем предобработку текста
25
+ tokenizer = AutoTokenizer.from_pretrained(model_name, max_len=400)
26
 
27
  def tokenize_function(examples):
28
  return tokenizer(examples['text'], padding='max_length', truncation=True)
 
35
  model_name,
36
  num_labels=4)
37
 
38
+ model.to("cpu")
39
+
40
  # Задаем параметры обучения
41
  training_args = TrainingArguments(
42
  output_dir='test_trainer_log',
requirements.txt CHANGED
@@ -6,4 +6,5 @@ datasets
6
  evaluate
7
  pandas
8
  numpy
9
- scikit-learn
 
 
6
  evaluate
7
  pandas
8
  numpy
9
+ scikit-learn
10
+ os