{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "44972eb1-639a-4c22-9984-cb097ebf788d", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0" ] }, "execution_count": 1, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import os\n", "os.environ[\"CUDA_DEVICE_ORDER\"]=\"PCI_BUS_ID\"\n", "os.environ[\"CUDA_VISIBLE_DEVICES\"]=\"3\"\n", "import torch\n", "torch.cuda.set_device(0)\n", "torch.cuda.current_device()" ] }, { "cell_type": "code", "execution_count": 2, "id": "a1de668b-1a19-4e2b-bdb6-18288c9f7198", "metadata": {}, "outputs": [], "source": [ "import torch\n", "from transformers import T5Tokenizer, T5ForConditionalGeneration\n", "\n", "from accelerate import Accelerator\n", "\n", "class Summarizer:\n", " def __init__(self, device=\"cpu\"):\n", " model_name = \"sarahai/ruT5-base-summarizer\"\n", " self.device = device\n", " self.tokenizer = T5Tokenizer.from_pretrained(model_name, device_map=device)\n", " self.model = T5ForConditionalGeneration.from_pretrained(model_name, device_map=device)\n", "\n", " def summarize(self, text, max_length=100, min_length=50, num_beams=5):\n", " input_ids = self.tokenizer(text, return_tensors=\"pt\").input_ids.to(self.device)\n", " # input_ids = torch.nn.utils.rnn.pad_sequence(input_ids.squeeze().chunk(chunk_num, 0), batch_first=True, padding_value=-100)\n", " outputs = self.model.generate(input_ids, max_length=max_length,\n", " min_length=min_length,\n", " length_penalty=2.0,\n", " num_beams=num_beams, early_stopping=True)\n", "\n", " return self.tokenizer.decode(outputs[0], skip_special_tokens=True)\n", "\n", " def generate(self, indexes, max_length=100, min_length=50, num_beams=5):\n", " return self.model.generate(indexes.unsqueeze(0), max_length=max_length,\n", " min_length=min_length,\n", " length_penalty=2.0,\n", " num_beams=num_beams, early_stopping=True).squeeze()" ] }, { "cell_type": "code", "execution_count": 3, "id": "827f5fff-1c35-43fb-a560-0506b6a9b270", "metadata": {}, "outputs": [], "source": [ "# summarizer = Summarizer(\"cuda:3\")" ] }, { "cell_type": "code", "execution_count": 51, "id": "9be5b5b4-7ff1-4c5f-9457-caa9ee16f6cd", "metadata": {}, "outputs": [], "source": [ "from bs4 import BeautifulSoup\n", "from pydantic import BaseModel\n", "import requests\n", "from typing import Optional\n", "\n", "class Pager(BaseModel):\n", " title: str\n", " text: str\n", " original_tags: list[str]\n", "\n", "def is_valid_page(url):\n", " return True\n", "\n", "def get_pager(url)->Optional[Pager]:\n", " try:\n", " req = requests.get(url)\n", " soup = BeautifulSoup(req.text, 'lxml')\n", " query = soup.find(\"div\", class_=\"article-formatted-body\")\n", " title = soup.title.string\n", " tags = []\n", " for tag in soup.find_all(\"meta\"):\n", " if tag.get(\"name\", None) == \"keywords\":\n", " tags = [x for x in re.split(',| ', tag[\"content\"]) if len(x) > 0]\n", " return Pager(title=title, text=query.get_text(), original_tags=tags)\n", " except:\n", " return None" ] }, { "cell_type": "code", "execution_count": 52, "id": "32a286e5-4511-4951-bcb0-b53fe4808897", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['https://habr.com/ru/articles/897282/',\n", " 'https://habr.com/ru/articles/897472/',\n", " 'https://habr.com/ru/articles/891132/',\n", " 'https://habr.com/ru/articles/897224/',\n", " 'https://habr.com/ru/articles/897636/',\n", " 'https://habr.com/ru/articles/897496/',\n", " 'https://habr.com/ru/articles/897630/',\n", " 'https://habr.com/ru/articles/897518/',\n", " 'https://habr.com/ru/articles/897640/',\n", " 'https://habr.com/ru/articles/897574/',\n", " 'https://habr.com/ru/articles/897632/',\n", " 'https://habr.com/ru/articles/891488/',\n", " 'https://habr.com/ru/articles/896972/',\n", " 'https://habr.com/ru/articles/897624/',\n", " 'https://habr.com/ru/articles/897534/',\n", " 'https://habr.com/ru/articles/897620/',\n", " 'https://habr.com/ru/articles/897654/',\n", " 'https://habr.com/ru/articles/897648/',\n", " 'https://habr.com/ru/articles/897642/',\n", " 'https://habr.com/ru/articles/897634/',\n", " 'https://habr.com/ru/articles/897282/',\n", " 'https://habr.com/ru/articles/897472/',\n", " 'https://habr.com/ru/articles/891132/',\n", " 'https://habr.com/ru/articles/897224/',\n", " 'https://habr.com/ru/articles/897636/',\n", " 'https://habr.com/ru/articles/897496/',\n", " 'https://habr.com/ru/articles/897630/',\n", " 'https://habr.com/ru/articles/897518/',\n", " 'https://habr.com/ru/articles/897640/',\n", " 'https://habr.com/ru/articles/897574/',\n", " 'https://habr.com/ru/articles/897632/',\n", " 'https://habr.com/ru/articles/891488/',\n", " 'https://habr.com/ru/articles/896972/',\n", " 'https://habr.com/ru/articles/897624/',\n", " 'https://habr.com/ru/articles/897534/',\n", " 'https://habr.com/ru/articles/897620/',\n", " 'https://habr.com/ru/articles/897654/',\n", " 'https://habr.com/ru/articles/897648/',\n", " 'https://habr.com/ru/articles/897642/',\n", " 'https://habr.com/ru/articles/897634/',\n", " 'https://habr.com/ru/articles/897572/',\n", " 'https://habr.com/ru/articles/897300/',\n", " 'https://habr.com/ru/articles/897566/',\n", " 'https://habr.com/ru/articles/897570/',\n", " 'https://habr.com/ru/articles/897606/',\n", " 'https://habr.com/ru/articles/895412/',\n", " 'https://habr.com/ru/articles/897074/',\n", " 'https://habr.com/ru/articles/897568/',\n", " 'https://habr.com/ru/articles/897546/',\n", " 'https://habr.com/ru/articles/897062/',\n", " 'https://habr.com/ru/articles/878678/',\n", " 'https://habr.com/ru/articles/897594/',\n", " 'https://habr.com/ru/articles/897564/',\n", " 'https://habr.com/ru/articles/897256/',\n", " 'https://habr.com/ru/articles/897526/',\n", " 'https://habr.com/ru/articles/897538/',\n", " 'https://habr.com/ru/articles/897236/',\n", " 'https://habr.com/ru/articles/897608/',\n", " 'https://habr.com/ru/articles/897604/',\n", " 'https://habr.com/ru/articles/897474/',\n", " 'https://habr.com/ru/articles/897500/',\n", " 'https://habr.com/ru/articles/897430/',\n", " 'https://habr.com/ru/articles/897448/',\n", " 'https://habr.com/ru/articles/897484/',\n", " 'https://habr.com/ru/articles/889928/',\n", " 'https://habr.com/ru/articles/897476/',\n", " 'https://habr.com/ru/articles/897328/',\n", " 'https://habr.com/ru/articles/897338/',\n", " 'https://habr.com/ru/articles/896430/',\n", " 'https://habr.com/ru/articles/897420/',\n", " 'https://habr.com/ru/articles/897486/',\n", " 'https://habr.com/ru/articles/897422/',\n", " 'https://habr.com/ru/articles/897478/',\n", " 'https://habr.com/ru/articles/897454/',\n", " 'https://habr.com/ru/articles/897424/',\n", " 'https://habr.com/ru/articles/897446/',\n", " 'https://habr.com/ru/articles/893702/',\n", " 'https://habr.com/ru/articles/897306/',\n", " 'https://habr.com/ru/articles/897510/',\n", " 'https://habr.com/ru/articles/897468/',\n", " 'https://habr.com/ru/articles/897388/',\n", " 'https://habr.com/ru/articles/897100/',\n", " 'https://habr.com/ru/articles/897266/',\n", " 'https://habr.com/ru/articles/897400/',\n", " 'https://habr.com/ru/articles/897380/',\n", " 'https://habr.com/ru/articles/897248/',\n", " 'https://habr.com/ru/articles/896106/',\n", " 'https://habr.com/ru/articles/897374/',\n", " 'https://habr.com/ru/articles/897376/',\n", " 'https://habr.com/ru/articles/897410/',\n", " 'https://habr.com/ru/articles/897396/',\n", " 'https://habr.com/ru/articles/897360/',\n", " 'https://habr.com/ru/articles/897102/',\n", " 'https://habr.com/ru/articles/897042/',\n", " 'https://habr.com/ru/articles/896722/',\n", " 'https://habr.com/ru/articles/897402/',\n", " 'https://habr.com/ru/articles/894744/',\n", " 'https://habr.com/ru/articles/897326/',\n", " 'https://habr.com/ru/articles/897302/',\n", " 'https://habr.com/ru/articles/896980/',\n", " 'https://habr.com/ru/articles/897226/',\n", " 'https://habr.com/ru/articles/896958/',\n", " 'https://habr.com/ru/articles/897288/',\n", " 'https://habr.com/ru/articles/897334/',\n", " 'https://habr.com/ru/articles/896626/',\n", " 'https://habr.com/ru/articles/897292/',\n", " 'https://habr.com/ru/articles/896886/',\n", " 'https://habr.com/ru/articles/897246/',\n", " 'https://habr.com/ru/articles/897304/',\n", " 'https://habr.com/ru/articles/897298/',\n", " 'https://habr.com/ru/articles/896210/',\n", " 'https://habr.com/ru/articles/897310/',\n", " 'https://habr.com/ru/articles/897240/',\n", " 'https://habr.com/ru/articles/897324/',\n", " 'https://habr.com/ru/articles/894994/',\n", " 'https://habr.com/ru/articles/897296/',\n", " 'https://habr.com/ru/articles/897274/',\n", " 'https://habr.com/ru/articles/897204/',\n", " 'https://habr.com/ru/articles/897316/',\n", " 'https://habr.com/ru/articles/897314/',\n", " 'https://habr.com/ru/articles/897210/',\n", " 'https://habr.com/ru/articles/893992/',\n", " 'https://habr.com/ru/articles/897200/',\n", " 'https://habr.com/ru/articles/897202/',\n", " 'https://habr.com/ru/articles/897160/',\n", " 'https://habr.com/ru/articles/896410/',\n", " 'https://habr.com/ru/articles/897122/',\n", " 'https://habr.com/ru/articles/896362/',\n", " 'https://habr.com/ru/articles/897180/',\n", " 'https://habr.com/ru/articles/896870/',\n", " 'https://habr.com/ru/articles/897218/',\n", " 'https://habr.com/ru/articles/897212/',\n", " 'https://habr.com/ru/articles/896190/',\n", " 'https://habr.com/ru/articles/895998/',\n", " 'https://habr.com/ru/articles/897206/',\n", " 'https://habr.com/ru/articles/896894/',\n", " 'https://habr.com/ru/articles/897222/',\n", " 'https://habr.com/ru/articles/895672/',\n", " 'https://habr.com/ru/articles/897198/',\n", " 'https://habr.com/ru/articles/897068/',\n", " 'https://habr.com/ru/articles/897144/',\n", " 'https://habr.com/ru/articles/897142/',\n", " 'https://habr.com/ru/articles/896458/',\n", " 'https://habr.com/ru/articles/897174/',\n", " 'https://habr.com/ru/articles/897120/',\n", " 'https://habr.com/ru/articles/896872/',\n", " 'https://habr.com/ru/articles/897116/',\n", " 'https://habr.com/ru/articles/887794/',\n", " 'https://habr.com/ru/articles/897132/',\n", " 'https://habr.com/ru/articles/897170/',\n", " 'https://habr.com/ru/articles/897148/',\n", " 'https://habr.com/ru/articles/896182/',\n", " 'https://habr.com/ru/articles/897158/',\n", " 'https://habr.com/ru/articles/897164/',\n", " 'https://habr.com/ru/articles/896772/',\n", " 'https://habr.com/ru/articles/897118/',\n", " 'https://habr.com/ru/articles/897124/',\n", " 'https://habr.com/ru/articles/897146/',\n", " 'https://habr.com/ru/articles/897136/',\n", " 'https://habr.com/ru/articles/897126/',\n", " 'https://habr.com/ru/articles/897076/',\n", " 'https://habr.com/ru/articles/858040/',\n", " 'https://habr.com/ru/articles/897080/',\n", " 'https://habr.com/ru/articles/883770/',\n", " 'https://habr.com/ru/articles/897022/',\n", " 'https://habr.com/ru/articles/886814/',\n", " 'https://habr.com/ru/articles/896056/',\n", " 'https://habr.com/ru/articles/896498/',\n", " 'https://habr.com/ru/articles/897082/',\n", " 'https://habr.com/ru/articles/896078/',\n", " 'https://habr.com/ru/articles/896586/',\n", " 'https://habr.com/ru/articles/895032/',\n", " 'https://habr.com/ru/articles/897064/',\n", " 'https://habr.com/ru/articles/896714/',\n", " 'https://habr.com/ru/articles/896802/',\n", " 'https://habr.com/ru/articles/896800/',\n", " 'https://habr.com/ru/articles/895796/',\n", " 'https://habr.com/ru/articles/867696/',\n", " 'https://habr.com/ru/articles/896814/',\n", " 'https://habr.com/ru/articles/897072/',\n", " 'https://habr.com/ru/articles/896560/',\n", " 'https://habr.com/ru/articles/896986/',\n", " 'https://habr.com/ru/articles/895538/',\n", " 'https://habr.com/ru/articles/897056/',\n", " 'https://habr.com/ru/articles/897014/',\n", " 'https://habr.com/ru/articles/896966/',\n", " 'https://habr.com/ru/articles/896954/',\n", " 'https://habr.com/ru/articles/897020/',\n", " 'https://habr.com/ru/articles/896956/',\n", " 'https://habr.com/ru/articles/896846/',\n", " 'https://habr.com/ru/articles/896622/',\n", " 'https://habr.com/ru/articles/896998/',\n", " 'https://habr.com/ru/articles/897028/',\n", " 'https://habr.com/ru/articles/896906/',\n", " 'https://habr.com/ru/articles/896964/',\n", " 'https://habr.com/ru/articles/896978/',\n", " 'https://habr.com/ru/articles/897008/',\n", " 'https://habr.com/ru/articles/886022/',\n", " 'https://habr.com/ru/articles/897048/',\n", " 'https://habr.com/ru/articles/896968/',\n", " 'https://habr.com/ru/articles/896880/',\n", " 'https://habr.com/ru/articles/896896/',\n", " 'https://habr.com/ru/articles/896940/',\n", " 'https://habr.com/ru/articles/896916/',\n", " 'https://habr.com/ru/articles/896734/',\n", " 'https://habr.com/ru/articles/896392/',\n", " 'https://habr.com/ru/articles/894096/',\n", " 'https://habr.com/ru/articles/896888/',\n", " 'https://habr.com/ru/articles/893182/',\n", " 'https://habr.com/ru/articles/896792/',\n", " 'https://habr.com/ru/articles/896912/',\n", " 'https://habr.com/ru/articles/896902/',\n", " 'https://habr.com/ru/articles/895328/',\n", " 'https://habr.com/ru/articles/895104/',\n", " 'https://habr.com/ru/articles/896898/',\n", " 'https://habr.com/ru/articles/896818/',\n", " 'https://habr.com/ru/articles/896918/',\n", " 'https://habr.com/ru/articles/896704/',\n", " 'https://habr.com/ru/articles/896922/',\n", " 'https://habr.com/ru/articles/896748/',\n", " 'https://habr.com/ru/articles/896856/',\n", " 'https://habr.com/ru/articles/896860/',\n", " 'https://habr.com/ru/articles/896570/',\n", " 'https://habr.com/ru/articles/896864/',\n", " 'https://habr.com/ru/articles/896824/',\n", " 'https://habr.com/ru/articles/896836/',\n", " 'https://habr.com/ru/articles/896862/',\n", " 'https://habr.com/ru/articles/896692/',\n", " 'https://habr.com/ru/articles/896868/',\n", " 'https://habr.com/ru/articles/895360/',\n", " 'https://habr.com/ru/articles/896844/',\n", " 'https://habr.com/ru/articles/895766/',\n", " 'https://habr.com/ru/articles/892770/',\n", " 'https://habr.com/ru/articles/896060/',\n", " 'https://habr.com/ru/articles/896742/',\n", " 'https://habr.com/ru/articles/896826/',\n", " 'https://habr.com/ru/articles/896816/',\n", " 'https://habr.com/ru/articles/896866/',\n", " 'https://habr.com/ru/articles/895084/',\n", " 'https://habr.com/ru/articles/896820/',\n", " 'https://habr.com/ru/articles/896776/',\n", " 'https://habr.com/ru/articles/896652/',\n", " 'https://habr.com/ru/articles/896784/',\n", " 'https://habr.com/ru/articles/889420/',\n", " 'https://habr.com/ru/articles/896768/',\n", " 'https://habr.com/ru/articles/896756/',\n", " 'https://habr.com/ru/articles/896738/',\n", " 'https://habr.com/ru/articles/896790/',\n", " 'https://habr.com/ru/articles/896134/',\n", " 'https://habr.com/ru/articles/874486/',\n", " 'https://habr.com/ru/articles/896750/',\n", " 'https://habr.com/ru/articles/896010/',\n", " 'https://habr.com/ru/articles/896554/',\n", " 'https://habr.com/ru/articles/896758/',\n", " 'https://habr.com/ru/articles/895286/',\n", " 'https://habr.com/ru/articles/896794/',\n", " 'https://habr.com/ru/articles/896636/',\n", " 'https://habr.com/ru/articles/896762/',\n", " 'https://habr.com/ru/articles/896766/',\n", " 'https://habr.com/ru/articles/896082/',\n", " 'https://habr.com/ru/articles/896368/',\n", " 'https://habr.com/ru/articles/896730/',\n", " 'https://habr.com/ru/articles/896092/',\n", " 'https://habr.com/ru/articles/896684/',\n", " 'https://habr.com/ru/articles/894746/',\n", " 'https://habr.com/ru/articles/896738/',\n", " 'https://habr.com/ru/articles/896732/',\n", " 'https://habr.com/ru/articles/896680/',\n", " 'https://habr.com/ru/articles/894294/',\n", " 'https://habr.com/ru/articles/896624/',\n", " 'https://habr.com/ru/articles/895340/',\n", " 'https://habr.com/ru/articles/896706/',\n", " 'https://habr.com/ru/articles/896014/',\n", " 'https://habr.com/ru/articles/896728/',\n", " 'https://habr.com/ru/articles/896686/',\n", " 'https://habr.com/ru/articles/896536/',\n", " 'https://habr.com/ru/articles/896736/',\n", " 'https://habr.com/ru/articles/895682/',\n", " 'https://habr.com/ru/articles/896726/',\n", " 'https://habr.com/ru/articles/896710/',\n", " 'https://habr.com/ru/articles/896662/',\n", " 'https://habr.com/ru/articles/896332/',\n", " 'https://habr.com/ru/articles/896668/',\n", " 'https://habr.com/ru/articles/896644/',\n", " 'https://habr.com/ru/articles/896638/',\n", " 'https://habr.com/ru/articles/896670/',\n", " 'https://habr.com/ru/articles/896632/',\n", " 'https://habr.com/ru/articles/896620/',\n", " 'https://habr.com/ru/articles/896630/',\n", " 'https://habr.com/ru/articles/896598/',\n", " 'https://habr.com/ru/articles/896178/',\n", " 'https://habr.com/ru/articles/896650/',\n", " 'https://habr.com/ru/articles/896660/',\n", " 'https://habr.com/ru/articles/896658/',\n", " 'https://habr.com/ru/articles/896664/',\n", " 'https://habr.com/ru/articles/896606/',\n", " 'https://habr.com/ru/articles/896604/',\n", " 'https://habr.com/ru/articles/892784/',\n", " 'https://habr.com/ru/articles/150091/',\n", " 'https://habr.com/ru/articles/896654/',\n", " 'https://habr.com/ru/articles/896500/',\n", " 'https://habr.com/ru/articles/896580/',\n", " 'https://habr.com/ru/articles/896446/',\n", " 'https://habr.com/ru/articles/896594/',\n", " 'https://habr.com/ru/articles/896496/',\n", " 'https://habr.com/ru/articles/896502/',\n", " 'https://habr.com/ru/articles/893244/',\n", " 'https://habr.com/ru/articles/896490/',\n", " 'https://habr.com/ru/articles/896390/',\n", " 'https://habr.com/ru/articles/893178/',\n", " 'https://habr.com/ru/articles/866154/',\n", " 'https://habr.com/ru/articles/895090/',\n", " 'https://habr.com/ru/articles/896320/',\n", " 'https://habr.com/ru/articles/896514/',\n", " 'https://habr.com/ru/articles/896582/',\n", " 'https://habr.com/ru/articles/896550/',\n", " 'https://habr.com/ru/articles/896492/',\n", " 'https://habr.com/ru/articles/896504/',\n", " 'https://habr.com/ru/articles/896548/',\n", " 'https://habr.com/ru/articles/896568/',\n", " 'https://habr.com/ru/articles/896470/',\n", " 'https://habr.com/ru/articles/896348/',\n", " 'https://habr.com/ru/articles/895986/',\n", " 'https://habr.com/ru/articles/896438/',\n", " 'https://habr.com/ru/articles/896416/',\n", " 'https://habr.com/ru/articles/896456/',\n", " 'https://habr.com/ru/articles/896244/',\n", " 'https://habr.com/ru/articles/896454/',\n", " 'https://habr.com/ru/articles/896296/',\n", " 'https://habr.com/ru/articles/895316/',\n", " 'https://habr.com/ru/articles/896412/',\n", " 'https://habr.com/ru/articles/896468/',\n", " 'https://habr.com/ru/articles/892836/',\n", " 'https://habr.com/ru/articles/896448/',\n", " 'https://habr.com/ru/articles/896442/',\n", " 'https://habr.com/ru/articles/896424/',\n", " 'https://habr.com/ru/articles/896486/',\n", " 'https://habr.com/ru/articles/896484/',\n", " 'https://habr.com/ru/articles/896478/',\n", " 'https://habr.com/ru/articles/896436/',\n", " 'https://habr.com/ru/articles/896274/',\n", " 'https://habr.com/ru/articles/719750/',\n", " 'https://habr.com/ru/articles/896350/',\n", " 'https://habr.com/ru/articles/896342/',\n", " 'https://habr.com/ru/articles/896304/',\n", " 'https://habr.com/ru/articles/896388/',\n", " 'https://habr.com/ru/articles/891018/',\n", " 'https://habr.com/ru/articles/896358/',\n", " 'https://habr.com/ru/articles/892112/',\n", " 'https://habr.com/ru/articles/895584/',\n", " 'https://habr.com/ru/articles/885068/',\n", " 'https://habr.com/ru/articles/896344/',\n", " 'https://habr.com/ru/articles/896090/',\n", " 'https://habr.com/ru/articles/896398/',\n", " 'https://habr.com/ru/articles/895974/',\n", " 'https://habr.com/ru/articles/896414/',\n", " 'https://habr.com/ru/articles/896354/',\n", " 'https://habr.com/ru/articles/890000/',\n", " 'https://habr.com/ru/articles/896212/',\n", " 'https://habr.com/ru/articles/895992/',\n", " 'https://habr.com/ru/articles/892176/',\n", " 'https://habr.com/ru/articles/894954/',\n", " 'https://habr.com/ru/articles/894898/',\n", " 'https://habr.com/ru/articles/894992/',\n", " 'https://habr.com/ru/articles/895424/',\n", " 'https://habr.com/ru/articles/895452/',\n", " 'https://habr.com/ru/articles/896314/',\n", " 'https://habr.com/ru/articles/895804/',\n", " 'https://habr.com/ru/articles/896326/',\n", " 'https://habr.com/ru/articles/895848/',\n", " 'https://habr.com/ru/articles/896200/',\n", " 'https://habr.com/ru/articles/896118/',\n", " 'https://habr.com/ru/articles/896328/',\n", " 'https://habr.com/ru/articles/895250/',\n", " 'https://habr.com/ru/articles/896176/',\n", " 'https://habr.com/ru/articles/896116/',\n", " 'https://habr.com/ru/articles/896270/',\n", " 'https://habr.com/ru/articles/896308/',\n", " 'https://habr.com/ru/articles/895984/',\n", " 'https://habr.com/ru/articles/896312/',\n", " 'https://habr.com/ru/articles/894942/',\n", " 'https://habr.com/ru/articles/892444/',\n", " 'https://habr.com/ru/articles/896272/',\n", " 'https://habr.com/ru/articles/893084/',\n", " 'https://habr.com/ru/articles/895882/',\n", " 'https://habr.com/ru/articles/895664/',\n", " 'https://habr.com/ru/articles/896266/',\n", " 'https://habr.com/ru/articles/896122/',\n", " 'https://habr.com/ru/articles/896298/',\n", " 'https://habr.com/ru/articles/895982/',\n", " 'https://habr.com/ru/articles/896288/',\n", " 'https://habr.com/ru/articles/896306/',\n", " 'https://habr.com/ru/articles/896294/',\n", " 'https://habr.com/ru/articles/894928/',\n", " 'https://habr.com/ru/articles/896302/',\n", " 'https://habr.com/ru/articles/895206/',\n", " 'https://habr.com/ru/articles/896310/',\n", " 'https://habr.com/ru/articles/896152/',\n", " 'https://habr.com/ru/articles/896240/',\n", " 'https://habr.com/ru/articles/896276/',\n", " 'https://habr.com/ru/articles/896238/',\n", " 'https://habr.com/ru/articles/896166/',\n", " 'https://habr.com/ru/articles/896234/',\n", " 'https://habr.com/ru/articles/896236/',\n", " 'https://habr.com/ru/articles/896112/',\n", " 'https://habr.com/ru/articles/896066/',\n", " 'https://habr.com/ru/articles/896222/',\n", " 'https://habr.com/ru/articles/890538/',\n", " 'https://habr.com/ru/articles/895972/',\n", " 'https://habr.com/ru/articles/896072/',\n", " 'https://habr.com/ru/articles/896028/',\n", " 'https://habr.com/ru/articles/896110/',\n", " 'https://habr.com/ru/articles/896220/',\n", " 'https://habr.com/ru/articles/896204/',\n", " 'https://habr.com/ru/articles/896162/',\n", " 'https://habr.com/ru/articles/896208/',\n", " 'https://habr.com/ru/articles/896224/',\n", " 'https://habr.com/ru/articles/895956/',\n", " 'https://habr.com/ru/articles/896002/',\n", " 'https://habr.com/ru/articles/896160/',\n", " 'https://habr.com/ru/articles/896124/',\n", " 'https://habr.com/ru/articles/895338/',\n", " 'https://habr.com/ru/articles/895732/',\n", " 'https://habr.com/ru/articles/896094/',\n", " 'https://habr.com/ru/articles/896096/',\n", " 'https://habr.com/ru/articles/896120/',\n", " 'https://habr.com/ru/articles/896088/',\n", " 'https://habr.com/ru/articles/895760/',\n", " 'https://habr.com/ru/articles/896102/',\n", " 'https://habr.com/ru/articles/896084/',\n", " 'https://habr.com/ru/articles/896126/',\n", " 'https://habr.com/ru/articles/893432/',\n", " 'https://habr.com/ru/articles/896046/',\n", " 'https://habr.com/ru/articles/896140/',\n", " 'https://habr.com/ru/articles/896130/',\n", " 'https://habr.com/ru/articles/896074/',\n", " 'https://habr.com/ru/articles/896086/',\n", " 'https://habr.com/ru/articles/896064/',\n", " 'https://habr.com/ru/articles/896108/',\n", " 'https://habr.com/ru/articles/896070/',\n", " 'https://habr.com/ru/articles/896030/',\n", " 'https://habr.com/ru/articles/896022/',\n", " 'https://habr.com/ru/articles/896012/',\n", " 'https://habr.com/ru/articles/893050/',\n", " 'https://habr.com/ru/articles/895994/',\n", " 'https://habr.com/ru/articles/896054/',\n", " 'https://habr.com/ru/articles/895390/',\n", " 'https://habr.com/ru/articles/895676/',\n", " 'https://habr.com/ru/articles/895180/',\n", " 'https://habr.com/ru/articles/895810/',\n", " 'https://habr.com/ru/articles/895860/',\n", " 'https://habr.com/ru/articles/895980/',\n", " 'https://habr.com/ru/articles/895954/',\n", " 'https://habr.com/ru/articles/887726/',\n", " 'https://habr.com/ru/articles/896044/',\n", " 'https://habr.com/ru/articles/896020/',\n", " 'https://habr.com/ru/articles/883954/',\n", " 'https://habr.com/ru/articles/896024/',\n", " 'https://habr.com/ru/articles/896006/',\n", " 'https://habr.com/ru/articles/895068/',\n", " 'https://habr.com/ru/articles/895946/',\n", " 'https://habr.com/ru/articles/894688/',\n", " 'https://habr.com/ru/articles/895930/',\n", " 'https://habr.com/ru/articles/895950/',\n", " 'https://habr.com/ru/articles/895774/',\n", " 'https://habr.com/ru/articles/895942/',\n", " 'https://habr.com/ru/articles/895306/',\n", " 'https://habr.com/ru/articles/895928/',\n", " 'https://habr.com/ru/articles/895966/',\n", " 'https://habr.com/ru/articles/895902/',\n", " 'https://habr.com/ru/articles/895892/',\n", " 'https://habr.com/ru/articles/893430/',\n", " 'https://habr.com/ru/articles/895968/',\n", " 'https://habr.com/ru/articles/895960/',\n", " 'https://habr.com/ru/articles/892718/',\n", " 'https://habr.com/ru/articles/895024/',\n", " 'https://habr.com/ru/articles/895978/',\n", " 'https://habr.com/ru/articles/895926/',\n", " 'https://habr.com/ru/articles/895688/',\n", " 'https://habr.com/ru/articles/895938/',\n", " 'https://habr.com/ru/articles/895866/',\n", " 'https://habr.com/ru/articles/895900/',\n", " 'https://habr.com/ru/articles/892396/',\n", " 'https://habr.com/ru/articles/895858/',\n", " 'https://habr.com/ru/articles/895876/',\n", " 'https://habr.com/ru/articles/895854/',\n", " 'https://habr.com/ru/articles/895840/',\n", " 'https://habr.com/ru/articles/895864/',\n", " 'https://habr.com/ru/articles/895914/',\n", " 'https://habr.com/ru/articles/895868/',\n", " 'https://habr.com/ru/articles/895856/',\n", " 'https://habr.com/ru/articles/895580/',\n", " 'https://habr.com/ru/articles/895622/',\n", " 'https://habr.com/ru/articles/895922/',\n", " 'https://habr.com/ru/articles/895852/',\n", " 'https://habr.com/ru/articles/895906/',\n", " 'https://habr.com/ru/articles/895636/',\n", " 'https://habr.com/ru/articles/895428/',\n", " 'https://habr.com/ru/articles/895850/',\n", " 'https://habr.com/ru/articles/895822/',\n", " 'https://habr.com/ru/articles/895792/',\n", " 'https://habr.com/ru/articles/895790/',\n", " 'https://habr.com/ru/articles/895640/',\n", " 'https://habr.com/ru/articles/895784/',\n", " 'https://habr.com/ru/articles/894472/',\n", " 'https://habr.com/ru/articles/895782/',\n", " 'https://habr.com/ru/articles/895802/',\n", " 'https://habr.com/ru/articles/895826/',\n", " 'https://habr.com/ru/articles/895598/',\n", " 'https://habr.com/ru/articles/895832/',\n", " 'https://habr.com/ru/articles/895824/',\n", " 'https://habr.com/ru/articles/895798/',\n", " 'https://habr.com/ru/articles/895778/',\n", " 'https://habr.com/ru/articles/895800/',\n", " 'https://habr.com/ru/articles/895830/',\n", " 'https://habr.com/ru/articles/895818/',\n", " 'https://habr.com/ru/articles/895806/',\n", " 'https://habr.com/ru/articles/895780/',\n", " 'https://habr.com/ru/articles/895768/',\n", " 'https://habr.com/ru/articles/895692/',\n", " 'https://habr.com/ru/articles/894500/',\n", " 'https://habr.com/ru/articles/895638/',\n", " 'https://habr.com/ru/articles/895742/',\n", " 'https://habr.com/ru/articles/895696/',\n", " 'https://habr.com/ru/articles/895764/',\n", " 'https://habr.com/ru/articles/895118/',\n", " 'https://habr.com/ru/articles/895674/',\n", " 'https://habr.com/ru/articles/895756/',\n", " 'https://habr.com/ru/articles/895718/',\n", " 'https://habr.com/ru/articles/895722/',\n", " 'https://habr.com/ru/articles/895138/',\n", " 'https://habr.com/ru/articles/892786/',\n", " 'https://habr.com/ru/articles/895654/',\n", " 'https://habr.com/ru/articles/895678/',\n", " 'https://habr.com/ru/articles/895754/',\n", " 'https://habr.com/ru/articles/893720/',\n", " 'https://habr.com/ru/articles/895684/',\n", " 'https://habr.com/ru/articles/895750/',\n", " 'https://habr.com/ru/articles/895694/',\n", " 'https://habr.com/ru/articles/895220/',\n", " 'https://habr.com/ru/articles/895560/',\n", " 'https://habr.com/ru/articles/895606/',\n", " 'https://habr.com/ru/articles/895568/',\n", " 'https://habr.com/ru/articles/895634/',\n", " 'https://habr.com/ru/articles/895540/',\n", " 'https://habr.com/ru/articles/895658/',\n", " 'https://habr.com/ru/articles/895432/',\n", " 'https://habr.com/ru/articles/895550/',\n", " 'https://habr.com/ru/articles/895582/',\n", " 'https://habr.com/ru/articles/894742/',\n", " 'https://habr.com/ru/articles/895668/',\n", " 'https://habr.com/ru/articles/895590/',\n", " 'https://habr.com/ru/articles/895556/',\n", " 'https://habr.com/ru/articles/895632/',\n", " 'https://habr.com/ru/articles/895662/',\n", " 'https://habr.com/ru/articles/895608/',\n", " 'https://habr.com/ru/articles/895596/',\n", " 'https://habr.com/ru/articles/895588/',\n", " 'https://habr.com/ru/articles/895666/',\n", " 'https://habr.com/ru/articles/895610/',\n", " 'https://habr.com/ru/articles/895532/',\n", " 'https://habr.com/ru/articles/895524/',\n", " 'https://habr.com/ru/articles/895498/',\n", " 'https://habr.com/ru/articles/895534/',\n", " 'https://habr.com/ru/articles/895508/',\n", " 'https://habr.com/ru/articles/895542/',\n", " 'https://habr.com/ru/articles/895416/',\n", " 'https://habr.com/ru/articles/895536/',\n", " 'https://habr.com/ru/articles/895346/',\n", " 'https://habr.com/ru/articles/895496/',\n", " 'https://habr.com/ru/articles/881918/',\n", " 'https://habr.com/ru/articles/895512/',\n", " 'https://habr.com/ru/articles/895282/',\n", " 'https://habr.com/ru/articles/895458/',\n", " 'https://habr.com/ru/articles/895410/',\n", " 'https://habr.com/ru/articles/895544/',\n", " 'https://habr.com/ru/articles/894604/',\n", " 'https://habr.com/ru/articles/895530/',\n", " 'https://habr.com/ru/articles/895464/',\n", " 'https://habr.com/ru/articles/895454/',\n", " 'https://habr.com/ru/articles/895448/',\n", " 'https://habr.com/ru/articles/895404/',\n", " 'https://habr.com/ru/articles/895488/',\n", " 'https://habr.com/ru/articles/895436/',\n", " 'https://habr.com/ru/articles/895462/',\n", " 'https://habr.com/ru/articles/893168/',\n", " 'https://habr.com/ru/articles/895450/',\n", " 'https://habr.com/ru/articles/895494/',\n", " 'https://habr.com/ru/articles/895490/',\n", " 'https://habr.com/ru/articles/895444/',\n", " 'https://habr.com/ru/articles/895440/',\n", " 'https://habr.com/ru/articles/894754/',\n", " 'https://habr.com/ru/articles/895446/',\n", " 'https://habr.com/ru/articles/894850/',\n", " 'https://habr.com/ru/articles/895408/',\n", " 'https://habr.com/ru/articles/895478/',\n", " 'https://habr.com/ru/articles/895482/',\n", " 'https://habr.com/ru/articles/895486/',\n", " 'https://habr.com/ru/articles/895426/',\n", " 'https://habr.com/ru/articles/893722/',\n", " 'https://habr.com/ru/articles/895148/',\n", " 'https://habr.com/ru/articles/895362/',\n", " 'https://habr.com/ru/articles/895332/',\n", " 'https://habr.com/ru/articles/895252/',\n", " 'https://habr.com/ru/articles/895344/',\n", " 'https://habr.com/ru/articles/895376/',\n", " 'https://habr.com/ru/articles/891416/',\n", " 'https://habr.com/ru/articles/895396/',\n", " 'https://habr.com/ru/articles/895174/',\n", " 'https://habr.com/ru/articles/895402/',\n", " 'https://habr.com/ru/articles/894652/',\n", " 'https://habr.com/ru/articles/895380/',\n", " 'https://habr.com/ru/articles/894134/',\n", " 'https://habr.com/ru/articles/895330/',\n", " 'https://habr.com/ru/articles/895382/',\n", " 'https://habr.com/ru/articles/895348/',\n", " 'https://habr.com/ru/articles/895368/',\n", " 'https://habr.com/ru/articles/895366/',\n", " 'https://habr.com/ru/articles/895272/',\n", " 'https://habr.com/ru/articles/895342/',\n", " 'https://habr.com/ru/articles/895276/',\n", " 'https://habr.com/ru/articles/895274/',\n", " 'https://habr.com/ru/articles/895132/',\n", " 'https://habr.com/ru/articles/895292/',\n", " 'https://habr.com/ru/articles/895312/',\n", " 'https://habr.com/ru/articles/892726/',\n", " 'https://habr.com/ru/articles/895296/',\n", " 'https://habr.com/ru/articles/895300/',\n", " 'https://habr.com/ru/articles/895294/',\n", " 'https://habr.com/ru/articles/895298/',\n", " 'https://habr.com/ru/articles/895302/',\n", " 'https://habr.com/ru/articles/888222/',\n", " 'https://habr.com/ru/articles/895230/',\n", " 'https://habr.com/ru/articles/894364/',\n", " 'https://habr.com/ru/articles/894854/',\n", " 'https://habr.com/ru/articles/889404/',\n", " 'https://habr.com/ru/articles/895214/',\n", " 'https://habr.com/ru/articles/895278/',\n", " 'https://habr.com/ru/articles/895268/',\n", " 'https://habr.com/ru/articles/895314/',\n", " 'https://habr.com/ru/articles/876900/',\n", " 'https://habr.com/ru/articles/895242/',\n", " 'https://habr.com/ru/articles/895002/',\n", " 'https://habr.com/ru/articles/895022/',\n", " 'https://habr.com/ru/articles/895212/',\n", " 'https://habr.com/ru/articles/895222/',\n", " 'https://habr.com/ru/articles/895012/',\n", " 'https://habr.com/ru/articles/894484/',\n", " 'https://habr.com/ru/articles/895262/',\n", " 'https://habr.com/ru/articles/895224/',\n", " 'https://habr.com/ru/articles/895226/',\n", " 'https://habr.com/ru/articles/895238/',\n", " 'https://habr.com/ru/articles/894808/',\n", " 'https://habr.com/ru/articles/895236/',\n", " 'https://habr.com/ru/articles/895208/',\n", " 'https://habr.com/ru/articles/895264/',\n", " 'https://habr.com/ru/articles/895108/',\n", " 'https://habr.com/ru/articles/895256/',\n", " 'https://habr.com/ru/articles/894360/',\n", " 'https://habr.com/ru/articles/895218/',\n", " 'https://habr.com/ru/articles/894676/',\n", " 'https://habr.com/ru/articles/885602/',\n", " 'https://habr.com/ru/articles/894936/',\n", " 'https://habr.com/ru/articles/895186/',\n", " 'https://habr.com/ru/articles/894862/',\n", " 'https://habr.com/ru/articles/895176/',\n", " 'https://habr.com/ru/articles/895162/',\n", " 'https://habr.com/ru/articles/894950/',\n", " 'https://habr.com/ru/articles/895034/',\n", " 'https://habr.com/ru/articles/895184/',\n", " 'https://habr.com/ru/articles/895202/',\n", " 'https://habr.com/ru/articles/894678/',\n", " 'https://habr.com/ru/articles/895196/',\n", " 'https://habr.com/ru/articles/895154/',\n", " 'https://habr.com/ru/articles/894614/',\n", " 'https://habr.com/ru/articles/895058/',\n", " 'https://habr.com/ru/articles/895172/',\n", " 'https://habr.com/ru/articles/895164/',\n", " 'https://habr.com/ru/articles/894906/',\n", " 'https://habr.com/ru/articles/895158/',\n", " 'https://habr.com/ru/articles/892920/',\n", " 'https://habr.com/ru/articles/895046/',\n", " 'https://habr.com/ru/articles/895066/',\n", " 'https://habr.com/ru/articles/895092/',\n", " 'https://habr.com/ru/articles/895080/',\n", " 'https://habr.com/ru/articles/895076/',\n", " 'https://habr.com/ru/articles/894514/',\n", " 'https://habr.com/ru/articles/895114/',\n", " 'https://habr.com/ru/articles/894790/',\n", " 'https://habr.com/ru/articles/895060/',\n", " 'https://habr.com/ru/articles/895056/',\n", " 'https://habr.com/ru/articles/894834/',\n", " 'https://habr.com/ru/articles/895048/',\n", " 'https://habr.com/ru/articles/895150/',\n", " 'https://habr.com/ru/articles/895016/',\n", " 'https://habr.com/ru/articles/895146/',\n", " 'https://habr.com/ru/articles/895102/',\n", " 'https://habr.com/ru/articles/895096/',\n", " 'https://habr.com/ru/articles/895098/',\n", " 'https://habr.com/ru/articles/895082/',\n", " 'https://habr.com/ru/articles/894996/',\n", " 'https://habr.com/ru/articles/894984/',\n", " 'https://habr.com/ru/articles/894986/',\n", " 'https://habr.com/ru/articles/894998/',\n", " 'https://habr.com/ru/articles/895018/',\n", " 'https://habr.com/ru/articles/894968/',\n", " 'https://habr.com/ru/articles/894978/',\n", " 'https://habr.com/ru/articles/894980/',\n", " 'https://habr.com/ru/articles/894792/',\n", " 'https://habr.com/ru/articles/894956/',\n", " 'https://habr.com/ru/articles/894220/',\n", " 'https://habr.com/ru/articles/894952/',\n", " 'https://habr.com/ru/articles/895006/',\n", " 'https://habr.com/ru/articles/894418/',\n", " 'https://habr.com/ru/articles/890278/',\n", " 'https://habr.com/ru/articles/895008/',\n", " 'https://habr.com/ru/articles/894924/',\n", " 'https://habr.com/ru/articles/894830/',\n", " 'https://habr.com/ru/articles/892852/',\n", " 'https://habr.com/ru/articles/893484/',\n", " 'https://habr.com/ru/articles/894902/',\n", " 'https://habr.com/ru/articles/894914/',\n", " 'https://habr.com/ru/articles/894162/',\n", " 'https://habr.com/ru/articles/893970/',\n", " 'https://habr.com/ru/articles/894966/',\n", " 'https://habr.com/ru/articles/893856/',\n", " 'https://habr.com/ru/articles/894118/',\n", " 'https://habr.com/ru/articles/894940/',\n", " 'https://habr.com/ru/articles/894684/',\n", " 'https://habr.com/ru/articles/894916/',\n", " 'https://habr.com/ru/articles/894642/',\n", " 'https://habr.com/ru/articles/894780/',\n", " 'https://habr.com/ru/articles/893644/',\n", " 'https://habr.com/ru/articles/894768/',\n", " 'https://habr.com/ru/articles/894922/',\n", " 'https://habr.com/ru/articles/894910/',\n", " 'https://habr.com/ru/articles/892234/',\n", " 'https://habr.com/ru/articles/894884/',\n", " 'https://habr.com/ru/articles/894930/',\n", " 'https://habr.com/ru/articles/894892/',\n", " 'https://habr.com/ru/articles/893990/',\n", " 'https://habr.com/ru/articles/894552/',\n", " 'https://habr.com/ru/articles/893164/',\n", " 'https://habr.com/ru/articles/894760/',\n", " 'https://habr.com/ru/articles/894800/',\n", " 'https://habr.com/ru/articles/892158/',\n", " 'https://habr.com/ru/articles/893634/',\n", " 'https://habr.com/ru/articles/893800/',\n", " 'https://habr.com/ru/articles/894876/',\n", " 'https://habr.com/ru/articles/894242/',\n", " 'https://habr.com/ru/articles/894560/',\n", " 'https://habr.com/ru/articles/889734/',\n", " 'https://habr.com/ru/articles/894838/',\n", " 'https://habr.com/ru/articles/894872/',\n", " 'https://habr.com/ru/articles/894866/',\n", " 'https://habr.com/ru/articles/894478/',\n", " 'https://habr.com/ru/articles/892502/',\n", " 'https://habr.com/ru/articles/880606/',\n", " 'https://habr.com/ru/articles/894400/',\n", " 'https://habr.com/ru/articles/894568/',\n", " 'https://habr.com/ru/articles/894654/',\n", " 'https://habr.com/ru/articles/894804/',\n", " 'https://habr.com/ru/articles/894706/',\n", " 'https://habr.com/ru/articles/894748/',\n", " 'https://habr.com/ru/articles/894770/',\n", " 'https://habr.com/ru/articles/894816/',\n", " 'https://habr.com/ru/articles/894774/',\n", " 'https://habr.com/ru/articles/894818/',\n", " 'https://habr.com/ru/articles/894736/',\n", " 'https://habr.com/ru/articles/894304/',\n", " 'https://habr.com/ru/articles/894756/',\n", " 'https://habr.com/ru/articles/894752/',\n", " 'https://habr.com/ru/articles/894738/',\n", " 'https://habr.com/ru/articles/894788/',\n", " 'https://habr.com/ru/articles/894802/',\n", " 'https://habr.com/ru/articles/894766/',\n", " 'https://habr.com/ru/articles/894740/',\n", " 'https://habr.com/ru/articles/894578/',\n", " 'https://habr.com/ru/articles/894786/',\n", " 'https://habr.com/ru/articles/894750/',\n", " 'https://habr.com/ru/articles/894724/',\n", " 'https://habr.com/ru/articles/894666/',\n", " 'https://habr.com/ru/articles/894690/',\n", " 'https://habr.com/ru/articles/894650/',\n", " 'https://habr.com/ru/articles/894722/',\n", " 'https://habr.com/ru/articles/894672/',\n", " 'https://habr.com/ru/articles/894658/',\n", " 'https://habr.com/ru/articles/894686/',\n", " 'https://habr.com/ru/articles/894698/',\n", " 'https://habr.com/ru/articles/894704/',\n", " 'https://habr.com/ru/articles/894174/',\n", " 'https://habr.com/ru/articles/894408/',\n", " 'https://habr.com/ru/articles/894720/',\n", " 'https://habr.com/ru/articles/894728/',\n", " 'https://habr.com/ru/articles/894664/',\n", " 'https://habr.com/ru/articles/894680/',\n", " 'https://habr.com/ru/articles/894670/',\n", " 'https://habr.com/ru/articles/894692/',\n", " 'https://habr.com/ru/articles/894702/',\n", " 'https://habr.com/ru/articles/894730/',\n", " 'https://habr.com/ru/articles/894656/',\n", " 'https://habr.com/ru/articles/894346/',\n", " 'https://habr.com/ru/articles/894606/',\n", " 'https://habr.com/ru/articles/894618/',\n", " 'https://habr.com/ru/articles/894306/',\n", " 'https://habr.com/ru/articles/894648/',\n", " 'https://habr.com/ru/articles/894588/',\n", " 'https://habr.com/ru/articles/893824/',\n", " 'https://habr.com/ru/articles/894580/',\n", " 'https://habr.com/ru/articles/894582/',\n", " 'https://habr.com/ru/articles/894646/',\n", " 'https://habr.com/ru/articles/891928/',\n", " 'https://habr.com/ru/articles/893964/',\n", " 'https://habr.com/ru/articles/894590/',\n", " 'https://habr.com/ru/articles/894628/',\n", " 'https://habr.com/ru/articles/894624/',\n", " 'https://habr.com/ru/articles/894546/',\n", " 'https://habr.com/ru/articles/894612/',\n", " 'https://habr.com/ru/articles/892882/',\n", " 'https://habr.com/ru/articles/894602/',\n", " 'https://habr.com/ru/articles/894556/',\n", " 'https://habr.com/ru/articles/894530/',\n", " 'https://habr.com/ru/articles/878282/',\n", " 'https://habr.com/ru/articles/894570/',\n", " 'https://habr.com/ru/articles/894508/',\n", " 'https://habr.com/ru/articles/894502/',\n", " 'https://habr.com/ru/articles/893852/',\n", " 'https://habr.com/ru/articles/894348/',\n", " 'https://habr.com/ru/articles/889822/',\n", " 'https://habr.com/ru/articles/894554/',\n", " 'https://habr.com/ru/articles/894528/',\n", " 'https://habr.com/ru/articles/894462/',\n", " 'https://habr.com/ru/articles/894558/',\n", " 'https://habr.com/ru/articles/894562/',\n", " 'https://habr.com/ru/articles/894320/',\n", " 'https://habr.com/ru/articles/894524/',\n", " 'https://habr.com/ru/articles/894510/',\n", " 'https://habr.com/ru/articles/894544/',\n", " 'https://habr.com/ru/articles/894572/',\n", " 'https://habr.com/ru/articles/891988/',\n", " 'https://habr.com/ru/articles/894480/',\n", " 'https://habr.com/ru/articles/894458/',\n", " 'https://habr.com/ru/articles/894446/',\n", " 'https://habr.com/ru/articles/894282/',\n", " 'https://habr.com/ru/articles/894260/',\n", " 'https://habr.com/ru/articles/894456/',\n", " 'https://habr.com/ru/articles/894426/',\n", " 'https://habr.com/ru/articles/894448/',\n", " 'https://habr.com/ru/articles/894486/',\n", " 'https://habr.com/ru/articles/894438/',\n", " 'https://habr.com/ru/articles/894442/',\n", " 'https://habr.com/ru/articles/894176/',\n", " 'https://habr.com/ru/articles/894430/',\n", " 'https://habr.com/ru/articles/894432/',\n", " 'https://habr.com/ru/articles/881424/',\n", " 'https://habr.com/ru/articles/893506/',\n", " 'https://habr.com/ru/articles/894314/',\n", " 'https://habr.com/ru/articles/894122/',\n", " 'https://habr.com/ru/articles/891702/',\n", " 'https://habr.com/ru/articles/894390/',\n", " 'https://habr.com/ru/articles/894420/',\n", " 'https://habr.com/ru/articles/894398/',\n", " 'https://habr.com/ru/articles/894370/',\n", " 'https://habr.com/ru/articles/894338/',\n", " 'https://habr.com/ru/articles/894368/',\n", " 'https://habr.com/ru/articles/894392/',\n", " 'https://habr.com/ru/articles/894236/',\n", " 'https://habr.com/ru/articles/894412/',\n", " 'https://habr.com/ru/articles/894382/',\n", " 'https://habr.com/ru/articles/893470/',\n", " 'https://habr.com/ru/articles/894366/',\n", " 'https://habr.com/ru/articles/893848/',\n", " 'https://habr.com/ru/articles/894402/',\n", " 'https://habr.com/ru/articles/894254/',\n", " 'https://habr.com/ru/articles/894334/',\n", " 'https://habr.com/ru/articles/894384/',\n", " 'https://habr.com/ru/articles/893966/',\n", " 'https://habr.com/ru/articles/894372/',\n", " 'https://habr.com/ru/articles/894356/',\n", " 'https://habr.com/ru/articles/894376/',\n", " 'https://habr.com/ru/articles/893130/',\n", " 'https://habr.com/ru/articles/894214/',\n", " 'https://habr.com/ru/articles/894180/',\n", " 'https://habr.com/ru/articles/892890/',\n", " 'https://habr.com/ru/articles/894286/',\n", " 'https://habr.com/ru/articles/894262/',\n", " 'https://habr.com/ru/articles/894280/',\n", " 'https://habr.com/ru/articles/894146/',\n", " 'https://habr.com/ru/articles/894140/',\n", " 'https://habr.com/ru/articles/894182/',\n", " 'https://habr.com/ru/articles/894284/',\n", " 'https://habr.com/ru/articles/894248/',\n", " 'https://habr.com/ru/articles/894350/',\n", " 'https://habr.com/ru/articles/894316/',\n", " 'https://habr.com/ru/articles/894258/',\n", " 'https://habr.com/ru/articles/893988/',\n", " 'https://habr.com/ru/articles/894266/',\n", " 'https://habr.com/ru/articles/892118/',\n", " 'https://habr.com/ru/articles/894292/',\n", " 'https://habr.com/ru/articles/894216/',\n", " 'https://habr.com/ru/articles/894170/',\n", " 'https://habr.com/ru/articles/894214/',\n", " 'https://habr.com/ru/articles/894228/',\n", " 'https://habr.com/ru/articles/893846/',\n", " 'https://habr.com/ru/articles/894222/',\n", " 'https://habr.com/ru/articles/894104/',\n", " 'https://habr.com/ru/articles/893416/',\n", " 'https://habr.com/ru/articles/894234/',\n", " 'https://habr.com/ru/articles/894136/',\n", " 'https://habr.com/ru/articles/893460/',\n", " 'https://habr.com/ru/articles/894062/',\n", " 'https://habr.com/ru/articles/894168/',\n", " 'https://habr.com/ru/articles/894154/',\n", " 'https://habr.com/ru/articles/894152/',\n", " 'https://habr.com/ru/articles/893402/',\n", " 'https://habr.com/ru/articles/894142/',\n", " 'https://habr.com/ru/articles/893916/',\n", " 'https://habr.com/ru/articles/894226/',\n", " 'https://habr.com/ru/articles/893956/',\n", " 'https://habr.com/ru/articles/884072/',\n", " 'https://habr.com/ru/articles/894090/',\n", " 'https://habr.com/ru/articles/892268/',\n", " 'https://habr.com/ru/articles/893838/',\n", " 'https://habr.com/ru/articles/894014/',\n", " 'https://habr.com/ru/articles/894114/',\n", " 'https://habr.com/ru/articles/894088/',\n", " 'https://habr.com/ru/articles/894074/',\n", " 'https://habr.com/ru/articles/893508/',\n", " 'https://habr.com/ru/articles/894082/',\n", " 'https://habr.com/ru/articles/885924/',\n", " 'https://habr.com/ru/articles/894110/',\n", " 'https://habr.com/ru/articles/893744/',\n", " 'https://habr.com/ru/articles/894138/',\n", " 'https://habr.com/ru/articles/894004/',\n", " 'https://habr.com/ru/articles/893948/',\n", " 'https://habr.com/ru/articles/894100/',\n", " 'https://habr.com/ru/articles/893900/',\n", " 'https://habr.com/ru/articles/894126/',\n", " 'https://habr.com/ru/articles/894086/',\n", " 'https://habr.com/ru/articles/891624/',\n", " 'https://habr.com/ru/articles/892348/',\n", " 'https://habr.com/ru/articles/894050/',\n", " 'https://habr.com/ru/articles/893936/',\n", " 'https://habr.com/ru/articles/894046/',\n", " 'https://habr.com/ru/articles/893012/',\n", " 'https://habr.com/ru/articles/892654/',\n", " 'https://habr.com/ru/articles/893802/',\n", " 'https://habr.com/ru/articles/893694/',\n", " 'https://habr.com/ru/articles/891690/',\n", " 'https://habr.com/ru/articles/892952/',\n", " 'https://habr.com/ru/articles/894056/',\n", " 'https://habr.com/ru/articles/894068/',\n", " 'https://habr.com/ru/articles/893914/',\n", " 'https://habr.com/ru/articles/894042/',\n", " 'https://habr.com/ru/articles/893572/',\n", " 'https://habr.com/ru/articles/894006/',\n", " 'https://habr.com/ru/articles/894028/',\n", " 'https://habr.com/ru/articles/893822/',\n", " 'https://habr.com/ru/articles/893612/',\n", " 'https://habr.com/ru/articles/894036/',\n", " 'https://habr.com/ru/articles/893994/',\n", " 'https://habr.com/ru/articles/893874/',\n", " 'https://habr.com/ru/articles/893250/',\n", " 'https://habr.com/ru/articles/894008/',\n", " 'https://habr.com/ru/articles/893006/',\n", " 'https://habr.com/ru/articles/892140/',\n", " 'https://habr.com/ru/articles/893958/',\n", " 'https://habr.com/ru/articles/893910/',\n", " 'https://habr.com/ru/articles/893652/',\n", " 'https://habr.com/ru/articles/893960/',\n", " 'https://habr.com/ru/articles/893536/',\n", " 'https://habr.com/ru/articles/893876/',\n", " 'https://habr.com/ru/articles/893922/',\n", " 'https://habr.com/ru/articles/893950/',\n", " 'https://habr.com/ru/articles/894026/',\n", " 'https://habr.com/ru/articles/893882/',\n", " 'https://habr.com/ru/articles/893968/',\n", " 'https://habr.com/ru/articles/887276/',\n", " 'https://habr.com/ru/articles/893628/',\n", " 'https://habr.com/ru/articles/894032/',\n", " 'https://habr.com/ru/articles/893866/',\n", " 'https://habr.com/ru/articles/893890/',\n", " 'https://habr.com/ru/articles/893854/',\n", " 'https://habr.com/ru/articles/893926/',\n", " 'https://habr.com/ru/articles/893896/',\n", " 'https://habr.com/ru/articles/893892/',\n", " 'https://habr.com/ru/articles/893862/',\n", " 'https://habr.com/ru/articles/893870/',\n", " 'https://habr.com/ru/articles/893894/',\n", " 'https://habr.com/ru/articles/893860/',\n", " 'https://habr.com/ru/articles/893804/',\n", " 'https://habr.com/ru/articles/893918/',\n", " 'https://habr.com/ru/articles/893908/',\n", " 'https://habr.com/ru/articles/893542/',\n", " 'https://habr.com/ru/articles/893616/',\n", " 'https://habr.com/ru/articles/893938/',\n", " 'https://habr.com/ru/articles/893942/',\n", " 'https://habr.com/ru/articles/893932/',\n", " 'https://habr.com/ru/articles/893920/',\n", " 'https://habr.com/ru/articles/893806/']" ] }, "execution_count": 52, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import re\n", "\n", "habr_prefix = \"https://habr.com/ru/articles/\"\n", "article_prefix = \"https://habr.com/ru/\"\n", "\n", "articles_urls = []\n", "\n", "\n", "for page_suffix in [\"\"] + [f\"page{i}/\" for i in range(50)]:\n", " page_url = habr_prefix + page_suffix\n", " src = requests.get(page_url).text\n", " for article_suffix in set(re.findall('articles/\\d+/', src)):\n", " articles_urls.append(article_prefix + article_suffix)\n", "\n", "articles_urls" ] }, { "cell_type": "code", "execution_count": 53, "id": "cf76dc3b-ef5e-45b2-b8f5-20d7164a0444", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "1000" ] }, "execution_count": 53, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(articles_urls)" ] }, { "cell_type": "code", "execution_count": 54, "id": "cb4a4c34-46e9-4c58-b987-a0583fe479e8", "metadata": { "scrolled": true }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "100%|██████████████████████████| 1000/1000 [09:35<00:00, 1.74it/s]\n" ] } ], "source": [ "import time\n", "from tqdm import tqdm\n", "\n", "pagers = []\n", "\n", "for url in tqdm(articles_urls):\n", " pagers.append(get_pager(url))\n", " " ] }, { "cell_type": "code", "execution_count": 55, "id": "46ff7a3f-0ffd-4b30-86f8-7ae41dfd999f", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "1000" ] }, "execution_count": 55, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pagers = [x for x in pagers if x is not None]\n", "len(pagers)" ] }, { "cell_type": "code", "execution_count": 56, "id": "f96feb7a-e4ef-4376-8561-71b93b45e07d", "metadata": {}, "outputs": [], "source": [ "from nltk.corpus import stopwords\n", "from nltk.tokenize import word_tokenize\n", "stop_words = set(stopwords.words('russian'))\n", "\n", "stop_words\n", "\n", "for pager in pagers:\n", " pager.original_tags = list(set([x for x in pager.original_tags if x not in stop_words]))" ] }, { "cell_type": "code", "execution_count": 57, "id": "29f38543-e898-4d57-b4cd-54a1766208dc", "metadata": {}, "outputs": [], "source": [ "# summarizer = Summarizer(\"cuda\")\n", "\n", "for pager in pagers:\n", " pager.text = pager.text[:3000]" ] }, { "cell_type": "code", "execution_count": 58, "id": "2b7ea244-93cb-4c62-8da9-d6c530f3059e", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/tmp/ipykernel_57433/3926614870.py:3: PydanticDeprecatedSince20: The `dict` method is deprecated; use `model_dump` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.10/migration/\n", " df = pd.DataFrame([pager.dict() for pager in pagers])\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
titletextoriginal_tags
0Раскраска листинга процедуры T-SQL значениями ...Сразу покажу, о чем идет речь, чтобы вы решили...[sql, tsql, markup, profiler, performance]
1Искусственный интеллект и алгоритмы в энергети...Энергетические системы — одни из самых сложных...[алгоритмы, ии, ит, интеллект, лэп, энергетика...
2«Эти фильмы были ужасны» — короткий рассказ о ...Концепт-арт к «Ксеногенезису» (1978) — к так и...[кино, рисование, фильмы, фантастика, эффекты,...
3Что будет, если не использовать TCP или UDP? /...Коммутаторы, маршрутизаторы, брандмауэры — все...[эксперимент, данных, tcp, передача, протоколы...
4Почему тренд на аэрошоссеры возвращаются / ХабрПочему аэрошоссеры возвращаются в 2025 годуНес...[скорость, вес, шоссейный, аэротруба, велосипе...
............
995Service Mesh в дикой природе или как не стать ...ВведениеУгрозы безопасности в Service Mesh1. О...[безопасность, приложений, микросервисы, микро...
996Apple Pro Weekly News (17.03 – 23.03.25) / ХабрЧто сломали в последнем обновлении iOS, от чег...[Apple, Siri, iMazing, iPhone, App, iPad, iOS,...
997Как не завязнуть в болоте рутины и оставаться ...С того момента, как я начал работать IT менедж...[командой, проектов, руководство, проектами, л...
998Релиз Linux 6.14 / Хабр24 марта 2025 года Линус Торвальдс представил ...[6.14, Linux, торвальдс]
999Чтобы побеждать, достаточно одной книги / Хабр\\nЕсть два правдивых утверждения.\\n\\nПервое пр...[ruvds_перевод, техническая, программиста, лит...
\n", "

1000 rows × 3 columns

\n", "
" ], "text/plain": [ " title \\\n", "0 Раскраска листинга процедуры T-SQL значениями ... \n", "1 Искусственный интеллект и алгоритмы в энергети... \n", "2 «Эти фильмы были ужасны» — короткий рассказ о ... \n", "3 Что будет, если не использовать TCP или UDP? /... \n", "4 Почему тренд на аэрошоссеры возвращаются / Хабр \n", ".. ... \n", "995 Service Mesh в дикой природе или как не стать ... \n", "996 Apple Pro Weekly News (17.03 – 23.03.25) / Хабр \n", "997 Как не завязнуть в болоте рутины и оставаться ... \n", "998 Релиз Linux 6.14 / Хабр \n", "999 Чтобы побеждать, достаточно одной книги / Хабр \n", "\n", " text \\\n", "0 Сразу покажу, о чем идет речь, чтобы вы решили... \n", "1 Энергетические системы — одни из самых сложных... \n", "2 Концепт-арт к «Ксеногенезису» (1978) — к так и... \n", "3 Коммутаторы, маршрутизаторы, брандмауэры — все... \n", "4 Почему аэрошоссеры возвращаются в 2025 годуНес... \n", ".. ... \n", "995 ВведениеУгрозы безопасности в Service Mesh1. О... \n", "996 Что сломали в последнем обновлении iOS, от чег... \n", "997 С того момента, как я начал работать IT менедж... \n", "998 24 марта 2025 года Линус Торвальдс представил ... \n", "999 \\nЕсть два правдивых утверждения.\\n\\nПервое пр... \n", "\n", " original_tags \n", "0 [sql, tsql, markup, profiler, performance] \n", "1 [алгоритмы, ии, ит, интеллект, лэп, энергетика... \n", "2 [кино, рисование, фильмы, фантастика, эффекты,... \n", "3 [эксперимент, данных, tcp, передача, протоколы... \n", "4 [скорость, вес, шоссейный, аэротруба, велосипе... \n", ".. ... \n", "995 [безопасность, приложений, микросервисы, микро... \n", "996 [Apple, Siri, iMazing, iPhone, App, iPad, iOS,... \n", "997 [командой, проектов, руководство, проектами, л... \n", "998 [6.14, Linux, торвальдс] \n", "999 [ruvds_перевод, техническая, программиста, лит... \n", "\n", "[1000 rows x 3 columns]" ] }, "execution_count": 58, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import pandas as pd\n", "\n", "df = pd.DataFrame([pager.dict() for pager in pagers])\n", "df" ] }, { "cell_type": "code", "execution_count": 62, "id": "a3959537-82d9-42b8-91de-ebdc565d35c9", "metadata": {}, "outputs": [], "source": [ "df.to_csv(\"pagers.csv\")" ] }, { "cell_type": "code", "execution_count": 4, "id": "a2c52756-6a8e-4175-8665-673462275627", "metadata": {}, "outputs": [], "source": [ "model_name = \"sarahai/ruT5-base-summarizer\"\n", "tokenizer = T5Tokenizer.from_pretrained(model_name, device_map=\"cpu\")" ] }, { "cell_type": "code", "execution_count": 5, "id": "c90dc27d-6c90-4102-b16b-d1470b73b4e5", "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "df = pd.read_csv(\"pagers.csv\")" ] }, { "cell_type": "code", "execution_count": 6, "id": "61f1ee2d-03f6-4294-8e62-7b0aa11d6584", "metadata": {}, "outputs": [], "source": [ "from torch.nn.utils.rnn import pad_sequence\n", "\n", "class PagerDataset:\n", " def __init__(self, device=\"cpu\", pagers=df):\n", " model_name = \"sarahai/ruT5-base-summarizer\"\n", " self.device = device\n", " self.tokenizer = T5Tokenizer.from_pretrained(model_name, device_map=device)\n", " self.texts = [self.tokenizer(pager, return_tensors=\"pt\").input_ids[0] for pager in pagers['text']]\n", " self.tags = [self.tokenizer(' '.join(eval(pager)), return_tensors=\"pt\").input_ids[0] for pager in pagers['original_tags']]\n", "\n", " def __len__(self):\n", " return len(self.tags)\n", "\n", " def __getitem__(self, index):\n", " return self.texts[index], self.tags[index]\n", "\n", "class PagerCollator:\n", " @staticmethod\n", " def collate_tokens(tokens_batch, padding_value):\n", " return pad_sequence(tokens_batch, batch_first=True, padding_value=padding_value).to(torch.long)\n", "\n", " def __call__(self, batch):\n", " values = [item[0] for item in batch]\n", " labels = [item[1] for item in batch]\n", " input_values = PagerCollator.collate_tokens(values, 0)\n", " mask = (input_values == PagerCollator.collate_tokens(values, 1)).to(torch.long)\n", " labels = PagerCollator.collate_tokens(labels, -100)\n", "\n", " return {\n", " 'input_ids': input_values, \n", " 'labels': labels,\n", " 'attention_mask': mask\n", " }\n" ] }, { "cell_type": "code", "execution_count": 7, "id": "d34516f2-fb6b-4799-a2d9-977fa9582d56", "metadata": {}, "outputs": [], "source": [ "threshold = int(len(df) * 0.9)\n", "\n", "train_set = df.sample(threshold)\n", "\n", "\n", "train = PagerDataset(pagers=train_set)\n", "test = PagerDataset(pagers=df.drop(index = train_set.index))" ] }, { "cell_type": "code", "execution_count": 8, "id": "055adf6e-c1fd-44c8-90d1-9985a85b2895", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "45 Привет! Меня зовут Ирина, уже более пяти лет я...\n", "562 Всем привет! На связи Николай Едомский, руково...\n", "448 В одном из больших кластеров S3 в Точке хранит...\n", "727 Моя история проста как три копейки. В конце 20...\n", "282 Недавно в Общественной палате России состоялся...\n", " ... \n", "802 Arcana OPS — это современный подход к управлен...\n", "2 Концепт-арт к «Ксеногенезису» (1978) — к так и...\n", "583 По прогнозу Gartner, запросы на естественном я...\n", "100 Все современные средства разработки — практиче...\n", "470 \\nЯ слушаю аудиокниги с 2014 года. За это врем...\n", "Name: text, Length: 900, dtype: object" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "train_set['text']" ] }, { "cell_type": "code", "execution_count": 9, "id": "3b1d9351-db15-440b-9ed2-3265363e8423", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Embedding(32100, 768)" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from transformers import T5Tokenizer, T5ForConditionalGeneration\n", "\n", "model_name = \"sarahai/ruT5-base-summarizer\"\n", "model = T5ForConditionalGeneration.from_pretrained(model_name, device_map=\"cuda\")\n", "\n", "model.resize_token_embeddings(len(tokenizer))" ] }, { "cell_type": "code", "execution_count": 10, "id": "f39f757d-6920-496d-b54c-f2d2abd235ff", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/home/semikrasal/.local/lib/python3.10/site-packages/transformers/training_args.py:1594: FutureWarning: `evaluation_strategy` is deprecated and will be removed in version 4.46 of 🤗 Transformers. Use `eval_strategy` instead\n", " warnings.warn(\n" ] } ], "source": [ "from transformers import TrainingArguments\n", "\n", "training_args = TrainingArguments(\n", " output_dir=\"test\",\n", " per_device_train_batch_size=8,\n", " gradient_accumulation_steps=1,\n", " evaluation_strategy=\"steps\",\n", " max_steps=4000,\n", " fp16=False,\n", " save_steps=2000,\n", " eval_steps=32,\n", " logging_steps=30,\n", " learning_rate=1e-3,\n", " weight_decay=0.007,\n", " warmup_steps=250,\n", " gradient_checkpointing=True,\n", " report_to=[],\n", ")" ] }, { "cell_type": "code", "execution_count": null, "id": "d0adcefb-7f8a-47e4-a5b3-1a6784e31df9", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 11, "id": "f5f9d1bf-c4c0-488a-93b7-a69c0f2fcca1", "metadata": {}, "outputs": [], "source": [ "from transformers import Trainer\n", "\n", "trainer = Trainer(\n", " model=model,\n", " data_collator=PagerCollator(),\n", " args=training_args,\n", " train_dataset=train,\n", " eval_dataset=test,\n", ")" ] }, { "cell_type": "code", "execution_count": 12, "id": "76f97845-99d8-4952-9a76-880ae61ff69d", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...\n" ] }, { "data": { "text/html": [ "\n", "
\n", " \n", " \n", " [4000/4000 1:29:52, Epoch 35/36]\n", "
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
StepTraining LossValidation Loss
325.4164004.127873
644.4260004.022820
964.4103003.992565
1284.2236004.147720
1604.0100004.239121
1924.0416004.379041
2244.2064004.391558
2563.9877004.585879
2883.9088004.618155
3204.0041004.647362
3524.1545004.636343
3843.4074004.534647
4163.4281004.678164
4483.4029004.636761
4802.6657004.763720
5122.8316004.805240
5442.9981004.617594
5762.8044005.175800
6082.1274005.117918
6402.2724004.671577
6722.3031004.919456
7042.1581005.571861
7361.6842005.425070
7681.7579005.110906
8001.8620005.411929
8321.4592005.478015
8641.3048005.500659
8961.3650005.509354
9281.4446005.719732
9600.9843006.151366
9921.0176005.967303
10241.0983005.882926
10560.7763006.050797
10880.8032005.942257
11200.8990006.097410
11520.7512006.110172
11840.6676006.307382
12160.7010006.199901
12480.6882006.502533
12800.5893006.538973
13120.4801006.469034
13440.5947006.437945
13760.5516006.482977
14080.4556006.612985
14400.4804006.619091
14720.4470006.681647
15040.3931006.687038
15360.3741006.817354
15680.3484006.522055
16000.3491006.925219
16320.3007006.731413
16640.3169006.704000
16960.3358006.584372
17280.2865006.806914
17600.2610006.755325
17920.2722006.745789
18240.2818007.027082
18560.2328006.984172
18880.2303007.019201
19200.2661007.003294
19520.1847007.019387
19840.1860007.056015
20160.1804007.068249
20480.2036007.093792
20800.1823007.032248
21120.1512007.065526
21440.1612007.156117
21760.1410007.302792
22080.1222007.313677
22400.1324007.292119
22720.1432007.382527
23040.1280007.396447
23360.1029007.321751
23680.1362007.299072
24000.0832007.346662
24320.1092007.440024
24640.0990007.494668
24960.0923007.354113
25280.0778007.522041
25600.0697007.423881
25920.0764007.300383
26240.0683007.523688
26560.0651007.270813
26880.0595007.290957
27200.0619007.378780
27520.0490007.473153
27840.0468007.502200
28160.0479007.495668
28480.0346007.529656
28800.0267007.580919
29120.0283007.630338
29440.0253007.725150
29760.0270007.529447
30080.0148007.630457
30400.0195007.620898
30720.0116007.675141
31040.0134007.726362
31360.0148007.773665
31680.0129007.706628
32000.0101007.726365
32320.0073007.754138
32640.0048007.734880
32960.0071007.717532
33280.0088007.734691
33600.0040007.723871
33920.0061007.772519
34240.0060007.810041
34560.0039007.762656
34880.0055007.769754
35200.0045007.806248
35520.0050007.815495
35840.0050007.834338
36160.0041007.856540
36480.0061007.847775
36800.0033007.834288
37120.0044007.798307
37440.0020007.792634
37760.0042007.783415
38080.0028007.783332
38400.0025007.783207
38720.0033007.791155
39040.0027007.788821
39360.0033007.790948
39680.0029007.795428
40000.0024007.796586

" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stderr", "output_type": "stream", "text": [ "Passing a tuple of `past_key_values` is deprecated and will be removed in Transformers v4.48.0. You should pass an instance of `EncoderDecoderCache` instead, e.g. `past_key_values=EncoderDecoderCache.from_legacy_cache(past_key_values)`.\n" ] }, { "data": { "text/plain": [ "TrainOutput(global_step=4000, training_loss=0.8493731454242952, metrics={'train_runtime': 5394.6282, 'train_samples_per_second': 5.932, 'train_steps_per_second': 0.741, 'total_flos': 2.281669114761216e+16, 'train_loss': 0.8493731454242952, 'epoch': 35.39823008849557})" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "trainer.train()" ] }, { "cell_type": "code", "execution_count": 16, "id": "d23b3b5f-a43c-47d6-b60b-9481fcfae616", "metadata": {}, "outputs": [], "source": [ "result = model.generate(test[2][0].unsqueeze(0).to(\"cuda\"), max_length=20,\n", " min_length=20,\n", " length_penalty=2.0,\n", " num_beams=10, early_stopping=False)" ] }, { "cell_type": "code", "execution_count": 17, "id": "80509438-b94c-4db3-ab49-7027c9d9a8ec", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'Однажды зимним вечером тусили в хорошей дружеской компании и между делом затронули тему про «серые будни ИТ-шника». Про то, что многие концептуальные вещи (а не чисто ремесленные) плохо гуглятся и вызывают ступор у большинства начинающих процессных аналитиков и разработчиков. Ну и как-то так, слово за слово, пришли к идее о том, чтобы было бы здорово разбавить яркими красками и эмоциональными переживаниями сухой язык и весьма условные пиктограммы промышленных спецификаций. Так появился наш новый уникальный мерч, ну и прикольная динамичная настолка. Настолки весьма популярны среди команд разработки, такой формат многим заходит и оказывает благоприятное влияние на формирование коллективов. Но для их закрепления нужна какая-то понятная объединяющая идея. ИдеяМы решили попробовать простыми словами объяснить путь построения процессной компании, и при этом поиграться с описанными в спецификации BPMN 2.0 инструментами, посмотреть, какие они могут дать выгоды и как их можно эффективно комбинировать между собой. А условием победы пусть станет вывод своей компании на максимальный уровень процессной зрелости (условно, конечно) через достижение 4-х глобальных целей: Обеспечить масштабируемость Освоить архитектуру на основе событий Повысить общую инженерную культуру в компании Стандартизировать подходы и обеспечить постоянное улучшение процессов Игровая механика Команды разработки обычно включают от 3 до 10 человек, с поправкой на то, что сложно бывает собрать всех в одном месте и в одно время. Поэтому оптимальным представляется вариант, при котором играть смогут от 2-х до 5 человек. Игровое поле предполагалось, что будет состоять из трех областей: Область процессных спецификаций, в которой расположим стопки игровых карточек, сгруппированных по типам. Область ресурсов, из которой игроки смогут черпать ресурсы в виде монеток — «story-поинтов» по ходу игры, и куда будут возвращаться потраченные игроками ресурсы. Области компаний игроков, состоящие из поставленных в начале игр'" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "tokenizer.decode(test[2][0], skip_special_tokens=True)" ] }, { "cell_type": "code", "execution_count": 18, "id": "ce9753cb-ed08-4bdb-a89a-e3f8af8ec25c", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'процессов BPM Fab Доходность график gamedev свободный unreal Разработка'" ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "tokenizer.decode(result[0], skip_special_tokens=True)" ] }, { "cell_type": "code", "execution_count": 19, "id": "fffdd4f3-1081-44aa-93b8-019cc599da4b", "metadata": {}, "outputs": [], "source": [ "loaded_model = T5ForConditionalGeneration.from_pretrained(\"test/checkpoint-4000\", device_map=\"cpu\")" ] }, { "cell_type": "code", "execution_count": 22, "id": "2874241c-59f7-480b-961c-7a2dbc50f01c", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "CPU times: user 18 µs, sys: 2 µs, total: 20 µs\n", "Wall time: 477 µs\n" ] } ], "source": [ "%time\n", "\n", "result = loaded_model.generate(test[2][0].unsqueeze(0), max_length=20,\n", " min_length=20,\n", " length_penalty=2.0,\n", " num_beams=10, early_stopping=False)" ] }, { "cell_type": "code", "execution_count": 21, "id": "819b2bfc-dc88-4607-963e-0bbda624a84a", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "tensor([[ 0, 6830, 850, 856, 934, 897, 4877, 422, 986, 370,\n", " 5293, 8, 4252, 20543, 6568, 17276, 5888, 2045, 700, 23894]])" ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ "result" ] }, { "cell_type": "code", "execution_count": null, "id": "c4c4cd73-4439-4485-89ed-92a1297c4295", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.12" } }, "nbformat": 4, "nbformat_minor": 5 }