{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "44972eb1-639a-4c22-9984-cb097ebf788d", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0" ] }, "execution_count": 1, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import os\n", "os.environ[\"CUDA_DEVICE_ORDER\"]=\"PCI_BUS_ID\"\n", "os.environ[\"CUDA_VISIBLE_DEVICES\"]=\"3\"\n", "import torch\n", "torch.cuda.set_device(0)\n", "torch.cuda.current_device()" ] }, { "cell_type": "code", "execution_count": 2, "id": "a1de668b-1a19-4e2b-bdb6-18288c9f7198", "metadata": {}, "outputs": [], "source": [ "import torch\n", "from transformers import T5Tokenizer, T5ForConditionalGeneration\n", "\n", "from accelerate import Accelerator\n", "\n", "class Summarizer:\n", " def __init__(self, device=\"cpu\"):\n", " model_name = \"sarahai/ruT5-base-summarizer\"\n", " self.device = device\n", " self.tokenizer = T5Tokenizer.from_pretrained(model_name, device_map=device)\n", " self.model = T5ForConditionalGeneration.from_pretrained(model_name, device_map=device)\n", "\n", " def summarize(self, text, max_length=100, min_length=50, num_beams=5):\n", " input_ids = self.tokenizer(text, return_tensors=\"pt\").input_ids.to(self.device)\n", " # input_ids = torch.nn.utils.rnn.pad_sequence(input_ids.squeeze().chunk(chunk_num, 0), batch_first=True, padding_value=-100)\n", " outputs = self.model.generate(input_ids, max_length=max_length,\n", " min_length=min_length,\n", " length_penalty=2.0,\n", " num_beams=num_beams, early_stopping=True)\n", "\n", " return self.tokenizer.decode(outputs[0], skip_special_tokens=True)\n", "\n", " def generate(self, indexes, max_length=100, min_length=50, num_beams=5):\n", " return self.model.generate(indexes.unsqueeze(0), max_length=max_length,\n", " min_length=min_length,\n", " length_penalty=2.0,\n", " num_beams=num_beams, early_stopping=True).squeeze()" ] }, { "cell_type": "code", "execution_count": 3, "id": "827f5fff-1c35-43fb-a560-0506b6a9b270", "metadata": {}, "outputs": [], "source": [ "# summarizer = Summarizer(\"cuda:3\")" ] }, { "cell_type": "code", "execution_count": 51, "id": "9be5b5b4-7ff1-4c5f-9457-caa9ee16f6cd", "metadata": {}, "outputs": [], "source": [ "from bs4 import BeautifulSoup\n", "from pydantic import BaseModel\n", "import requests\n", "from typing import Optional\n", "\n", "class Pager(BaseModel):\n", " title: str\n", " text: str\n", " original_tags: list[str]\n", "\n", "def is_valid_page(url):\n", " return True\n", "\n", "def get_pager(url)->Optional[Pager]:\n", " try:\n", " req = requests.get(url)\n", " soup = BeautifulSoup(req.text, 'lxml')\n", " query = soup.find(\"div\", class_=\"article-formatted-body\")\n", " title = soup.title.string\n", " tags = []\n", " for tag in soup.find_all(\"meta\"):\n", " if tag.get(\"name\", None) == \"keywords\":\n", " tags = [x for x in re.split(',| ', tag[\"content\"]) if len(x) > 0]\n", " return Pager(title=title, text=query.get_text(), original_tags=tags)\n", " except:\n", " return None" ] }, { "cell_type": "code", "execution_count": 52, "id": "32a286e5-4511-4951-bcb0-b53fe4808897", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['https://habr.com/ru/articles/897282/',\n", " 'https://habr.com/ru/articles/897472/',\n", " 'https://habr.com/ru/articles/891132/',\n", " 'https://habr.com/ru/articles/897224/',\n", " 'https://habr.com/ru/articles/897636/',\n", " 'https://habr.com/ru/articles/897496/',\n", " 'https://habr.com/ru/articles/897630/',\n", " 'https://habr.com/ru/articles/897518/',\n", " 'https://habr.com/ru/articles/897640/',\n", " 'https://habr.com/ru/articles/897574/',\n", " 'https://habr.com/ru/articles/897632/',\n", " 'https://habr.com/ru/articles/891488/',\n", " 'https://habr.com/ru/articles/896972/',\n", " 'https://habr.com/ru/articles/897624/',\n", " 'https://habr.com/ru/articles/897534/',\n", " 'https://habr.com/ru/articles/897620/',\n", " 'https://habr.com/ru/articles/897654/',\n", " 'https://habr.com/ru/articles/897648/',\n", " 'https://habr.com/ru/articles/897642/',\n", " 'https://habr.com/ru/articles/897634/',\n", " 'https://habr.com/ru/articles/897282/',\n", " 'https://habr.com/ru/articles/897472/',\n", " 'https://habr.com/ru/articles/891132/',\n", " 'https://habr.com/ru/articles/897224/',\n", " 'https://habr.com/ru/articles/897636/',\n", " 'https://habr.com/ru/articles/897496/',\n", " 'https://habr.com/ru/articles/897630/',\n", " 'https://habr.com/ru/articles/897518/',\n", " 'https://habr.com/ru/articles/897640/',\n", " 'https://habr.com/ru/articles/897574/',\n", " 'https://habr.com/ru/articles/897632/',\n", " 'https://habr.com/ru/articles/891488/',\n", " 'https://habr.com/ru/articles/896972/',\n", " 'https://habr.com/ru/articles/897624/',\n", " 'https://habr.com/ru/articles/897534/',\n", " 'https://habr.com/ru/articles/897620/',\n", " 'https://habr.com/ru/articles/897654/',\n", " 'https://habr.com/ru/articles/897648/',\n", " 'https://habr.com/ru/articles/897642/',\n", " 'https://habr.com/ru/articles/897634/',\n", " 'https://habr.com/ru/articles/897572/',\n", " 'https://habr.com/ru/articles/897300/',\n", " 'https://habr.com/ru/articles/897566/',\n", " 'https://habr.com/ru/articles/897570/',\n", " 'https://habr.com/ru/articles/897606/',\n", " 'https://habr.com/ru/articles/895412/',\n", " 'https://habr.com/ru/articles/897074/',\n", " 'https://habr.com/ru/articles/897568/',\n", " 'https://habr.com/ru/articles/897546/',\n", " 'https://habr.com/ru/articles/897062/',\n", " 'https://habr.com/ru/articles/878678/',\n", " 'https://habr.com/ru/articles/897594/',\n", " 'https://habr.com/ru/articles/897564/',\n", " 'https://habr.com/ru/articles/897256/',\n", " 'https://habr.com/ru/articles/897526/',\n", " 'https://habr.com/ru/articles/897538/',\n", " 'https://habr.com/ru/articles/897236/',\n", " 'https://habr.com/ru/articles/897608/',\n", " 'https://habr.com/ru/articles/897604/',\n", " 'https://habr.com/ru/articles/897474/',\n", " 'https://habr.com/ru/articles/897500/',\n", " 'https://habr.com/ru/articles/897430/',\n", " 'https://habr.com/ru/articles/897448/',\n", " 'https://habr.com/ru/articles/897484/',\n", " 'https://habr.com/ru/articles/889928/',\n", " 'https://habr.com/ru/articles/897476/',\n", " 'https://habr.com/ru/articles/897328/',\n", " 'https://habr.com/ru/articles/897338/',\n", " 'https://habr.com/ru/articles/896430/',\n", " 'https://habr.com/ru/articles/897420/',\n", " 'https://habr.com/ru/articles/897486/',\n", " 'https://habr.com/ru/articles/897422/',\n", " 'https://habr.com/ru/articles/897478/',\n", " 'https://habr.com/ru/articles/897454/',\n", " 'https://habr.com/ru/articles/897424/',\n", " 'https://habr.com/ru/articles/897446/',\n", " 'https://habr.com/ru/articles/893702/',\n", " 'https://habr.com/ru/articles/897306/',\n", " 'https://habr.com/ru/articles/897510/',\n", " 'https://habr.com/ru/articles/897468/',\n", " 'https://habr.com/ru/articles/897388/',\n", " 'https://habr.com/ru/articles/897100/',\n", " 'https://habr.com/ru/articles/897266/',\n", " 'https://habr.com/ru/articles/897400/',\n", " 'https://habr.com/ru/articles/897380/',\n", " 'https://habr.com/ru/articles/897248/',\n", " 'https://habr.com/ru/articles/896106/',\n", " 'https://habr.com/ru/articles/897374/',\n", " 'https://habr.com/ru/articles/897376/',\n", " 'https://habr.com/ru/articles/897410/',\n", " 'https://habr.com/ru/articles/897396/',\n", " 'https://habr.com/ru/articles/897360/',\n", " 'https://habr.com/ru/articles/897102/',\n", " 'https://habr.com/ru/articles/897042/',\n", " 'https://habr.com/ru/articles/896722/',\n", " 'https://habr.com/ru/articles/897402/',\n", " 'https://habr.com/ru/articles/894744/',\n", " 'https://habr.com/ru/articles/897326/',\n", " 'https://habr.com/ru/articles/897302/',\n", " 'https://habr.com/ru/articles/896980/',\n", " 'https://habr.com/ru/articles/897226/',\n", " 'https://habr.com/ru/articles/896958/',\n", " 'https://habr.com/ru/articles/897288/',\n", " 'https://habr.com/ru/articles/897334/',\n", " 'https://habr.com/ru/articles/896626/',\n", " 'https://habr.com/ru/articles/897292/',\n", " 'https://habr.com/ru/articles/896886/',\n", " 'https://habr.com/ru/articles/897246/',\n", " 'https://habr.com/ru/articles/897304/',\n", " 'https://habr.com/ru/articles/897298/',\n", " 'https://habr.com/ru/articles/896210/',\n", " 'https://habr.com/ru/articles/897310/',\n", " 'https://habr.com/ru/articles/897240/',\n", " 'https://habr.com/ru/articles/897324/',\n", " 'https://habr.com/ru/articles/894994/',\n", " 'https://habr.com/ru/articles/897296/',\n", " 'https://habr.com/ru/articles/897274/',\n", " 'https://habr.com/ru/articles/897204/',\n", " 'https://habr.com/ru/articles/897316/',\n", " 'https://habr.com/ru/articles/897314/',\n", " 'https://habr.com/ru/articles/897210/',\n", " 'https://habr.com/ru/articles/893992/',\n", " 'https://habr.com/ru/articles/897200/',\n", " 'https://habr.com/ru/articles/897202/',\n", " 'https://habr.com/ru/articles/897160/',\n", " 'https://habr.com/ru/articles/896410/',\n", " 'https://habr.com/ru/articles/897122/',\n", " 'https://habr.com/ru/articles/896362/',\n", " 'https://habr.com/ru/articles/897180/',\n", " 'https://habr.com/ru/articles/896870/',\n", " 'https://habr.com/ru/articles/897218/',\n", " 'https://habr.com/ru/articles/897212/',\n", " 'https://habr.com/ru/articles/896190/',\n", " 'https://habr.com/ru/articles/895998/',\n", " 'https://habr.com/ru/articles/897206/',\n", " 'https://habr.com/ru/articles/896894/',\n", " 'https://habr.com/ru/articles/897222/',\n", " 'https://habr.com/ru/articles/895672/',\n", " 'https://habr.com/ru/articles/897198/',\n", " 'https://habr.com/ru/articles/897068/',\n", " 'https://habr.com/ru/articles/897144/',\n", " 'https://habr.com/ru/articles/897142/',\n", " 'https://habr.com/ru/articles/896458/',\n", " 'https://habr.com/ru/articles/897174/',\n", " 'https://habr.com/ru/articles/897120/',\n", " 'https://habr.com/ru/articles/896872/',\n", " 'https://habr.com/ru/articles/897116/',\n", " 'https://habr.com/ru/articles/887794/',\n", " 'https://habr.com/ru/articles/897132/',\n", " 'https://habr.com/ru/articles/897170/',\n", " 'https://habr.com/ru/articles/897148/',\n", " 'https://habr.com/ru/articles/896182/',\n", " 'https://habr.com/ru/articles/897158/',\n", " 'https://habr.com/ru/articles/897164/',\n", " 'https://habr.com/ru/articles/896772/',\n", " 'https://habr.com/ru/articles/897118/',\n", " 'https://habr.com/ru/articles/897124/',\n", " 'https://habr.com/ru/articles/897146/',\n", " 'https://habr.com/ru/articles/897136/',\n", " 'https://habr.com/ru/articles/897126/',\n", " 'https://habr.com/ru/articles/897076/',\n", " 'https://habr.com/ru/articles/858040/',\n", " 'https://habr.com/ru/articles/897080/',\n", " 'https://habr.com/ru/articles/883770/',\n", " 'https://habr.com/ru/articles/897022/',\n", " 'https://habr.com/ru/articles/886814/',\n", " 'https://habr.com/ru/articles/896056/',\n", " 'https://habr.com/ru/articles/896498/',\n", " 'https://habr.com/ru/articles/897082/',\n", " 'https://habr.com/ru/articles/896078/',\n", " 'https://habr.com/ru/articles/896586/',\n", " 'https://habr.com/ru/articles/895032/',\n", " 'https://habr.com/ru/articles/897064/',\n", " 'https://habr.com/ru/articles/896714/',\n", " 'https://habr.com/ru/articles/896802/',\n", " 'https://habr.com/ru/articles/896800/',\n", " 'https://habr.com/ru/articles/895796/',\n", " 'https://habr.com/ru/articles/867696/',\n", " 'https://habr.com/ru/articles/896814/',\n", " 'https://habr.com/ru/articles/897072/',\n", " 'https://habr.com/ru/articles/896560/',\n", " 'https://habr.com/ru/articles/896986/',\n", " 'https://habr.com/ru/articles/895538/',\n", " 'https://habr.com/ru/articles/897056/',\n", " 'https://habr.com/ru/articles/897014/',\n", " 'https://habr.com/ru/articles/896966/',\n", " 'https://habr.com/ru/articles/896954/',\n", " 'https://habr.com/ru/articles/897020/',\n", " 'https://habr.com/ru/articles/896956/',\n", " 'https://habr.com/ru/articles/896846/',\n", " 'https://habr.com/ru/articles/896622/',\n", " 'https://habr.com/ru/articles/896998/',\n", " 'https://habr.com/ru/articles/897028/',\n", " 'https://habr.com/ru/articles/896906/',\n", " 'https://habr.com/ru/articles/896964/',\n", " 'https://habr.com/ru/articles/896978/',\n", " 'https://habr.com/ru/articles/897008/',\n", " 'https://habr.com/ru/articles/886022/',\n", " 'https://habr.com/ru/articles/897048/',\n", " 'https://habr.com/ru/articles/896968/',\n", " 'https://habr.com/ru/articles/896880/',\n", " 'https://habr.com/ru/articles/896896/',\n", " 'https://habr.com/ru/articles/896940/',\n", " 'https://habr.com/ru/articles/896916/',\n", " 'https://habr.com/ru/articles/896734/',\n", " 'https://habr.com/ru/articles/896392/',\n", " 'https://habr.com/ru/articles/894096/',\n", " 'https://habr.com/ru/articles/896888/',\n", " 'https://habr.com/ru/articles/893182/',\n", " 'https://habr.com/ru/articles/896792/',\n", " 'https://habr.com/ru/articles/896912/',\n", " 'https://habr.com/ru/articles/896902/',\n", " 'https://habr.com/ru/articles/895328/',\n", " 'https://habr.com/ru/articles/895104/',\n", " 'https://habr.com/ru/articles/896898/',\n", " 'https://habr.com/ru/articles/896818/',\n", " 'https://habr.com/ru/articles/896918/',\n", " 'https://habr.com/ru/articles/896704/',\n", " 'https://habr.com/ru/articles/896922/',\n", " 'https://habr.com/ru/articles/896748/',\n", " 'https://habr.com/ru/articles/896856/',\n", " 'https://habr.com/ru/articles/896860/',\n", " 'https://habr.com/ru/articles/896570/',\n", " 'https://habr.com/ru/articles/896864/',\n", " 'https://habr.com/ru/articles/896824/',\n", " 'https://habr.com/ru/articles/896836/',\n", " 'https://habr.com/ru/articles/896862/',\n", " 'https://habr.com/ru/articles/896692/',\n", " 'https://habr.com/ru/articles/896868/',\n", " 'https://habr.com/ru/articles/895360/',\n", " 'https://habr.com/ru/articles/896844/',\n", " 'https://habr.com/ru/articles/895766/',\n", " 'https://habr.com/ru/articles/892770/',\n", " 'https://habr.com/ru/articles/896060/',\n", " 'https://habr.com/ru/articles/896742/',\n", " 'https://habr.com/ru/articles/896826/',\n", " 'https://habr.com/ru/articles/896816/',\n", " 'https://habr.com/ru/articles/896866/',\n", " 'https://habr.com/ru/articles/895084/',\n", " 'https://habr.com/ru/articles/896820/',\n", " 'https://habr.com/ru/articles/896776/',\n", " 'https://habr.com/ru/articles/896652/',\n", " 'https://habr.com/ru/articles/896784/',\n", " 'https://habr.com/ru/articles/889420/',\n", " 'https://habr.com/ru/articles/896768/',\n", " 'https://habr.com/ru/articles/896756/',\n", " 'https://habr.com/ru/articles/896738/',\n", " 'https://habr.com/ru/articles/896790/',\n", " 'https://habr.com/ru/articles/896134/',\n", " 'https://habr.com/ru/articles/874486/',\n", " 'https://habr.com/ru/articles/896750/',\n", " 'https://habr.com/ru/articles/896010/',\n", " 'https://habr.com/ru/articles/896554/',\n", " 'https://habr.com/ru/articles/896758/',\n", " 'https://habr.com/ru/articles/895286/',\n", " 'https://habr.com/ru/articles/896794/',\n", " 'https://habr.com/ru/articles/896636/',\n", " 'https://habr.com/ru/articles/896762/',\n", " 'https://habr.com/ru/articles/896766/',\n", " 'https://habr.com/ru/articles/896082/',\n", " 'https://habr.com/ru/articles/896368/',\n", " 'https://habr.com/ru/articles/896730/',\n", " 'https://habr.com/ru/articles/896092/',\n", " 'https://habr.com/ru/articles/896684/',\n", " 'https://habr.com/ru/articles/894746/',\n", " 'https://habr.com/ru/articles/896738/',\n", " 'https://habr.com/ru/articles/896732/',\n", " 'https://habr.com/ru/articles/896680/',\n", " 'https://habr.com/ru/articles/894294/',\n", " 'https://habr.com/ru/articles/896624/',\n", " 'https://habr.com/ru/articles/895340/',\n", " 'https://habr.com/ru/articles/896706/',\n", " 'https://habr.com/ru/articles/896014/',\n", " 'https://habr.com/ru/articles/896728/',\n", " 'https://habr.com/ru/articles/896686/',\n", " 'https://habr.com/ru/articles/896536/',\n", " 'https://habr.com/ru/articles/896736/',\n", " 'https://habr.com/ru/articles/895682/',\n", " 'https://habr.com/ru/articles/896726/',\n", " 'https://habr.com/ru/articles/896710/',\n", " 'https://habr.com/ru/articles/896662/',\n", " 'https://habr.com/ru/articles/896332/',\n", " 'https://habr.com/ru/articles/896668/',\n", " 'https://habr.com/ru/articles/896644/',\n", " 'https://habr.com/ru/articles/896638/',\n", " 'https://habr.com/ru/articles/896670/',\n", " 'https://habr.com/ru/articles/896632/',\n", " 'https://habr.com/ru/articles/896620/',\n", " 'https://habr.com/ru/articles/896630/',\n", " 'https://habr.com/ru/articles/896598/',\n", " 'https://habr.com/ru/articles/896178/',\n", " 'https://habr.com/ru/articles/896650/',\n", " 'https://habr.com/ru/articles/896660/',\n", " 'https://habr.com/ru/articles/896658/',\n", " 'https://habr.com/ru/articles/896664/',\n", " 'https://habr.com/ru/articles/896606/',\n", " 'https://habr.com/ru/articles/896604/',\n", " 'https://habr.com/ru/articles/892784/',\n", " 'https://habr.com/ru/articles/150091/',\n", " 'https://habr.com/ru/articles/896654/',\n", " 'https://habr.com/ru/articles/896500/',\n", " 'https://habr.com/ru/articles/896580/',\n", " 'https://habr.com/ru/articles/896446/',\n", " 'https://habr.com/ru/articles/896594/',\n", " 'https://habr.com/ru/articles/896496/',\n", " 'https://habr.com/ru/articles/896502/',\n", " 'https://habr.com/ru/articles/893244/',\n", " 'https://habr.com/ru/articles/896490/',\n", " 'https://habr.com/ru/articles/896390/',\n", " 'https://habr.com/ru/articles/893178/',\n", " 'https://habr.com/ru/articles/866154/',\n", " 'https://habr.com/ru/articles/895090/',\n", " 'https://habr.com/ru/articles/896320/',\n", " 'https://habr.com/ru/articles/896514/',\n", " 'https://habr.com/ru/articles/896582/',\n", " 'https://habr.com/ru/articles/896550/',\n", " 'https://habr.com/ru/articles/896492/',\n", " 'https://habr.com/ru/articles/896504/',\n", " 'https://habr.com/ru/articles/896548/',\n", " 'https://habr.com/ru/articles/896568/',\n", " 'https://habr.com/ru/articles/896470/',\n", " 'https://habr.com/ru/articles/896348/',\n", " 'https://habr.com/ru/articles/895986/',\n", " 'https://habr.com/ru/articles/896438/',\n", " 'https://habr.com/ru/articles/896416/',\n", " 'https://habr.com/ru/articles/896456/',\n", " 'https://habr.com/ru/articles/896244/',\n", " 'https://habr.com/ru/articles/896454/',\n", " 'https://habr.com/ru/articles/896296/',\n", " 'https://habr.com/ru/articles/895316/',\n", " 'https://habr.com/ru/articles/896412/',\n", " 'https://habr.com/ru/articles/896468/',\n", " 'https://habr.com/ru/articles/892836/',\n", " 'https://habr.com/ru/articles/896448/',\n", " 'https://habr.com/ru/articles/896442/',\n", " 'https://habr.com/ru/articles/896424/',\n", " 'https://habr.com/ru/articles/896486/',\n", " 'https://habr.com/ru/articles/896484/',\n", " 'https://habr.com/ru/articles/896478/',\n", " 'https://habr.com/ru/articles/896436/',\n", " 'https://habr.com/ru/articles/896274/',\n", " 'https://habr.com/ru/articles/719750/',\n", " 'https://habr.com/ru/articles/896350/',\n", " 'https://habr.com/ru/articles/896342/',\n", " 'https://habr.com/ru/articles/896304/',\n", " 'https://habr.com/ru/articles/896388/',\n", " 'https://habr.com/ru/articles/891018/',\n", " 'https://habr.com/ru/articles/896358/',\n", " 'https://habr.com/ru/articles/892112/',\n", " 'https://habr.com/ru/articles/895584/',\n", " 'https://habr.com/ru/articles/885068/',\n", " 'https://habr.com/ru/articles/896344/',\n", " 'https://habr.com/ru/articles/896090/',\n", " 'https://habr.com/ru/articles/896398/',\n", " 'https://habr.com/ru/articles/895974/',\n", " 'https://habr.com/ru/articles/896414/',\n", " 'https://habr.com/ru/articles/896354/',\n", " 'https://habr.com/ru/articles/890000/',\n", " 'https://habr.com/ru/articles/896212/',\n", " 'https://habr.com/ru/articles/895992/',\n", " 'https://habr.com/ru/articles/892176/',\n", " 'https://habr.com/ru/articles/894954/',\n", " 'https://habr.com/ru/articles/894898/',\n", " 'https://habr.com/ru/articles/894992/',\n", " 'https://habr.com/ru/articles/895424/',\n", " 'https://habr.com/ru/articles/895452/',\n", " 'https://habr.com/ru/articles/896314/',\n", " 'https://habr.com/ru/articles/895804/',\n", " 'https://habr.com/ru/articles/896326/',\n", " 'https://habr.com/ru/articles/895848/',\n", " 'https://habr.com/ru/articles/896200/',\n", " 'https://habr.com/ru/articles/896118/',\n", " 'https://habr.com/ru/articles/896328/',\n", " 'https://habr.com/ru/articles/895250/',\n", " 'https://habr.com/ru/articles/896176/',\n", " 'https://habr.com/ru/articles/896116/',\n", " 'https://habr.com/ru/articles/896270/',\n", " 'https://habr.com/ru/articles/896308/',\n", " 'https://habr.com/ru/articles/895984/',\n", " 'https://habr.com/ru/articles/896312/',\n", " 'https://habr.com/ru/articles/894942/',\n", " 'https://habr.com/ru/articles/892444/',\n", " 'https://habr.com/ru/articles/896272/',\n", " 'https://habr.com/ru/articles/893084/',\n", " 'https://habr.com/ru/articles/895882/',\n", " 'https://habr.com/ru/articles/895664/',\n", " 'https://habr.com/ru/articles/896266/',\n", " 'https://habr.com/ru/articles/896122/',\n", " 'https://habr.com/ru/articles/896298/',\n", " 'https://habr.com/ru/articles/895982/',\n", " 'https://habr.com/ru/articles/896288/',\n", " 'https://habr.com/ru/articles/896306/',\n", " 'https://habr.com/ru/articles/896294/',\n", " 'https://habr.com/ru/articles/894928/',\n", " 'https://habr.com/ru/articles/896302/',\n", " 'https://habr.com/ru/articles/895206/',\n", " 'https://habr.com/ru/articles/896310/',\n", " 'https://habr.com/ru/articles/896152/',\n", " 'https://habr.com/ru/articles/896240/',\n", " 'https://habr.com/ru/articles/896276/',\n", " 'https://habr.com/ru/articles/896238/',\n", " 'https://habr.com/ru/articles/896166/',\n", " 'https://habr.com/ru/articles/896234/',\n", " 'https://habr.com/ru/articles/896236/',\n", " 'https://habr.com/ru/articles/896112/',\n", " 'https://habr.com/ru/articles/896066/',\n", " 'https://habr.com/ru/articles/896222/',\n", " 'https://habr.com/ru/articles/890538/',\n", " 'https://habr.com/ru/articles/895972/',\n", " 'https://habr.com/ru/articles/896072/',\n", " 'https://habr.com/ru/articles/896028/',\n", " 'https://habr.com/ru/articles/896110/',\n", " 'https://habr.com/ru/articles/896220/',\n", " 'https://habr.com/ru/articles/896204/',\n", " 'https://habr.com/ru/articles/896162/',\n", " 'https://habr.com/ru/articles/896208/',\n", " 'https://habr.com/ru/articles/896224/',\n", " 'https://habr.com/ru/articles/895956/',\n", " 'https://habr.com/ru/articles/896002/',\n", " 'https://habr.com/ru/articles/896160/',\n", " 'https://habr.com/ru/articles/896124/',\n", " 'https://habr.com/ru/articles/895338/',\n", " 'https://habr.com/ru/articles/895732/',\n", " 'https://habr.com/ru/articles/896094/',\n", " 'https://habr.com/ru/articles/896096/',\n", " 'https://habr.com/ru/articles/896120/',\n", " 'https://habr.com/ru/articles/896088/',\n", " 'https://habr.com/ru/articles/895760/',\n", " 'https://habr.com/ru/articles/896102/',\n", " 'https://habr.com/ru/articles/896084/',\n", " 'https://habr.com/ru/articles/896126/',\n", " 'https://habr.com/ru/articles/893432/',\n", " 'https://habr.com/ru/articles/896046/',\n", " 'https://habr.com/ru/articles/896140/',\n", " 'https://habr.com/ru/articles/896130/',\n", " 'https://habr.com/ru/articles/896074/',\n", " 'https://habr.com/ru/articles/896086/',\n", " 'https://habr.com/ru/articles/896064/',\n", " 'https://habr.com/ru/articles/896108/',\n", " 'https://habr.com/ru/articles/896070/',\n", " 'https://habr.com/ru/articles/896030/',\n", " 'https://habr.com/ru/articles/896022/',\n", " 'https://habr.com/ru/articles/896012/',\n", " 'https://habr.com/ru/articles/893050/',\n", " 'https://habr.com/ru/articles/895994/',\n", " 'https://habr.com/ru/articles/896054/',\n", " 'https://habr.com/ru/articles/895390/',\n", " 'https://habr.com/ru/articles/895676/',\n", " 'https://habr.com/ru/articles/895180/',\n", " 'https://habr.com/ru/articles/895810/',\n", " 'https://habr.com/ru/articles/895860/',\n", " 'https://habr.com/ru/articles/895980/',\n", " 'https://habr.com/ru/articles/895954/',\n", " 'https://habr.com/ru/articles/887726/',\n", " 'https://habr.com/ru/articles/896044/',\n", " 'https://habr.com/ru/articles/896020/',\n", " 'https://habr.com/ru/articles/883954/',\n", " 'https://habr.com/ru/articles/896024/',\n", " 'https://habr.com/ru/articles/896006/',\n", " 'https://habr.com/ru/articles/895068/',\n", " 'https://habr.com/ru/articles/895946/',\n", " 'https://habr.com/ru/articles/894688/',\n", " 'https://habr.com/ru/articles/895930/',\n", " 'https://habr.com/ru/articles/895950/',\n", " 'https://habr.com/ru/articles/895774/',\n", " 'https://habr.com/ru/articles/895942/',\n", " 'https://habr.com/ru/articles/895306/',\n", " 'https://habr.com/ru/articles/895928/',\n", " 'https://habr.com/ru/articles/895966/',\n", " 'https://habr.com/ru/articles/895902/',\n", " 'https://habr.com/ru/articles/895892/',\n", " 'https://habr.com/ru/articles/893430/',\n", " 'https://habr.com/ru/articles/895968/',\n", " 'https://habr.com/ru/articles/895960/',\n", " 'https://habr.com/ru/articles/892718/',\n", " 'https://habr.com/ru/articles/895024/',\n", " 'https://habr.com/ru/articles/895978/',\n", " 'https://habr.com/ru/articles/895926/',\n", " 'https://habr.com/ru/articles/895688/',\n", " 'https://habr.com/ru/articles/895938/',\n", " 'https://habr.com/ru/articles/895866/',\n", " 'https://habr.com/ru/articles/895900/',\n", " 'https://habr.com/ru/articles/892396/',\n", " 'https://habr.com/ru/articles/895858/',\n", " 'https://habr.com/ru/articles/895876/',\n", " 'https://habr.com/ru/articles/895854/',\n", " 'https://habr.com/ru/articles/895840/',\n", " 'https://habr.com/ru/articles/895864/',\n", " 'https://habr.com/ru/articles/895914/',\n", " 'https://habr.com/ru/articles/895868/',\n", " 'https://habr.com/ru/articles/895856/',\n", " 'https://habr.com/ru/articles/895580/',\n", " 'https://habr.com/ru/articles/895622/',\n", " 'https://habr.com/ru/articles/895922/',\n", " 'https://habr.com/ru/articles/895852/',\n", " 'https://habr.com/ru/articles/895906/',\n", " 'https://habr.com/ru/articles/895636/',\n", " 'https://habr.com/ru/articles/895428/',\n", " 'https://habr.com/ru/articles/895850/',\n", " 'https://habr.com/ru/articles/895822/',\n", " 'https://habr.com/ru/articles/895792/',\n", " 'https://habr.com/ru/articles/895790/',\n", " 'https://habr.com/ru/articles/895640/',\n", " 'https://habr.com/ru/articles/895784/',\n", " 'https://habr.com/ru/articles/894472/',\n", " 'https://habr.com/ru/articles/895782/',\n", " 'https://habr.com/ru/articles/895802/',\n", " 'https://habr.com/ru/articles/895826/',\n", " 'https://habr.com/ru/articles/895598/',\n", " 'https://habr.com/ru/articles/895832/',\n", " 'https://habr.com/ru/articles/895824/',\n", " 'https://habr.com/ru/articles/895798/',\n", " 'https://habr.com/ru/articles/895778/',\n", " 'https://habr.com/ru/articles/895800/',\n", " 'https://habr.com/ru/articles/895830/',\n", " 'https://habr.com/ru/articles/895818/',\n", " 'https://habr.com/ru/articles/895806/',\n", " 'https://habr.com/ru/articles/895780/',\n", " 'https://habr.com/ru/articles/895768/',\n", " 'https://habr.com/ru/articles/895692/',\n", " 'https://habr.com/ru/articles/894500/',\n", " 'https://habr.com/ru/articles/895638/',\n", " 'https://habr.com/ru/articles/895742/',\n", " 'https://habr.com/ru/articles/895696/',\n", " 'https://habr.com/ru/articles/895764/',\n", " 'https://habr.com/ru/articles/895118/',\n", " 'https://habr.com/ru/articles/895674/',\n", " 'https://habr.com/ru/articles/895756/',\n", " 'https://habr.com/ru/articles/895718/',\n", " 'https://habr.com/ru/articles/895722/',\n", " 'https://habr.com/ru/articles/895138/',\n", " 'https://habr.com/ru/articles/892786/',\n", " 'https://habr.com/ru/articles/895654/',\n", " 'https://habr.com/ru/articles/895678/',\n", " 'https://habr.com/ru/articles/895754/',\n", " 'https://habr.com/ru/articles/893720/',\n", " 'https://habr.com/ru/articles/895684/',\n", " 'https://habr.com/ru/articles/895750/',\n", " 'https://habr.com/ru/articles/895694/',\n", " 'https://habr.com/ru/articles/895220/',\n", " 'https://habr.com/ru/articles/895560/',\n", " 'https://habr.com/ru/articles/895606/',\n", " 'https://habr.com/ru/articles/895568/',\n", " 'https://habr.com/ru/articles/895634/',\n", " 'https://habr.com/ru/articles/895540/',\n", " 'https://habr.com/ru/articles/895658/',\n", " 'https://habr.com/ru/articles/895432/',\n", " 'https://habr.com/ru/articles/895550/',\n", " 'https://habr.com/ru/articles/895582/',\n", " 'https://habr.com/ru/articles/894742/',\n", " 'https://habr.com/ru/articles/895668/',\n", " 'https://habr.com/ru/articles/895590/',\n", " 'https://habr.com/ru/articles/895556/',\n", " 'https://habr.com/ru/articles/895632/',\n", " 'https://habr.com/ru/articles/895662/',\n", " 'https://habr.com/ru/articles/895608/',\n", " 'https://habr.com/ru/articles/895596/',\n", " 'https://habr.com/ru/articles/895588/',\n", " 'https://habr.com/ru/articles/895666/',\n", " 'https://habr.com/ru/articles/895610/',\n", " 'https://habr.com/ru/articles/895532/',\n", " 'https://habr.com/ru/articles/895524/',\n", " 'https://habr.com/ru/articles/895498/',\n", " 'https://habr.com/ru/articles/895534/',\n", " 'https://habr.com/ru/articles/895508/',\n", " 'https://habr.com/ru/articles/895542/',\n", " 'https://habr.com/ru/articles/895416/',\n", " 'https://habr.com/ru/articles/895536/',\n", " 'https://habr.com/ru/articles/895346/',\n", " 'https://habr.com/ru/articles/895496/',\n", " 'https://habr.com/ru/articles/881918/',\n", " 'https://habr.com/ru/articles/895512/',\n", " 'https://habr.com/ru/articles/895282/',\n", " 'https://habr.com/ru/articles/895458/',\n", " 'https://habr.com/ru/articles/895410/',\n", " 'https://habr.com/ru/articles/895544/',\n", " 'https://habr.com/ru/articles/894604/',\n", " 'https://habr.com/ru/articles/895530/',\n", " 'https://habr.com/ru/articles/895464/',\n", " 'https://habr.com/ru/articles/895454/',\n", " 'https://habr.com/ru/articles/895448/',\n", " 'https://habr.com/ru/articles/895404/',\n", " 'https://habr.com/ru/articles/895488/',\n", " 'https://habr.com/ru/articles/895436/',\n", " 'https://habr.com/ru/articles/895462/',\n", " 'https://habr.com/ru/articles/893168/',\n", " 'https://habr.com/ru/articles/895450/',\n", " 'https://habr.com/ru/articles/895494/',\n", " 'https://habr.com/ru/articles/895490/',\n", " 'https://habr.com/ru/articles/895444/',\n", " 'https://habr.com/ru/articles/895440/',\n", " 'https://habr.com/ru/articles/894754/',\n", " 'https://habr.com/ru/articles/895446/',\n", " 'https://habr.com/ru/articles/894850/',\n", " 'https://habr.com/ru/articles/895408/',\n", " 'https://habr.com/ru/articles/895478/',\n", " 'https://habr.com/ru/articles/895482/',\n", " 'https://habr.com/ru/articles/895486/',\n", " 'https://habr.com/ru/articles/895426/',\n", " 'https://habr.com/ru/articles/893722/',\n", " 'https://habr.com/ru/articles/895148/',\n", " 'https://habr.com/ru/articles/895362/',\n", " 'https://habr.com/ru/articles/895332/',\n", " 'https://habr.com/ru/articles/895252/',\n", " 'https://habr.com/ru/articles/895344/',\n", " 'https://habr.com/ru/articles/895376/',\n", " 'https://habr.com/ru/articles/891416/',\n", " 'https://habr.com/ru/articles/895396/',\n", " 'https://habr.com/ru/articles/895174/',\n", " 'https://habr.com/ru/articles/895402/',\n", " 'https://habr.com/ru/articles/894652/',\n", " 'https://habr.com/ru/articles/895380/',\n", " 'https://habr.com/ru/articles/894134/',\n", " 'https://habr.com/ru/articles/895330/',\n", " 'https://habr.com/ru/articles/895382/',\n", " 'https://habr.com/ru/articles/895348/',\n", " 'https://habr.com/ru/articles/895368/',\n", " 'https://habr.com/ru/articles/895366/',\n", " 'https://habr.com/ru/articles/895272/',\n", " 'https://habr.com/ru/articles/895342/',\n", " 'https://habr.com/ru/articles/895276/',\n", " 'https://habr.com/ru/articles/895274/',\n", " 'https://habr.com/ru/articles/895132/',\n", " 'https://habr.com/ru/articles/895292/',\n", " 'https://habr.com/ru/articles/895312/',\n", " 'https://habr.com/ru/articles/892726/',\n", " 'https://habr.com/ru/articles/895296/',\n", " 'https://habr.com/ru/articles/895300/',\n", " 'https://habr.com/ru/articles/895294/',\n", " 'https://habr.com/ru/articles/895298/',\n", " 'https://habr.com/ru/articles/895302/',\n", " 'https://habr.com/ru/articles/888222/',\n", " 'https://habr.com/ru/articles/895230/',\n", " 'https://habr.com/ru/articles/894364/',\n", " 'https://habr.com/ru/articles/894854/',\n", " 'https://habr.com/ru/articles/889404/',\n", " 'https://habr.com/ru/articles/895214/',\n", " 'https://habr.com/ru/articles/895278/',\n", " 'https://habr.com/ru/articles/895268/',\n", " 'https://habr.com/ru/articles/895314/',\n", " 'https://habr.com/ru/articles/876900/',\n", " 'https://habr.com/ru/articles/895242/',\n", " 'https://habr.com/ru/articles/895002/',\n", " 'https://habr.com/ru/articles/895022/',\n", " 'https://habr.com/ru/articles/895212/',\n", " 'https://habr.com/ru/articles/895222/',\n", " 'https://habr.com/ru/articles/895012/',\n", " 'https://habr.com/ru/articles/894484/',\n", " 'https://habr.com/ru/articles/895262/',\n", " 'https://habr.com/ru/articles/895224/',\n", " 'https://habr.com/ru/articles/895226/',\n", " 'https://habr.com/ru/articles/895238/',\n", " 'https://habr.com/ru/articles/894808/',\n", " 'https://habr.com/ru/articles/895236/',\n", " 'https://habr.com/ru/articles/895208/',\n", " 'https://habr.com/ru/articles/895264/',\n", " 'https://habr.com/ru/articles/895108/',\n", " 'https://habr.com/ru/articles/895256/',\n", " 'https://habr.com/ru/articles/894360/',\n", " 'https://habr.com/ru/articles/895218/',\n", " 'https://habr.com/ru/articles/894676/',\n", " 'https://habr.com/ru/articles/885602/',\n", " 'https://habr.com/ru/articles/894936/',\n", " 'https://habr.com/ru/articles/895186/',\n", " 'https://habr.com/ru/articles/894862/',\n", " 'https://habr.com/ru/articles/895176/',\n", " 'https://habr.com/ru/articles/895162/',\n", " 'https://habr.com/ru/articles/894950/',\n", " 'https://habr.com/ru/articles/895034/',\n", " 'https://habr.com/ru/articles/895184/',\n", " 'https://habr.com/ru/articles/895202/',\n", " 'https://habr.com/ru/articles/894678/',\n", " 'https://habr.com/ru/articles/895196/',\n", " 'https://habr.com/ru/articles/895154/',\n", " 'https://habr.com/ru/articles/894614/',\n", " 'https://habr.com/ru/articles/895058/',\n", " 'https://habr.com/ru/articles/895172/',\n", " 'https://habr.com/ru/articles/895164/',\n", " 'https://habr.com/ru/articles/894906/',\n", " 'https://habr.com/ru/articles/895158/',\n", " 'https://habr.com/ru/articles/892920/',\n", " 'https://habr.com/ru/articles/895046/',\n", " 'https://habr.com/ru/articles/895066/',\n", " 'https://habr.com/ru/articles/895092/',\n", " 'https://habr.com/ru/articles/895080/',\n", " 'https://habr.com/ru/articles/895076/',\n", " 'https://habr.com/ru/articles/894514/',\n", " 'https://habr.com/ru/articles/895114/',\n", " 'https://habr.com/ru/articles/894790/',\n", " 'https://habr.com/ru/articles/895060/',\n", " 'https://habr.com/ru/articles/895056/',\n", " 'https://habr.com/ru/articles/894834/',\n", " 'https://habr.com/ru/articles/895048/',\n", " 'https://habr.com/ru/articles/895150/',\n", " 'https://habr.com/ru/articles/895016/',\n", " 'https://habr.com/ru/articles/895146/',\n", " 'https://habr.com/ru/articles/895102/',\n", " 'https://habr.com/ru/articles/895096/',\n", " 'https://habr.com/ru/articles/895098/',\n", " 'https://habr.com/ru/articles/895082/',\n", " 'https://habr.com/ru/articles/894996/',\n", " 'https://habr.com/ru/articles/894984/',\n", " 'https://habr.com/ru/articles/894986/',\n", " 'https://habr.com/ru/articles/894998/',\n", " 'https://habr.com/ru/articles/895018/',\n", " 'https://habr.com/ru/articles/894968/',\n", " 'https://habr.com/ru/articles/894978/',\n", " 'https://habr.com/ru/articles/894980/',\n", " 'https://habr.com/ru/articles/894792/',\n", " 'https://habr.com/ru/articles/894956/',\n", " 'https://habr.com/ru/articles/894220/',\n", " 'https://habr.com/ru/articles/894952/',\n", " 'https://habr.com/ru/articles/895006/',\n", " 'https://habr.com/ru/articles/894418/',\n", " 'https://habr.com/ru/articles/890278/',\n", " 'https://habr.com/ru/articles/895008/',\n", " 'https://habr.com/ru/articles/894924/',\n", " 'https://habr.com/ru/articles/894830/',\n", " 'https://habr.com/ru/articles/892852/',\n", " 'https://habr.com/ru/articles/893484/',\n", " 'https://habr.com/ru/articles/894902/',\n", " 'https://habr.com/ru/articles/894914/',\n", " 'https://habr.com/ru/articles/894162/',\n", " 'https://habr.com/ru/articles/893970/',\n", " 'https://habr.com/ru/articles/894966/',\n", " 'https://habr.com/ru/articles/893856/',\n", " 'https://habr.com/ru/articles/894118/',\n", " 'https://habr.com/ru/articles/894940/',\n", " 'https://habr.com/ru/articles/894684/',\n", " 'https://habr.com/ru/articles/894916/',\n", " 'https://habr.com/ru/articles/894642/',\n", " 'https://habr.com/ru/articles/894780/',\n", " 'https://habr.com/ru/articles/893644/',\n", " 'https://habr.com/ru/articles/894768/',\n", " 'https://habr.com/ru/articles/894922/',\n", " 'https://habr.com/ru/articles/894910/',\n", " 'https://habr.com/ru/articles/892234/',\n", " 'https://habr.com/ru/articles/894884/',\n", " 'https://habr.com/ru/articles/894930/',\n", " 'https://habr.com/ru/articles/894892/',\n", " 'https://habr.com/ru/articles/893990/',\n", " 'https://habr.com/ru/articles/894552/',\n", " 'https://habr.com/ru/articles/893164/',\n", " 'https://habr.com/ru/articles/894760/',\n", " 'https://habr.com/ru/articles/894800/',\n", " 'https://habr.com/ru/articles/892158/',\n", " 'https://habr.com/ru/articles/893634/',\n", " 'https://habr.com/ru/articles/893800/',\n", " 'https://habr.com/ru/articles/894876/',\n", " 'https://habr.com/ru/articles/894242/',\n", " 'https://habr.com/ru/articles/894560/',\n", " 'https://habr.com/ru/articles/889734/',\n", " 'https://habr.com/ru/articles/894838/',\n", " 'https://habr.com/ru/articles/894872/',\n", " 'https://habr.com/ru/articles/894866/',\n", " 'https://habr.com/ru/articles/894478/',\n", " 'https://habr.com/ru/articles/892502/',\n", " 'https://habr.com/ru/articles/880606/',\n", " 'https://habr.com/ru/articles/894400/',\n", " 'https://habr.com/ru/articles/894568/',\n", " 'https://habr.com/ru/articles/894654/',\n", " 'https://habr.com/ru/articles/894804/',\n", " 'https://habr.com/ru/articles/894706/',\n", " 'https://habr.com/ru/articles/894748/',\n", " 'https://habr.com/ru/articles/894770/',\n", " 'https://habr.com/ru/articles/894816/',\n", " 'https://habr.com/ru/articles/894774/',\n", " 'https://habr.com/ru/articles/894818/',\n", " 'https://habr.com/ru/articles/894736/',\n", " 'https://habr.com/ru/articles/894304/',\n", " 'https://habr.com/ru/articles/894756/',\n", " 'https://habr.com/ru/articles/894752/',\n", " 'https://habr.com/ru/articles/894738/',\n", " 'https://habr.com/ru/articles/894788/',\n", " 'https://habr.com/ru/articles/894802/',\n", " 'https://habr.com/ru/articles/894766/',\n", " 'https://habr.com/ru/articles/894740/',\n", " 'https://habr.com/ru/articles/894578/',\n", " 'https://habr.com/ru/articles/894786/',\n", " 'https://habr.com/ru/articles/894750/',\n", " 'https://habr.com/ru/articles/894724/',\n", " 'https://habr.com/ru/articles/894666/',\n", " 'https://habr.com/ru/articles/894690/',\n", " 'https://habr.com/ru/articles/894650/',\n", " 'https://habr.com/ru/articles/894722/',\n", " 'https://habr.com/ru/articles/894672/',\n", " 'https://habr.com/ru/articles/894658/',\n", " 'https://habr.com/ru/articles/894686/',\n", " 'https://habr.com/ru/articles/894698/',\n", " 'https://habr.com/ru/articles/894704/',\n", " 'https://habr.com/ru/articles/894174/',\n", " 'https://habr.com/ru/articles/894408/',\n", " 'https://habr.com/ru/articles/894720/',\n", " 'https://habr.com/ru/articles/894728/',\n", " 'https://habr.com/ru/articles/894664/',\n", " 'https://habr.com/ru/articles/894680/',\n", " 'https://habr.com/ru/articles/894670/',\n", " 'https://habr.com/ru/articles/894692/',\n", " 'https://habr.com/ru/articles/894702/',\n", " 'https://habr.com/ru/articles/894730/',\n", " 'https://habr.com/ru/articles/894656/',\n", " 'https://habr.com/ru/articles/894346/',\n", " 'https://habr.com/ru/articles/894606/',\n", " 'https://habr.com/ru/articles/894618/',\n", " 'https://habr.com/ru/articles/894306/',\n", " 'https://habr.com/ru/articles/894648/',\n", " 'https://habr.com/ru/articles/894588/',\n", " 'https://habr.com/ru/articles/893824/',\n", " 'https://habr.com/ru/articles/894580/',\n", " 'https://habr.com/ru/articles/894582/',\n", " 'https://habr.com/ru/articles/894646/',\n", " 'https://habr.com/ru/articles/891928/',\n", " 'https://habr.com/ru/articles/893964/',\n", " 'https://habr.com/ru/articles/894590/',\n", " 'https://habr.com/ru/articles/894628/',\n", " 'https://habr.com/ru/articles/894624/',\n", " 'https://habr.com/ru/articles/894546/',\n", " 'https://habr.com/ru/articles/894612/',\n", " 'https://habr.com/ru/articles/892882/',\n", " 'https://habr.com/ru/articles/894602/',\n", " 'https://habr.com/ru/articles/894556/',\n", " 'https://habr.com/ru/articles/894530/',\n", " 'https://habr.com/ru/articles/878282/',\n", " 'https://habr.com/ru/articles/894570/',\n", " 'https://habr.com/ru/articles/894508/',\n", " 'https://habr.com/ru/articles/894502/',\n", " 'https://habr.com/ru/articles/893852/',\n", " 'https://habr.com/ru/articles/894348/',\n", " 'https://habr.com/ru/articles/889822/',\n", " 'https://habr.com/ru/articles/894554/',\n", " 'https://habr.com/ru/articles/894528/',\n", " 'https://habr.com/ru/articles/894462/',\n", " 'https://habr.com/ru/articles/894558/',\n", " 'https://habr.com/ru/articles/894562/',\n", " 'https://habr.com/ru/articles/894320/',\n", " 'https://habr.com/ru/articles/894524/',\n", " 'https://habr.com/ru/articles/894510/',\n", " 'https://habr.com/ru/articles/894544/',\n", " 'https://habr.com/ru/articles/894572/',\n", " 'https://habr.com/ru/articles/891988/',\n", " 'https://habr.com/ru/articles/894480/',\n", " 'https://habr.com/ru/articles/894458/',\n", " 'https://habr.com/ru/articles/894446/',\n", " 'https://habr.com/ru/articles/894282/',\n", " 'https://habr.com/ru/articles/894260/',\n", " 'https://habr.com/ru/articles/894456/',\n", " 'https://habr.com/ru/articles/894426/',\n", " 'https://habr.com/ru/articles/894448/',\n", " 'https://habr.com/ru/articles/894486/',\n", " 'https://habr.com/ru/articles/894438/',\n", " 'https://habr.com/ru/articles/894442/',\n", " 'https://habr.com/ru/articles/894176/',\n", " 'https://habr.com/ru/articles/894430/',\n", " 'https://habr.com/ru/articles/894432/',\n", " 'https://habr.com/ru/articles/881424/',\n", " 'https://habr.com/ru/articles/893506/',\n", " 'https://habr.com/ru/articles/894314/',\n", " 'https://habr.com/ru/articles/894122/',\n", " 'https://habr.com/ru/articles/891702/',\n", " 'https://habr.com/ru/articles/894390/',\n", " 'https://habr.com/ru/articles/894420/',\n", " 'https://habr.com/ru/articles/894398/',\n", " 'https://habr.com/ru/articles/894370/',\n", " 'https://habr.com/ru/articles/894338/',\n", " 'https://habr.com/ru/articles/894368/',\n", " 'https://habr.com/ru/articles/894392/',\n", " 'https://habr.com/ru/articles/894236/',\n", " 'https://habr.com/ru/articles/894412/',\n", " 'https://habr.com/ru/articles/894382/',\n", " 'https://habr.com/ru/articles/893470/',\n", " 'https://habr.com/ru/articles/894366/',\n", " 'https://habr.com/ru/articles/893848/',\n", " 'https://habr.com/ru/articles/894402/',\n", " 'https://habr.com/ru/articles/894254/',\n", " 'https://habr.com/ru/articles/894334/',\n", " 'https://habr.com/ru/articles/894384/',\n", " 'https://habr.com/ru/articles/893966/',\n", " 'https://habr.com/ru/articles/894372/',\n", " 'https://habr.com/ru/articles/894356/',\n", " 'https://habr.com/ru/articles/894376/',\n", " 'https://habr.com/ru/articles/893130/',\n", " 'https://habr.com/ru/articles/894214/',\n", " 'https://habr.com/ru/articles/894180/',\n", " 'https://habr.com/ru/articles/892890/',\n", " 'https://habr.com/ru/articles/894286/',\n", " 'https://habr.com/ru/articles/894262/',\n", " 'https://habr.com/ru/articles/894280/',\n", " 'https://habr.com/ru/articles/894146/',\n", " 'https://habr.com/ru/articles/894140/',\n", " 'https://habr.com/ru/articles/894182/',\n", " 'https://habr.com/ru/articles/894284/',\n", " 'https://habr.com/ru/articles/894248/',\n", " 'https://habr.com/ru/articles/894350/',\n", " 'https://habr.com/ru/articles/894316/',\n", " 'https://habr.com/ru/articles/894258/',\n", " 'https://habr.com/ru/articles/893988/',\n", " 'https://habr.com/ru/articles/894266/',\n", " 'https://habr.com/ru/articles/892118/',\n", " 'https://habr.com/ru/articles/894292/',\n", " 'https://habr.com/ru/articles/894216/',\n", " 'https://habr.com/ru/articles/894170/',\n", " 'https://habr.com/ru/articles/894214/',\n", " 'https://habr.com/ru/articles/894228/',\n", " 'https://habr.com/ru/articles/893846/',\n", " 'https://habr.com/ru/articles/894222/',\n", " 'https://habr.com/ru/articles/894104/',\n", " 'https://habr.com/ru/articles/893416/',\n", " 'https://habr.com/ru/articles/894234/',\n", " 'https://habr.com/ru/articles/894136/',\n", " 'https://habr.com/ru/articles/893460/',\n", " 'https://habr.com/ru/articles/894062/',\n", " 'https://habr.com/ru/articles/894168/',\n", " 'https://habr.com/ru/articles/894154/',\n", " 'https://habr.com/ru/articles/894152/',\n", " 'https://habr.com/ru/articles/893402/',\n", " 'https://habr.com/ru/articles/894142/',\n", " 'https://habr.com/ru/articles/893916/',\n", " 'https://habr.com/ru/articles/894226/',\n", " 'https://habr.com/ru/articles/893956/',\n", " 'https://habr.com/ru/articles/884072/',\n", " 'https://habr.com/ru/articles/894090/',\n", " 'https://habr.com/ru/articles/892268/',\n", " 'https://habr.com/ru/articles/893838/',\n", " 'https://habr.com/ru/articles/894014/',\n", " 'https://habr.com/ru/articles/894114/',\n", " 'https://habr.com/ru/articles/894088/',\n", " 'https://habr.com/ru/articles/894074/',\n", " 'https://habr.com/ru/articles/893508/',\n", " 'https://habr.com/ru/articles/894082/',\n", " 'https://habr.com/ru/articles/885924/',\n", " 'https://habr.com/ru/articles/894110/',\n", " 'https://habr.com/ru/articles/893744/',\n", " 'https://habr.com/ru/articles/894138/',\n", " 'https://habr.com/ru/articles/894004/',\n", " 'https://habr.com/ru/articles/893948/',\n", " 'https://habr.com/ru/articles/894100/',\n", " 'https://habr.com/ru/articles/893900/',\n", " 'https://habr.com/ru/articles/894126/',\n", " 'https://habr.com/ru/articles/894086/',\n", " 'https://habr.com/ru/articles/891624/',\n", " 'https://habr.com/ru/articles/892348/',\n", " 'https://habr.com/ru/articles/894050/',\n", " 'https://habr.com/ru/articles/893936/',\n", " 'https://habr.com/ru/articles/894046/',\n", " 'https://habr.com/ru/articles/893012/',\n", " 'https://habr.com/ru/articles/892654/',\n", " 'https://habr.com/ru/articles/893802/',\n", " 'https://habr.com/ru/articles/893694/',\n", " 'https://habr.com/ru/articles/891690/',\n", " 'https://habr.com/ru/articles/892952/',\n", " 'https://habr.com/ru/articles/894056/',\n", " 'https://habr.com/ru/articles/894068/',\n", " 'https://habr.com/ru/articles/893914/',\n", " 'https://habr.com/ru/articles/894042/',\n", " 'https://habr.com/ru/articles/893572/',\n", " 'https://habr.com/ru/articles/894006/',\n", " 'https://habr.com/ru/articles/894028/',\n", " 'https://habr.com/ru/articles/893822/',\n", " 'https://habr.com/ru/articles/893612/',\n", " 'https://habr.com/ru/articles/894036/',\n", " 'https://habr.com/ru/articles/893994/',\n", " 'https://habr.com/ru/articles/893874/',\n", " 'https://habr.com/ru/articles/893250/',\n", " 'https://habr.com/ru/articles/894008/',\n", " 'https://habr.com/ru/articles/893006/',\n", " 'https://habr.com/ru/articles/892140/',\n", " 'https://habr.com/ru/articles/893958/',\n", " 'https://habr.com/ru/articles/893910/',\n", " 'https://habr.com/ru/articles/893652/',\n", " 'https://habr.com/ru/articles/893960/',\n", " 'https://habr.com/ru/articles/893536/',\n", " 'https://habr.com/ru/articles/893876/',\n", " 'https://habr.com/ru/articles/893922/',\n", " 'https://habr.com/ru/articles/893950/',\n", " 'https://habr.com/ru/articles/894026/',\n", " 'https://habr.com/ru/articles/893882/',\n", " 'https://habr.com/ru/articles/893968/',\n", " 'https://habr.com/ru/articles/887276/',\n", " 'https://habr.com/ru/articles/893628/',\n", " 'https://habr.com/ru/articles/894032/',\n", " 'https://habr.com/ru/articles/893866/',\n", " 'https://habr.com/ru/articles/893890/',\n", " 'https://habr.com/ru/articles/893854/',\n", " 'https://habr.com/ru/articles/893926/',\n", " 'https://habr.com/ru/articles/893896/',\n", " 'https://habr.com/ru/articles/893892/',\n", " 'https://habr.com/ru/articles/893862/',\n", " 'https://habr.com/ru/articles/893870/',\n", " 'https://habr.com/ru/articles/893894/',\n", " 'https://habr.com/ru/articles/893860/',\n", " 'https://habr.com/ru/articles/893804/',\n", " 'https://habr.com/ru/articles/893918/',\n", " 'https://habr.com/ru/articles/893908/',\n", " 'https://habr.com/ru/articles/893542/',\n", " 'https://habr.com/ru/articles/893616/',\n", " 'https://habr.com/ru/articles/893938/',\n", " 'https://habr.com/ru/articles/893942/',\n", " 'https://habr.com/ru/articles/893932/',\n", " 'https://habr.com/ru/articles/893920/',\n", " 'https://habr.com/ru/articles/893806/']" ] }, "execution_count": 52, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import re\n", "\n", "habr_prefix = \"https://habr.com/ru/articles/\"\n", "article_prefix = \"https://habr.com/ru/\"\n", "\n", "articles_urls = []\n", "\n", "\n", "for page_suffix in [\"\"] + [f\"page{i}/\" for i in range(50)]:\n", " page_url = habr_prefix + page_suffix\n", " src = requests.get(page_url).text\n", " for article_suffix in set(re.findall('articles/\\d+/', src)):\n", " articles_urls.append(article_prefix + article_suffix)\n", "\n", "articles_urls" ] }, { "cell_type": "code", "execution_count": 53, "id": "cf76dc3b-ef5e-45b2-b8f5-20d7164a0444", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "1000" ] }, "execution_count": 53, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(articles_urls)" ] }, { "cell_type": "code", "execution_count": 54, "id": "cb4a4c34-46e9-4c58-b987-a0583fe479e8", "metadata": { "scrolled": true }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "100%|██████████████████████████| 1000/1000 [09:35<00:00, 1.74it/s]\n" ] } ], "source": [ "import time\n", "from tqdm import tqdm\n", "\n", "pagers = []\n", "\n", "for url in tqdm(articles_urls):\n", " pagers.append(get_pager(url))\n", " " ] }, { "cell_type": "code", "execution_count": 55, "id": "46ff7a3f-0ffd-4b30-86f8-7ae41dfd999f", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "1000" ] }, "execution_count": 55, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pagers = [x for x in pagers if x is not None]\n", "len(pagers)" ] }, { "cell_type": "code", "execution_count": 56, "id": "f96feb7a-e4ef-4376-8561-71b93b45e07d", "metadata": {}, "outputs": [], "source": [ "from nltk.corpus import stopwords\n", "from nltk.tokenize import word_tokenize\n", "stop_words = set(stopwords.words('russian'))\n", "\n", "stop_words\n", "\n", "for pager in pagers:\n", " pager.original_tags = list(set([x for x in pager.original_tags if x not in stop_words]))" ] }, { "cell_type": "code", "execution_count": 57, "id": "29f38543-e898-4d57-b4cd-54a1766208dc", "metadata": {}, "outputs": [], "source": [ "# summarizer = Summarizer(\"cuda\")\n", "\n", "for pager in pagers:\n", " pager.text = pager.text[:3000]" ] }, { "cell_type": "code", "execution_count": 58, "id": "2b7ea244-93cb-4c62-8da9-d6c530f3059e", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/tmp/ipykernel_57433/3926614870.py:3: PydanticDeprecatedSince20: The `dict` method is deprecated; use `model_dump` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.10/migration/\n", " df = pd.DataFrame([pager.dict() for pager in pagers])\n" ] }, { "data": { "text/html": [ "
\n", " | title | \n", "text | \n", "original_tags | \n", "
---|---|---|---|
0 | \n", "Раскраска листинга процедуры T-SQL значениями ... | \n", "Сразу покажу, о чем идет речь, чтобы вы решили... | \n", "[sql, tsql, markup, profiler, performance] | \n", "
1 | \n", "Искусственный интеллект и алгоритмы в энергети... | \n", "Энергетические системы — одни из самых сложных... | \n", "[алгоритмы, ии, ит, интеллект, лэп, энергетика... | \n", "
2 | \n", "«Эти фильмы были ужасны» — короткий рассказ о ... | \n", "Концепт-арт к «Ксеногенезису» (1978) — к так и... | \n", "[кино, рисование, фильмы, фантастика, эффекты,... | \n", "
3 | \n", "Что будет, если не использовать TCP или UDP? /... | \n", "Коммутаторы, маршрутизаторы, брандмауэры — все... | \n", "[эксперимент, данных, tcp, передача, протоколы... | \n", "
4 | \n", "Почему тренд на аэрошоссеры возвращаются / Хабр | \n", "Почему аэрошоссеры возвращаются в 2025 годуНес... | \n", "[скорость, вес, шоссейный, аэротруба, велосипе... | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "
995 | \n", "Service Mesh в дикой природе или как не стать ... | \n", "ВведениеУгрозы безопасности в Service Mesh1. О... | \n", "[безопасность, приложений, микросервисы, микро... | \n", "
996 | \n", "Apple Pro Weekly News (17.03 – 23.03.25) / Хабр | \n", "Что сломали в последнем обновлении iOS, от чег... | \n", "[Apple, Siri, iMazing, iPhone, App, iPad, iOS,... | \n", "
997 | \n", "Как не завязнуть в болоте рутины и оставаться ... | \n", "С того момента, как я начал работать IT менедж... | \n", "[командой, проектов, руководство, проектами, л... | \n", "
998 | \n", "Релиз Linux 6.14 / Хабр | \n", "24 марта 2025 года Линус Торвальдс представил ... | \n", "[6.14, Linux, торвальдс] | \n", "
999 | \n", "Чтобы побеждать, достаточно одной книги / Хабр | \n", "\\nЕсть два правдивых утверждения.\\n\\nПервое пр... | \n", "[ruvds_перевод, техническая, программиста, лит... | \n", "
1000 rows × 3 columns
\n", "Step | \n", "Training Loss | \n", "Validation Loss | \n", "
---|---|---|
32 | \n", "5.416400 | \n", "4.127873 | \n", "
64 | \n", "4.426000 | \n", "4.022820 | \n", "
96 | \n", "4.410300 | \n", "3.992565 | \n", "
128 | \n", "4.223600 | \n", "4.147720 | \n", "
160 | \n", "4.010000 | \n", "4.239121 | \n", "
192 | \n", "4.041600 | \n", "4.379041 | \n", "
224 | \n", "4.206400 | \n", "4.391558 | \n", "
256 | \n", "3.987700 | \n", "4.585879 | \n", "
288 | \n", "3.908800 | \n", "4.618155 | \n", "
320 | \n", "4.004100 | \n", "4.647362 | \n", "
352 | \n", "4.154500 | \n", "4.636343 | \n", "
384 | \n", "3.407400 | \n", "4.534647 | \n", "
416 | \n", "3.428100 | \n", "4.678164 | \n", "
448 | \n", "3.402900 | \n", "4.636761 | \n", "
480 | \n", "2.665700 | \n", "4.763720 | \n", "
512 | \n", "2.831600 | \n", "4.805240 | \n", "
544 | \n", "2.998100 | \n", "4.617594 | \n", "
576 | \n", "2.804400 | \n", "5.175800 | \n", "
608 | \n", "2.127400 | \n", "5.117918 | \n", "
640 | \n", "2.272400 | \n", "4.671577 | \n", "
672 | \n", "2.303100 | \n", "4.919456 | \n", "
704 | \n", "2.158100 | \n", "5.571861 | \n", "
736 | \n", "1.684200 | \n", "5.425070 | \n", "
768 | \n", "1.757900 | \n", "5.110906 | \n", "
800 | \n", "1.862000 | \n", "5.411929 | \n", "
832 | \n", "1.459200 | \n", "5.478015 | \n", "
864 | \n", "1.304800 | \n", "5.500659 | \n", "
896 | \n", "1.365000 | \n", "5.509354 | \n", "
928 | \n", "1.444600 | \n", "5.719732 | \n", "
960 | \n", "0.984300 | \n", "6.151366 | \n", "
992 | \n", "1.017600 | \n", "5.967303 | \n", "
1024 | \n", "1.098300 | \n", "5.882926 | \n", "
1056 | \n", "0.776300 | \n", "6.050797 | \n", "
1088 | \n", "0.803200 | \n", "5.942257 | \n", "
1120 | \n", "0.899000 | \n", "6.097410 | \n", "
1152 | \n", "0.751200 | \n", "6.110172 | \n", "
1184 | \n", "0.667600 | \n", "6.307382 | \n", "
1216 | \n", "0.701000 | \n", "6.199901 | \n", "
1248 | \n", "0.688200 | \n", "6.502533 | \n", "
1280 | \n", "0.589300 | \n", "6.538973 | \n", "
1312 | \n", "0.480100 | \n", "6.469034 | \n", "
1344 | \n", "0.594700 | \n", "6.437945 | \n", "
1376 | \n", "0.551600 | \n", "6.482977 | \n", "
1408 | \n", "0.455600 | \n", "6.612985 | \n", "
1440 | \n", "0.480400 | \n", "6.619091 | \n", "
1472 | \n", "0.447000 | \n", "6.681647 | \n", "
1504 | \n", "0.393100 | \n", "6.687038 | \n", "
1536 | \n", "0.374100 | \n", "6.817354 | \n", "
1568 | \n", "0.348400 | \n", "6.522055 | \n", "
1600 | \n", "0.349100 | \n", "6.925219 | \n", "
1632 | \n", "0.300700 | \n", "6.731413 | \n", "
1664 | \n", "0.316900 | \n", "6.704000 | \n", "
1696 | \n", "0.335800 | \n", "6.584372 | \n", "
1728 | \n", "0.286500 | \n", "6.806914 | \n", "
1760 | \n", "0.261000 | \n", "6.755325 | \n", "
1792 | \n", "0.272200 | \n", "6.745789 | \n", "
1824 | \n", "0.281800 | \n", "7.027082 | \n", "
1856 | \n", "0.232800 | \n", "6.984172 | \n", "
1888 | \n", "0.230300 | \n", "7.019201 | \n", "
1920 | \n", "0.266100 | \n", "7.003294 | \n", "
1952 | \n", "0.184700 | \n", "7.019387 | \n", "
1984 | \n", "0.186000 | \n", "7.056015 | \n", "
2016 | \n", "0.180400 | \n", "7.068249 | \n", "
2048 | \n", "0.203600 | \n", "7.093792 | \n", "
2080 | \n", "0.182300 | \n", "7.032248 | \n", "
2112 | \n", "0.151200 | \n", "7.065526 | \n", "
2144 | \n", "0.161200 | \n", "7.156117 | \n", "
2176 | \n", "0.141000 | \n", "7.302792 | \n", "
2208 | \n", "0.122200 | \n", "7.313677 | \n", "
2240 | \n", "0.132400 | \n", "7.292119 | \n", "
2272 | \n", "0.143200 | \n", "7.382527 | \n", "
2304 | \n", "0.128000 | \n", "7.396447 | \n", "
2336 | \n", "0.102900 | \n", "7.321751 | \n", "
2368 | \n", "0.136200 | \n", "7.299072 | \n", "
2400 | \n", "0.083200 | \n", "7.346662 | \n", "
2432 | \n", "0.109200 | \n", "7.440024 | \n", "
2464 | \n", "0.099000 | \n", "7.494668 | \n", "
2496 | \n", "0.092300 | \n", "7.354113 | \n", "
2528 | \n", "0.077800 | \n", "7.522041 | \n", "
2560 | \n", "0.069700 | \n", "7.423881 | \n", "
2592 | \n", "0.076400 | \n", "7.300383 | \n", "
2624 | \n", "0.068300 | \n", "7.523688 | \n", "
2656 | \n", "0.065100 | \n", "7.270813 | \n", "
2688 | \n", "0.059500 | \n", "7.290957 | \n", "
2720 | \n", "0.061900 | \n", "7.378780 | \n", "
2752 | \n", "0.049000 | \n", "7.473153 | \n", "
2784 | \n", "0.046800 | \n", "7.502200 | \n", "
2816 | \n", "0.047900 | \n", "7.495668 | \n", "
2848 | \n", "0.034600 | \n", "7.529656 | \n", "
2880 | \n", "0.026700 | \n", "7.580919 | \n", "
2912 | \n", "0.028300 | \n", "7.630338 | \n", "
2944 | \n", "0.025300 | \n", "7.725150 | \n", "
2976 | \n", "0.027000 | \n", "7.529447 | \n", "
3008 | \n", "0.014800 | \n", "7.630457 | \n", "
3040 | \n", "0.019500 | \n", "7.620898 | \n", "
3072 | \n", "0.011600 | \n", "7.675141 | \n", "
3104 | \n", "0.013400 | \n", "7.726362 | \n", "
3136 | \n", "0.014800 | \n", "7.773665 | \n", "
3168 | \n", "0.012900 | \n", "7.706628 | \n", "
3200 | \n", "0.010100 | \n", "7.726365 | \n", "
3232 | \n", "0.007300 | \n", "7.754138 | \n", "
3264 | \n", "0.004800 | \n", "7.734880 | \n", "
3296 | \n", "0.007100 | \n", "7.717532 | \n", "
3328 | \n", "0.008800 | \n", "7.734691 | \n", "
3360 | \n", "0.004000 | \n", "7.723871 | \n", "
3392 | \n", "0.006100 | \n", "7.772519 | \n", "
3424 | \n", "0.006000 | \n", "7.810041 | \n", "
3456 | \n", "0.003900 | \n", "7.762656 | \n", "
3488 | \n", "0.005500 | \n", "7.769754 | \n", "
3520 | \n", "0.004500 | \n", "7.806248 | \n", "
3552 | \n", "0.005000 | \n", "7.815495 | \n", "
3584 | \n", "0.005000 | \n", "7.834338 | \n", "
3616 | \n", "0.004100 | \n", "7.856540 | \n", "
3648 | \n", "0.006100 | \n", "7.847775 | \n", "
3680 | \n", "0.003300 | \n", "7.834288 | \n", "
3712 | \n", "0.004400 | \n", "7.798307 | \n", "
3744 | \n", "0.002000 | \n", "7.792634 | \n", "
3776 | \n", "0.004200 | \n", "7.783415 | \n", "
3808 | \n", "0.002800 | \n", "7.783332 | \n", "
3840 | \n", "0.002500 | \n", "7.783207 | \n", "
3872 | \n", "0.003300 | \n", "7.791155 | \n", "
3904 | \n", "0.002700 | \n", "7.788821 | \n", "
3936 | \n", "0.003300 | \n", "7.790948 | \n", "
3968 | \n", "0.002900 | \n", "7.795428 | \n", "
4000 | \n", "0.002400 | \n", "7.796586 | \n", "
"
],
"text/plain": [
"