import gradio as gr
from requests_html import HTMLSession


def setup_session():
    """Create a requests-html session (JavaScript pages are rendered via render())."""
    session = HTMLSession()
    return session


def generate_naver_search_url(query):
    """Build a Naver blog-tab search URL for the given query."""
    base_url = "https://search.naver.com/search.naver?"
    params = {"ssc": "tab.blog.all", "sm": "tab_jum"}
    params["query"] = query
    url = base_url + "&".join(f"{key}={value}" for key, value in params.items())
    return url


def crawl_naver_search_results(url):
    """Crawl a Naver search results page and return titles and links as an HTML table."""
    session = setup_session()
    response = session.get(url)
    response.html.render()

    results = []
    i = 1
    for li in response.html.find("li.bx"):
        for div in li.find("div.detail_box"):
            for div2 in div.find("div.title_area"):
                title = div2.text.strip()
                for a in div2.find("a"):
                    link = a.attrs.get("href")
                    if not link:
                        continue
                    results.append({"번호": i, "제목": title, "링크": link})
                    i += 1

    # Show only the first 10 results (columns: 번호 = no., 제목 = title, 링크 = link).
    html_table = "<table><tr><th>번호</th><th>제목</th><th>링크</th></tr>"
    for result in results[:10]:
        html_table += (
            f"<tr><td>{result['번호']}</td>"
            f"<td>{result['제목']}</td>"
            f"<td>{result['링크']}</td></tr>"
        )
    html_table += "</table>"
    return html_table


def get_blog_content(link):
    """Fetch a Naver blog post and return the text of its body paragraphs."""
    session = setup_session()
    response = session.get(link)
    response.html.render()

    content = ""
    for component in response.html.find("div.se-component.se-text.se-l-default"):
        for paragraph in component.find("p.se-text-paragraph"):
            content += paragraph.text.strip() + "\n"
    return content


with gr.Interface(
    fn=lambda query: crawl_naver_search_results(generate_naver_search_url(query)),
    inputs=gr.Textbox(label="키워드를 입력하세요"),
    outputs=gr.HTML(label="크롤링된 제목과 링크 목록"),
    title="네이버 검색 제목과 링크 크롤러",
    description="검색 쿼리를 입력하여 네이버 검색 결과에서 제목과 링크를 크롤링합니다",
) as demo:
    # Extra widgets below the search interface: paste a blog link to fetch its text.
    button = gr.Button("블로그 제목 가져오기")
    text_input = gr.Textbox(label="링크를 입력하세요")
    text_output = gr.Textbox(label="블로그 제목")

    button.click(fn=get_blog_content, inputs=text_input, outputs=text_output)

demo.launch(share=True)