File size: 2,599 Bytes
fb261e3
 
b6e38e2
 
fb261e3
b6e38e2
fb261e3
 
 
b6e38e2
 
fb261e3
b6e38e2
 
 
 
 
 
 
 
 
 
 
fb261e3
b6e38e2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fb261e3
b6e38e2
 
 
 
 
 
 
 
fb261e3
b6e38e2
fb261e3
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
import gradio as gr
from transformers import T5Tokenizer, T5ForConditionalGeneration
import torch
import re

# Model setup: pick the compute device, then load the tokenizer and the
# T5 checkpoint and move the model onto that device.
model_name = "ravindravala/ravi-v0.1"
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
tokenizer = T5Tokenizer.from_pretrained(model_name)
model = T5ForConditionalGeneration.from_pretrained(model_name).to(device)

# Post-process generated text: drop sentences that repeat verbatim.
def clean_output(text):
    """Return *text* with exact duplicate sentences removed.

    The text is split after ``.``, ``!`` or ``?`` followed by whitespace.
    Each piece is stripped, empty pieces are discarded, and only the first
    occurrence of each distinct piece is kept (comparison is exact and
    case-sensitive). The survivors are joined with single spaces.
    """
    pieces = (chunk.strip() for chunk in re.split(r'(?<=[.!?])\s+', text.strip()))
    # dict keys keep insertion order, which gives a first-seen-wins dedup.
    first_seen = dict.fromkeys(piece for piece in pieces if piece)
    return " ".join(first_seen).strip()

# Length limits for generation, derived from the tokenized input length
def _generation_length_bounds(input_length, floor=30, ceiling=512):
    """Return ``(min_len, max_len)`` for generation, with ``min_len <= max_len``.

    ``max_len`` is 130% of *input_length*, capped at *ceiling*. ``min_len`` is
    70% of *input_length*, raised to *floor* when that is still feasible.
    For short inputs the floor would exceed ``max_len``; in that case the
    floor is dropped and the proportional lower bound is used instead.
    """
    max_len = min(ceiling, int(input_length * 1.3))
    min_len = max(floor, int(input_length * 0.7))
    if min_len > max_len:
        # The floor cannot be met within max_len, so fall back to the
        # proportional bound rather than hand generate() infeasible limits.
        min_len = min(int(input_length * 0.7), max_len)
    return min_len, max_len


# Process a single paragraph
def process_block(block, temperature, top_k, top_p):
    """Paraphrase one block of text with the module-level model.

    Args:
        block: Text to paraphrase. A blank or whitespace-only block is
            returned unchanged without calling the model.
        temperature: Sampling temperature passed to ``model.generate``.
        top_k: Top-k sampling cutoff; coerced to ``int`` before use.
        top_p: Nucleus sampling threshold passed to ``model.generate``.

    Returns:
        The decoded model output after ``clean_output`` has removed
        repeated sentences.
    """
    if not block.strip():
        return block
    input_text = "paraphrase: " + block
    input_ids = tokenizer(input_text, return_tensors="pt", truncation=True, max_length=512).input_ids.to(device)

    # Tie the output length to the (possibly truncated) input token count.
    min_len, max_len = _generation_length_bounds(input_ids.shape[1])

    output_ids = model.generate(
        input_ids,
        max_length=max_len,
        min_length=min_len,
        do_sample=True,
        # NOTE(review): top-k filtering expects an integer; a UI slider may
        # hand over a float such as 50.0, so coerce defensively.
        top_k=int(top_k),
        top_p=top_p,
        temperature=temperature,
        repetition_penalty=1.3,
        no_repeat_ngram_size=3,
        eos_token_id=tokenizer.eos_token_id
    )

    output_text = tokenizer.decode(output_ids[0], skip_special_tokens=True)
    return clean_output(output_text)

# Paraphrase multi-line input segment by segment, keeping line breaks as typed
def humanize(text, temperature, top_k, top_p):
    """Paraphrase *text* while leaving its newline layout intact.

    Blank or whitespace-only input yields a prompt message instead. Otherwise
    the text is split on runs of newlines, with the runs themselves kept.
    Every segment that has visible content goes through ``process_block``;
    newline runs and whitespace-only segments are passed through verbatim.
    """
    if text.strip() == "":
        return "Please enter some text."

    rebuilt = []
    # The capturing group makes re.split return the newline runs as well.
    for segment in re.split(r'(\n+)', text):
        if segment.strip():
            rebuilt.append(process_block(segment, temperature, top_k, top_p))
        else:
            rebuilt.append(segment)
    return "".join(rebuilt)

# Gradio UI: one text input plus three sampling sliders, wired to humanize()
demo = gr.Interface(
    title="AI → Human Paraphraser",
    description="Rewrites AI-like responses into natural, human-like text while preserving paragraphs and formatting.",
    fn=humanize,
    inputs=[
        gr.Textbox(label="Text to Humanize", placeholder="Enter text to paraphrase...", lines=8),
        gr.Slider(minimum=0.1, maximum=1.5, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(minimum=10, maximum=100, value=50, step=1, label="Top-K"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.9, step=0.05, label="Top-P"),
    ],
    outputs=gr.Textbox(lines=8, label="Humanized Output"),
)

# Starts the web app as soon as this module is executed.
demo.launch()