Spaces:
Running
on
Zero
Running
on
Zero
| # app.py — InstantID × Beautiful Realistic Asians v7 (ZeroGPU-friendly, persistent cache) | |
| """Persistent-cache backend for InstantID portrait generation. | |
| * 依存モデルは /data が書込可ならそこへ、それ以外は ~/.cache に保存 | |
| * wget を使った簡易リトライ DL | |
| """ | |
| # --- ★ Monkey-Patch: torchvision 0.17+ で消えた functional_tensor を補完 --- | |
| import types, sys | |
| from torchvision.transforms import functional as F | |
| mod = types.ModuleType("torchvision.transforms.functional_tensor") | |
| # 必要なのは rgb_to_grayscale だけなのでこれだけエイリアス | |
| mod.rgb_to_grayscale = F.rgb_to_grayscale | |
| sys.modules["torchvision.transforms.functional_tensor"] = mod | |
| # --------------------------------------------------------------------------- | |
| import os, subprocess, cv2, torch, spaces, gradio as gr, numpy as np | |
| from pathlib import Path | |
| from PIL import Image | |
| from diffusers import ( | |
| StableDiffusionPipeline, ControlNetModel, | |
| DPMSolverMultistepScheduler, AutoencoderKL, | |
| ) | |
| from insightface.app import FaceAnalysis | |
| ############################################################################## | |
| # 0. キャッシュ用ディレクトリ | |
| ############################################################################## | |
| PERSIST_BASE = Path("/data") | |
| CACHE_ROOT = ( | |
| PERSIST_BASE / "instantid_cache" | |
| if PERSIST_BASE.exists() and os.access(PERSIST_BASE, os.W_OK) | |
| else Path.home() / ".cache" / "instantid_cache" | |
| ) | |
| print("cache →", CACHE_ROOT) | |
| MODELS_DIR = CACHE_ROOT / "models" | |
| LORA_DIR = MODELS_DIR / "Lora" # FaceID LoRA などを置く | |
| EMB_DIR = CACHE_ROOT / "embeddings" | |
| UPSCALE_DIR = CACHE_ROOT / "realesrgan" | |
| for p in (MODELS_DIR, LORA_DIR, EMB_DIR, UPSCALE_DIR): | |
| p.mkdir(parents=True, exist_ok=True) | |
| def dl(url: str, dst: Path, attempts: int = 2): | |
| """wget + リトライの簡易ダウンローダ""" | |
| if dst.exists(): | |
| print("✓", dst.relative_to(CACHE_ROOT)); return | |
| for i in range(1, attempts + 1): | |
| print(f"⬇ {dst.name} (try {i}/{attempts})") | |
| if subprocess.call(["wget", "-q", "-O", str(dst), url]) == 0: | |
| return | |
| raise RuntimeError(f"download failed → {url}") | |
| ############################################################################## | |
| # 1. 必要アセットのダウンロード | |
| ############################################################################## | |
| print("— asset check —") | |
| # 1-A. ベース checkpoint | |
| BASE_CKPT = MODELS_DIR / "beautiful_realistic_asians_v7_fp16.safetensors" | |
| dl( | |
| "https://civitai.com/api/download/models/177164?type=Model&format=SafeTensor&size=pruned&fp=fp16", | |
| BASE_CKPT, | |
| ) | |
| # 1-B. FaceID LoRA(Δのみ) | |
| LORA_FILE = LORA_DIR / "ip-adapter-faceid-plusv2_sd15_lora.safetensors" | |
| dl( | |
| "https://huggingface.co/h94/IP-Adapter-FaceID/resolve/main/ip-adapter-faceid-plusv2_sd15_lora.safetensors", | |
| LORA_FILE, | |
| ) | |
| # 1-C. textual inversion Embeddings | |
| EMB_URLS = { | |
| "ng_deepnegative_v1_75t.pt": [ | |
| "https://huggingface.co/datasets/gsdf/EasyNegative/resolve/main/ng_deepnegative_v1_75t.pt", | |
| "https://huggingface.co/mrpxl2/animetarotV51.safetensors/raw/cc3008c0148061896549a995cc297aef0af4ef1b/ng_deepnegative_v1_75t.pt", | |
| ], | |
| "badhandv4.pt": [ | |
| "https://huggingface.co/datasets/gsdf/ConceptLab/resolve/main/badhandv4.pt", | |
| "https://huggingface.co/nolanaatama/embeddings/raw/main/badhandv4.pt", | |
| ], | |
| "CyberRealistic_Negative-neg.pt": [ | |
| "https://huggingface.co/datasets/gsdf/ConceptLab/resolve/main/CyberRealistic_Negative-neg.pt", | |
| "https://huggingface.co/wsj1995/embeddings/raw/main/CyberRealistic_Negative-neg.civitai.info", | |
| ], | |
| "UnrealisticDream.pt": [ | |
| "https://huggingface.co/datasets/gsdf/ConceptLab/resolve/main/UnrealisticDream.pt", | |
| "https://huggingface.co/imagepipeline/UnrealisticDream/raw/main/f84133b4-aad8-44be-b9ce-7e7e3a8c111f.pt", | |
| ], | |
| } | |
| for fname, urls in EMB_URLS.items(): | |
| dst = EMB_DIR / fname | |
| for idx, u in enumerate(urls, 1): | |
| try: | |
| dl(u, dst); break | |
| except RuntimeError: | |
| if idx == len(urls): raise | |
| print(" ↳ fallback URL …") | |
| # 1-D. Real-ESRGAN weights (×8) | |
| RRG_WEIGHTS = UPSCALE_DIR / "RealESRGAN_x8plus.pth" | |
| RRG_URLS = [ | |
| "https://huggingface.co/NoCrypt/Superscale_RealESRGAN/resolve/main/RealESRGAN_x8plus.pth", | |
| "https://huggingface.co/ai-forever/Real-ESRGAN/raw/main/RealESRGAN_x8.pth", | |
| "https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.1/8x_NMKD-Superscale_100k.pth", | |
| ] | |
| for idx, link in enumerate(RRG_URLS, 1): | |
| try: | |
| dl(link, RRG_WEIGHTS); break | |
| except RuntimeError: | |
| if idx == len(RRG_URLS): raise | |
| print(" ↳ fallback URL …") | |
| ############################################################################## | |
| # 2. ランタイム初期化 | |
| ############################################################################## | |
| device = torch.device("cuda" if torch.cuda.is_available() else "cpu") | |
| dtype = torch.float16 if torch.cuda.is_available() else torch.float32 | |
| print("device:", device, "| dtype:", dtype) | |
| providers = ( | |
| ["CUDAExecutionProvider", "CPUExecutionProvider"] | |
| if torch.cuda.is_available() | |
| else ["CPUExecutionProvider"] | |
| ) | |
| face_app = FaceAnalysis(name="buffalo_l", root=str(CACHE_ROOT), providers=providers) | |
| face_app.prepare(ctx_id=(0 if torch.cuda.is_available() else -1), det_size=(640, 640)) | |
| # ControlNet + SD パイプライン | |
| controlnet = ControlNetModel.from_pretrained( | |
| "InstantX/InstantID", subfolder="ControlNetModel", torch_dtype=dtype | |
| ) | |
| pipe = StableDiffusionPipeline.from_single_file( | |
| BASE_CKPT, torch_dtype=dtype, safety_checker=None, use_safetensors=True, clip_skip=2 | |
| ) | |
| pipe.vae = AutoencoderKL.from_pretrained( | |
| "stabilityai/sd-vae-ft-mse", torch_dtype=dtype | |
| ).to(device) | |
| pipe.controlnet = controlnet | |
| pipe.scheduler = DPMSolverMultistepScheduler.from_config( | |
| pipe.scheduler.config, use_karras_sigmas=True, algorithm_type="sde-dpmsolver++" | |
| ) | |
| # --- ここが核心:画像エンコーダ込みで公式レポから直接ロード ------------------ | |
| pipe.load_ip_adapter( | |
| "h94/IP-Adapter", # Hugging Face Hub ID | |
| subfolder="models", # ip-adapter-plus-face_sd15.bin が入っているフォルダ | |
| weight_name="ip-adapter-plus-face_sd15.bin", | |
| ) | |
| # --------------------------------------------------------------------------- | |
| # FaceID LoRA(差分 LoRA のみ) | |
| pipe.load_lora_weights(str(LORA_DIR), weight_name=LORA_FILE.name) | |
| pipe.set_ip_adapter_scale(0.65) | |
| # textual inversion 読み込み | |
| for emb in EMB_DIR.glob("*.*"): | |
| try: | |
| pipe.load_textual_inversion(emb, token=emb.stem) | |
| print("emb loaded →", emb.stem) | |
| except Exception: | |
| print("emb skip →", emb.name) | |
| pipe.to(device) | |
| print("pipeline ready ✔") | |
| ############################################################################## | |
| # 3. アップスケーラ | |
| ############################################################################## | |
| try: | |
| from basicsr.archs.rrdb_arch import RRDBNet | |
| try: | |
| from realesrgan import RealESRGAN | |
| except ImportError: | |
| from realesrgan import RealESRGANer as RealESRGAN | |
| rrdb = RRDBNet(3, 3, 64, 23, 32, scale=8) | |
| upsampler = RealESRGAN(device, rrdb, scale=8) | |
| upsampler.load_weights(str(RRG_WEIGHTS)) | |
| UPSCALE_OK = True | |
| except Exception as e: | |
| print("Real-ESRGAN disabled →", e) | |
| UPSCALE_OK = False | |
| ############################################################################## | |
| # 4. プロンプト & 生成関数 | |
| ############################################################################## | |
| BASE_PROMPT = ( | |
| "(masterpiece:1.2), best quality, ultra-realistic, RAW photo, 8k,\n" | |
| "photo of {subject},\n" | |
| "cinematic lighting, golden hour, rim light, shallow depth of field,\n" | |
| "textured skin, high detail, shot on Canon EOS R5, 85 mm f/1.4, ISO 200,\n" | |
| "<lora:ip-adapter-faceid-plusv2_sd15_lora:0.65>, (face),\n" | |
| "(aesthetic:1.1), (cinematic:0.8)" | |
| ) | |
| NEG_PROMPT = ( | |
| "ng_deepnegative_v1_75t, CyberRealistic_Negative-neg, UnrealisticDream, " | |
| "(worst quality:2), (low quality:1.8), lowres, (jpeg artifacts:1.2), " | |
| "painting, sketch, illustration, drawing, cartoon, anime, cgi, render, 3d, " | |
| "monochrome, grayscale, text, logo, watermark, signature, username, " | |
| "(MajicNegative_V2:0.8), bad hands, extra digits, fused fingers, malformed limbs, " | |
| "missing arms, missing legs, (badhandv4:0.7), BadNegAnatomyV1-neg, skin blemishes, acnes, age spot, glans" | |
| ) | |
| def generate( | |
| face_np, subject, add_prompt, add_neg, cfg, ip_scale, steps, w, h, upscale, up_factor, | |
| progress=gr.Progress(track_tqdm=True), | |
| ): | |
| if face_np is None or face_np.size == 0: | |
| raise gr.Error("顔画像をアップロードしてください。") | |
| prompt = BASE_PROMPT.format(subject=(subject.strip() or "a beautiful 20yo woman")) | |
| if add_prompt: | |
| prompt += ", " + add_prompt | |
| neg = NEG_PROMPT + (", " + add_neg if add_neg else "") | |
| pipe.set_ip_adapter_scale(ip_scale) | |
| img_in = Image.fromarray(face_np) | |
| result = pipe( | |
| prompt=prompt, | |
| negative_prompt=neg, | |
| ip_adapter_image=img_in, | |
| image=img_in, | |
| controlnet_conditioning_scale=0.9, | |
| num_inference_steps=int(steps) + 5, | |
| guidance_scale=cfg, | |
| width=int(w), | |
| height=int(h), | |
| ).images[0] | |
| if upscale: | |
| if UPSCALE_OK: | |
| up, _ = upsampler.enhance( | |
| cv2.cvtColor(np.array(result), cv2.COLOR_RGB2BGR), outscale=up_factor | |
| ) | |
| result = Image.fromarray(cv2.cvtColor(up, cv2.COLOR_BGR2RGB)) | |
| else: | |
| result = result.resize( | |
| (int(result.width * up_factor), int(result.height * up_factor)), | |
| Image.LANCZOS, | |
| ) | |
| return result | |
| ############################################################################## | |
| # 5. Gradio UI | |
| ############################################################################## | |
| with gr.Blocks() as demo: | |
| gr.Markdown("# InstantID – Beautiful Realistic Asians v7") | |
| with gr.Row(): | |
| with gr.Column(): | |
| face_in = gr.Image(label="顔写真", type="numpy") | |
| subj_in = gr.Textbox(label="被写体説明", placeholder="e.g. woman in black suit, smiling") | |
| add_in = gr.Textbox(label="追加プロンプト") | |
| addneg_in = gr.Textbox(label="追加ネガティブ") | |
| ip_sld = gr.Slider(0, 1.5, 0.65, step=0.05, label="IP-Adapter scale") | |
| cfg_sld = gr.Slider(1, 15, 6, step=0.5, label="CFG") | |
| step_sld = gr.Slider(10, 50, 20, step=1, label="Steps") | |
| w_sld = gr.Slider(512, 1024, 512, step=64, label="幅") | |
| h_sld = gr.Slider(512, 1024, 768, step=64, label="高さ") | |
| up_ck = gr.Checkbox(label="アップスケール", value=True) | |
| up_fac = gr.Slider(1, 8, 2, step=1, label="倍率") | |
| btn = gr.Button("生成", variant="primary") | |
| with gr.Column(): | |
| out_img = gr.Image(label="結果") | |
| btn.click( | |
| generate, | |
| [face_in, subj_in, add_in, addneg_in, cfg_sld, ip_sld, step_sld, w_sld, h_sld, up_ck, up_fac], | |
| out_img, | |
| api_name="predict", | |
| ) | |
| print("launching …") | |
| demo.queue().launch(show_error=True) |