Spaces:

flax-community
/

dalle-mini

Running

App Files Community

boris committed on Jan 16, 2022

Commit

25862e8

1 Parent(s): 89cf9ea

fix: style

Browse files

Files changed (2) hide show

tools/inference/inference_pipeline.ipynb +46 -19
tools/train/train.py +5 -5

tools/inference/inference_pipeline.ipynb CHANGED Viewed

@@ -70,15 +70,15 @@
     "# Model references\n",
     "\n",
     "# dalle-mini\n",
-    "DALLE_MODEL = 'dalle-mini/dalle-mini/model-3bqwu04f:latest'  # can be wandb artifact or 🤗 Hub or local folder\n",
     "DALLE_COMMIT_ID = None  # used only with 🤗 hub\n",
     "\n",
     "# VQGAN model\n",
-    "VQGAN_REPO = 'dalle-mini/vqgan_imagenet_f16_16384'\n",
-    "VQGAN_COMMIT_ID = 'e93a26e7707683d349bf5d5c41c5b0ef69b677a9'\n",
     "\n",
     "# CLIP model\n",
-    "CLIP_REPO = 'openai/clip-vit-base-patch16'\n",
     "CLIP_COMMIT_ID = None"
    ]
   },
@@ -121,18 +121,28 @@
     "import wandb\n",
     "\n",
     "# Load dalle-mini\n",
-    "if ':' in DALLE_MODEL:\n",
     "    # wandb artifact\n",
     "    artifact = wandb.Api().artifact(DALLE_MODEL)\n",
     "    # we only download required files (no need for opt_state which is large)\n",
-    "    model_files = ['config.json', 'flax_model.msgpack', 'merges.txt', 'special_tokens_map.json', 'tokenizer.json', 'tokenizer_config.json', 'vocab.json']\n",
     "    for f in model_files:\n",
-    "        artifact.get_path(f).download('model')\n",
-    "    model = DalleBart.from_pretrained('model', dtype=dtype, abstract_init=True)\n",
-    "    tokenizer = AutoTokenizer.from_pretrained('model')\n",
     "else:\n",
     "    # local folder or 🤗 Hub\n",
-    "    model = DalleBart.from_pretrained(DALLE_MODEL, revision=DALLE_COMMIT_ID, dtype=dtype, abstract_init=True)\n",
     "    tokenizer = AutoTokenizer.from_pretrained(DALLE_MODEL, revision=DALLE_COMMIT_ID)\n",
     "\n",
     "# Load VQGAN\n",
@@ -191,7 +201,7 @@
     "from functools import partial\n",
     "\n",
     "# model inference\n",
-    "@partial(jax.pmap, axis_name=\"batch\", static_broadcasted_argnums=(3,4))\n",
     "def p_generate(tokenized_prompt, key, params, top_k, top_p):\n",
     "    return model.generate(\n",
     "        **tokenized_prompt,\n",
@@ -203,11 +213,13 @@
     "        top_p=top_p\n",
     "    )\n",
     "\n",
     "# decode images\n",
     "@partial(jax.pmap, axis_name=\"batch\")\n",
     "def p_decode(indices, params):\n",
     "    return vqgan.decode_code(indices, params=params)\n",
     "\n",
     "# score images\n",
     "@partial(jax.pmap, axis_name=\"batch\")\n",
     "def p_clip(inputs, params):\n",
@@ -235,7 +247,7 @@
     "import random\n",
     "\n",
     "# create a random key\n",
-    "seed = random.randint(0, 2**32-1)\n",
     "key = jax.random.PRNGKey(seed)"
    ]
   },
@@ -287,7 +299,7 @@
    },
    "outputs": [],
    "source": [
-    "prompt = 'a red T-shirt'"
    ]
   },
   {
@@ -323,7 +335,13 @@
     "repeated_prompts = [processed_prompt] * jax.device_count()\n",
     "\n",
     "# tokenize\n",
-    "tokenized_prompt = tokenizer(repeated_prompts, return_tensors='jax', padding='max_length', truncation=True, max_length=128).data\n",
     "tokenized_prompt"
    ]
   },
@@ -408,12 +426,14 @@
     "    # get a new key\n",
     "    key, subkey = jax.random.split(key)\n",
     "    # generate images\n",
-    "    encoded_images = p_generate(tokenized_prompt, shard_prng_key(subkey), model_params, gen_top_k, gen_top_p)\n",
     "    # remove BOS\n",
     "    encoded_images = encoded_images.sequences[..., 1:]\n",
     "    # decode images\n",
     "    decoded_images = p_decode(encoded_images, vqgan_params)\n",
-    "    decoded_images = decoded_images.clip(0., 1.).reshape((-1, 256, 256, 3))\n",
     "    for img in decoded_images:\n",
     "        images.append(Image.fromarray(np.asarray(img * 255, dtype=np.uint8)))"
    ]
@@ -436,7 +456,14 @@
    "outputs": [],
    "source": [
     "# get clip scores\n",
-    "clip_inputs = processor(text=[prompt] * jax.device_count(), images=images, return_tensors='np', padding='max_length', max_length=77, truncation=True).data\n",
     "logits = p_clip(shard(clip_inputs), clip_params)\n",
     "logits = logits.squeeze().flatten()"
    ]
@@ -458,10 +485,10 @@
    },
    "outputs": [],
    "source": [
-    "print(f'Prompt: {prompt}\\n')\n",
     "for idx in logits.argsort()[::-1]:\n",
     "    display(images[idx])\n",
-    "    print(f'Score: {logits[idx]:.2f}\\n')"
    ]
   }
  ],

     "# Model references\n",
     "\n",
     "# dalle-mini\n",
+    "DALLE_MODEL = \"dalle-mini/dalle-mini/model-3bqwu04f:latest\"  # can be wandb artifact or 🤗 Hub or local folder\n",
     "DALLE_COMMIT_ID = None  # used only with 🤗 hub\n",
     "\n",
     "# VQGAN model\n",
+    "VQGAN_REPO = \"dalle-mini/vqgan_imagenet_f16_16384\"\n",
+    "VQGAN_COMMIT_ID = \"e93a26e7707683d349bf5d5c41c5b0ef69b677a9\"\n",
     "\n",
     "# CLIP model\n",
+    "CLIP_REPO = \"openai/clip-vit-base-patch16\"\n",
     "CLIP_COMMIT_ID = None"
    ]
   },
     "import wandb\n",
     "\n",
     "# Load dalle-mini\n",
+    "if \":\" in DALLE_MODEL:\n",
     "    # wandb artifact\n",
     "    artifact = wandb.Api().artifact(DALLE_MODEL)\n",
     "    # we only download required files (no need for opt_state which is large)\n",
+    "    model_files = [\n",
+    "        \"config.json\",\n",
+    "        \"flax_model.msgpack\",\n",
+    "        \"merges.txt\",\n",
+    "        \"special_tokens_map.json\",\n",
+    "        \"tokenizer.json\",\n",
+    "        \"tokenizer_config.json\",\n",
+    "        \"vocab.json\",\n",
+    "    ]\n",
     "    for f in model_files:\n",
+    "        artifact.get_path(f).download(\"model\")\n",
+    "    model = DalleBart.from_pretrained(\"model\", dtype=dtype, abstract_init=True)\n",
+    "    tokenizer = AutoTokenizer.from_pretrained(\"model\")\n",
     "else:\n",
     "    # local folder or 🤗 Hub\n",
+    "    model = DalleBart.from_pretrained(\n",
+    "        DALLE_MODEL, revision=DALLE_COMMIT_ID, dtype=dtype, abstract_init=True\n",
+    "    )\n",
     "    tokenizer = AutoTokenizer.from_pretrained(DALLE_MODEL, revision=DALLE_COMMIT_ID)\n",
     "\n",
     "# Load VQGAN\n",
     "from functools import partial\n",
     "\n",
     "# model inference\n",
+    "@partial(jax.pmap, axis_name=\"batch\", static_broadcasted_argnums=(3, 4))\n",
     "def p_generate(tokenized_prompt, key, params, top_k, top_p):\n",
     "    return model.generate(\n",
     "        **tokenized_prompt,\n",
     "        top_p=top_p\n",
     "    )\n",
     "\n",
+    "\n",
     "# decode images\n",
     "@partial(jax.pmap, axis_name=\"batch\")\n",
     "def p_decode(indices, params):\n",
     "    return vqgan.decode_code(indices, params=params)\n",
     "\n",
+    "\n",
     "# score images\n",
     "@partial(jax.pmap, axis_name=\"batch\")\n",
     "def p_clip(inputs, params):\n",
     "import random\n",
     "\n",
     "# create a random key\n",
+    "seed = random.randint(0, 2 ** 32 - 1)\n",
     "key = jax.random.PRNGKey(seed)"
    ]
   },
    },
    "outputs": [],
    "source": [
+    "prompt = \"a red T-shirt\""
    ]
   },
   {
     "repeated_prompts = [processed_prompt] * jax.device_count()\n",
     "\n",
     "# tokenize\n",
+    "tokenized_prompt = tokenizer(\n",
+    "    repeated_prompts,\n",
+    "    return_tensors=\"jax\",\n",
+    "    padding=\"max_length\",\n",
+    "    truncation=True,\n",
+    "    max_length=128,\n",
+    ").data\n",
     "tokenized_prompt"
    ]
   },
     "    # get a new key\n",
     "    key, subkey = jax.random.split(key)\n",
     "    # generate images\n",
+    "    encoded_images = p_generate(\n",
+    "        tokenized_prompt, shard_prng_key(subkey), model_params, gen_top_k, gen_top_p\n",
+    "    )\n",
     "    # remove BOS\n",
     "    encoded_images = encoded_images.sequences[..., 1:]\n",
     "    # decode images\n",
     "    decoded_images = p_decode(encoded_images, vqgan_params)\n",
+    "    decoded_images = decoded_images.clip(0.0, 1.0).reshape((-1, 256, 256, 3))\n",
     "    for img in decoded_images:\n",
     "        images.append(Image.fromarray(np.asarray(img * 255, dtype=np.uint8)))"
    ]
    "outputs": [],
    "source": [
     "# get clip scores\n",
+    "clip_inputs = processor(\n",
+    "    text=[prompt] * jax.device_count(),\n",
+    "    images=images,\n",
+    "    return_tensors=\"np\",\n",
+    "    padding=\"max_length\",\n",
+    "    max_length=77,\n",
+    "    truncation=True,\n",
+    ").data\n",
     "logits = p_clip(shard(clip_inputs), clip_params)\n",
     "logits = logits.squeeze().flatten()"
    ]
    },
    "outputs": [],
    "source": [
+    "print(f\"Prompt: {prompt}\\n\")\n",
     "for idx in logits.argsort()[::-1]:\n",
     "    display(images[idx])\n",
+    "    print(f\"Score: {logits[idx]:.2f}\\n\")"
    ]
   }
  ],

tools/train/train.py CHANGED Viewed

@@ -219,9 +219,7 @@ class TrainingArguments:
             "help": 'The optimizer to use. Can be "distributed_shampoo" (default), "adam" or "adafactor"'
         },
     )
-    weight_decay: float = field(
-        default=None, metadata={"help": "Weight decay."}
-    )
     beta1: float = field(
         default=0.9,
         metadata={"help": "Beta1 for Adam & Distributed Shampoo."},
@@ -237,13 +235,15 @@ class TrainingArguments:
         default=1.0, metadata={"help": "Max gradient norm for Adafactor."}
     )
     block_size: int = field(
-        default=1024, metadata={"help": "Chunked size for large layers with Distributed Shampoo."}
     )
     preconditioning_compute_steps: int = field(
         default=10, metadata={"help": "Number of steps to update preconditioner."}
     )
     skip_preconditioning_dim_size_gt: int = field(
-        default=4096, metadata={"help": "Max size for preconditioning with Distributed Shampoo."}
     )
     optim_quantized: bool = field(
         default=False,

             "help": 'The optimizer to use. Can be "distributed_shampoo" (default), "adam" or "adafactor"'
         },
     )
+    weight_decay: float = field(default=None, metadata={"help": "Weight decay."})
     beta1: float = field(
         default=0.9,
         metadata={"help": "Beta1 for Adam & Distributed Shampoo."},
         default=1.0, metadata={"help": "Max gradient norm for Adafactor."}
     )
     block_size: int = field(
+        default=1024,
+        metadata={"help": "Chunked size for large layers with Distributed Shampoo."},
     )
     preconditioning_compute_steps: int = field(
         default=10, metadata={"help": "Number of steps to update preconditioner."}
     )
     skip_preconditioning_dim_size_gt: int = field(
+        default=4096,
+        metadata={"help": "Max size for preconditioning with Distributed Shampoo."},
     )
     optim_quantized: bool = field(
         default=False,

fix: style

fix: style