fix: typo
dev/inference/wandb-backend.ipynb
CHANGED
@@ -2,7 +2,7 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count":
+   "execution_count": 1,
    "id": "4ff2a984-b8b2-4a69-89cf-0d16da2393c8",
    "metadata": {},
    "outputs": [],
@@ -12,7 +12,7 @@
     "import random\n",
     "import numpy as np\n",
     "from PIL import Image\n",
-    "from tqdm import tqdm\n",
+    "from tqdm.notebook import tqdm\n",
     "import jax\n",
     "import jax.numpy as jnp\n",
     "from flax.training.common_utils import shard, shard_prng_key\n",
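Note: the only change here swaps the console progress bar for the notebook widget variant, which renders more cleanly when a cell's output is captured. A minimal usage sketch, assuming a Jupyter environment with ipywidgets available (the loop is illustrative):

    from tqdm.notebook import tqdm  # widget-based bar; plain `tqdm` prints carriage-returned text

    for _ in tqdm(range(8), desc="demo"):
        pass  # placeholder work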
@@ -26,7 +26,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count":
+   "execution_count": 2,
    "id": "92f4557c-fd7f-4edc-81c2-de0b0a10c270",
    "metadata": {},
    "outputs": [],
@@ -36,13 +36,13 @@
     "VQGAN_REPO, VQGAN_COMMIT_ID = 'dalle-mini/vqgan_imagenet_f16_16384', None\n",
     "normalize_text = True\n",
     "latest_only = False # log only latest or all versions\n",
-    "suffix = '
+    "suffix = '' # mainly for duplicate inference runs with a deleted version\n",
     "add_clip_32 = False"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count":
+   "execution_count": 3,
    "id": "23e00271-941c-4e1b-b6a9-107a1b77324d",
    "metadata": {},
    "outputs": [],
@@ -52,16 +52,25 @@
     "VQGAN_REPO, VQGAN_COMMIT_ID = 'dalle-mini/vqgan_imagenet_f16_16384', None\n",
     "normalize_text = False\n",
     "latest_only = True # log only latest or all versions\n",
-    "suffix = '
+    "suffix = '' # mainly for duplicate inference runs with a deleted version\n",
     "add_clip_32 = True"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count":
+   "execution_count": 4,
    "id": "93b2e24b-f0e5-4abe-a3ec-0aa834cc3bf3",
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:absl:Unable to initialize backend 'tpu_driver': NOT_FOUND: Unable to find driver in registry given worker: \n",
+      "INFO:absl:Unable to initialize backend 'gpu': NOT_FOUND: Could not find registered platform with name: \"cuda\". Available platform names are: TPU Interpreter Host\n"
+     ]
+    }
+   ],
    "source": [
     "batch_size = 8\n",
     "num_images = 128\n",
@@ -75,10 +84,18 @@
   },
   {
    "cell_type": "code",
-   "execution_count":
+   "execution_count": 5,
    "id": "c6a878fa-4bf5-4978-abb5-e235841d765b",
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Working with z of shape (1, 256, 16, 16) = 65536 dimensions.\n"
+     ]
+    }
+   ],
    "source": [
     "vqgan = VQModel.from_pretrained(VQGAN_REPO, revision=VQGAN_COMMIT_ID)\n",
     "clip = FlaxCLIPModel.from_pretrained(\"openai/clip-vit-base-patch16\")\n",
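Note: the new stdout block records the VQGAN decoder being built. A hedged sketch of what this cell loads; the VQModel import path is an assumption based on the dalle-mini vqgan-jax package and does not appear in this diff:

    from transformers import FlaxCLIPModel
    from vqgan_jax.modeling_flax_vqgan import VQModel  # assumed import; not shown in the hunk

    VQGAN_REPO, VQGAN_COMMIT_ID = "dalle-mini/vqgan_imagenet_f16_16384", None
    # instantiating the VQGAN is what emits the "Working with z of shape ..." line captured above
    vqgan = VQModel.from_pretrained(VQGAN_REPO, revision=VQGAN_COMMIT_ID)
    clip = FlaxCLIPModel.from_pretrained("openai/clip-vit-base-patch16")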
@@ -94,7 +111,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count":
+   "execution_count": 6,
    "id": "a500dd07-dbc3-477d-80d4-2b73a3b83ef3",
    "metadata": {},
    "outputs": [],
@@ -104,20 +121,42 @@
     " return vqgan.decode_code(indices, params=params)\n",
     "\n",
     "@partial(jax.pmap, axis_name=\"batch\")\n",
-    "def p_clip(inputs):\n",
-    " logits = clip(params=
+    "def p_clip(inputs, params):\n",
+    " logits = clip(params=params, **inputs).logits_per_image\n",
     " return logits\n",
     "\n",
     "if add_clip_32:\n",
     " @partial(jax.pmap, axis_name=\"batch\")\n",
-    " def p_clip32(inputs):\n",
-    " logits = clip32(params=
+    " def p_clip32(inputs, params):\n",
+    " logits = clip32(params=params, **inputs).logits_per_image\n",
     " return logits"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count":
+   "execution_count": 7,
+   "id": "ebf4f7bf-2efa-46cc-b3f4-2d7a54f7b2cb",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "ShardedDeviceArray([4.6051702, 4.6051702, 4.6051702, 4.6051702, 4.6051702,\n",
+       " 4.6051702, 4.6051702, 4.6051702], dtype=float32)"
+      ]
+     },
+     "execution_count": 7,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "clip_params['logit_scale']"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
    "id": "e57797ab-0b3a-4490-be58-03d8d1c23fe9",
    "metadata": {},
    "outputs": [],
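Note: the substantive change in this hunk is that the pmapped CLIP scoring functions now take their parameters as an explicit argument instead of closing over host-side weights, and a new cell inspects clip_params['logit_scale'], which shows one copy per device because the parameters are replicated (exp(4.6051702) is roughly 100, CLIP's learned temperature). A minimal sketch of the pattern under that assumption; score_fn and replicated_params are illustrative names, not the notebook's:

    from functools import partial

    import jax
    from flax.jax_utils import replicate
    from flax.training.common_utils import shard
    from transformers import FlaxCLIPModel

    clip = FlaxCLIPModel.from_pretrained("openai/clip-vit-base-patch16")
    clip_params = clip.params

    @partial(jax.pmap, axis_name="batch")
    def score_fn(inputs, params):
        # one call per device; `inputs` and `params` both carry a leading device axis
        return clip(params=params, **inputs).logits_per_image

    # replicate the weights once across devices; per-batch inputs are sharded instead
    replicated_params = replicate(clip_params)
    # usage: logits = score_fn(shard(clip_inputs), replicated_params)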
@@ -133,7 +172,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count":
+   "execution_count": 9,
    "id": "f3e02d9d-4ee1-49e7-a7bc-4d8b139e9614",
    "metadata": {},
    "outputs": [],
@@ -150,7 +189,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count":
+   "execution_count": 10,
    "id": "f0d7ed17-7abb-4a31-ab3c-a12b9039a570",
    "metadata": {},
    "outputs": [],
@@ -163,7 +202,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count":
+   "execution_count": 11,
    "id": "7e784a43-626d-4e8d-9e47-a23775b2f35f",
    "metadata": {},
    "outputs": [],
@@ -179,7 +218,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count":
+   "execution_count": 12,
    "id": "d1cc9993-1bfc-4ec6-a004-c056189c42ac",
    "metadata": {},
    "outputs": [],
@@ -202,7 +241,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count":
+   "execution_count": 13,
    "id": "23b2444c-67a9-44d7-abd1-187ed83a9431",
    "metadata": {},
    "outputs": [],
@@ -213,10 +252,19 @@
   },
   {
    "cell_type": "code",
-   "execution_count":
+   "execution_count": 14,
    "id": "bba70f33-af8b-4eb3-9973-7be672301a0b",
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "ename": "SyntaxError",
+     "evalue": "EOL while scanning string literal (1745443972.py, line 60)",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[0;36m File \u001b[0;32m\"/tmp/ipykernel_402605/1745443972.py\"\u001b[0;36m, line \u001b[0;32m60\u001b[0m\n\u001b[0;31m for i in tqdm(range(num_images // jax.device_count()), desc='Generating Images):\u001b[0m\n\u001b[0m ^\u001b[0m\n\u001b[0;31mSyntaxError\u001b[0m\u001b[0;31m:\u001b[0m EOL while scanning string literal\n"
+     ]
+    }
+   ],
    "source": [
     "artifact_versions = get_artifact_versions(run_id, latest_only)\n",
     "last_inference_version = get_last_inference_version(run_id)\n",
@@ -276,9 +324,8 @@
     " tokenized_prompt = shard(tokenized_prompt)\n",
     "\n",
     " # generate images\n",
-    " print('Generating images')\n",
     " images = []\n",
-    " for i in tqdm(range(num_images // jax.device_count())):\n",
+    " for i in tqdm(range(num_images // jax.device_count()), desc='Generating Images):\n",
     " key, subkey = jax.random.split(key)\n",
     " encoded_images = p_generate(tokenized_prompt, shard_prng_key(subkey), model_params)\n",
     " encoded_images = encoded_images.sequences[..., 1:]\n",
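Note: the added loop line drops the closing quote on the tqdm description ('Generating Images), which is exactly what the SyntaxError captured in the outputs of the previous hunk points at. A sketch of the presumably intended line, with the string literal closed (num_images set to the notebook's value for illustration):

    from tqdm.notebook import tqdm

    import jax

    num_images = 128  # illustrative; matches the value set earlier in the notebook

    # closing quote added on the description string
    for i in tqdm(range(num_images // jax.device_count()), desc='Generating Images'):
        pass  # image generation happens here in the notebook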
@@ -294,7 +341,7 @@
     " images_per_prompt_indices = np.asarray(range(0, len(images), batch_size))\n",
     " clip_inputs['pixel_values'] = jnp.concatenate(list(clip_inputs['pixel_values'][images_per_prompt_indices + i] for i in range(batch_size)))\n",
     " clip_inputs = shard(clip_inputs)\n",
-    " logits = p_clip(clip_inputs)\n",
+    " logits = p_clip(clip_inputs, clip_params)\n",
     " logits = logits.reshape(-1, num_images)\n",
     " top_scores = logits.argsort()[:, -top_k:][..., ::-1]\n",
     " logits = jax.device_get(logits)\n",
@@ -314,7 +361,7 @@
     " images_per_prompt_indices = np.asarray(range(0, len(images), batch_size))\n",
     " clip_inputs['pixel_values'] = jnp.concatenate(list(clip_inputs['pixel_values'][images_per_prompt_indices + i] for i in range(batch_size)))\n",
     " clip_inputs = shard(clip_inputs)\n",
-    " logits = p_clip32(clip_inputs)\n",
+    " logits = p_clip32(clip_inputs, clip32_params)\n",
     " logits = logits.reshape(-1, num_images)\n",
     " top_scores = logits.argsort()[:, -top_k:][..., ::-1]\n",
     " logits = jax.device_get(logits)\n",
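Note: both scoring hunks now pass the replicated CLIP parameters explicitly alongside the sharded inputs. The surrounding context lines then do a per-prompt top-k selection over the returned logits; a self-contained sketch of that selection step with dummy scores (top_k is illustrative, only num_images comes from the notebook):

    import numpy as np

    num_images, top_k = 128, 8               # num_images mirrors the notebook; top_k is illustrative
    logits = np.random.rand(1, num_images)   # stand-in for the per-prompt CLIP logits

    # highest-scoring image indices first, using the notebook's argsort/[::-1] idiom
    top_scores = logits.argsort()[:, -top_k:][..., ::-1]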