Spaces:

flax-community
/

dalle-mini

Running

App Files Files Community

boris committed on Oct 9, 2021

Commit

353365f

1 Parent(s): b8bbe68

feat: add scoring

Browse files

Files changed (1) hide show

dev/inference/wandb-backend.ipynb +338 -51

dev/inference/wandb-backend.ipynb CHANGED Viewed

@@ -2,7 +2,7 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 197,
    "id": "4ff2a984-b8b2-4a69-89cf-0d16da2393c8",
    "metadata": {},
    "outputs": [],
@@ -10,7 +10,13 @@
     "import csv\n",
     "import tempfile\n",
     "from functools import partial\n",
     "import jax\n",
     "import wandb\n",
     "from dalle_mini.model import CustomFlaxBartForConditionalGeneration\n",
     "from vqgan_jax.modeling_flax_vqgan import VQModel\n",
@@ -30,6 +36,30 @@
     "normalize_text = True"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
@@ -44,7 +74,18 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 245,
    "id": "e57797ab-0b3a-4490-be58-03d8d1c23fe9",
    "metadata": {},
    "outputs": [],
@@ -53,51 +94,32 @@
     "    reader = csv.DictReader(f)\n",
     "    samples = []\n",
     "    for row in reader:\n",
-    "        samples.append(row)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 246,
    "id": "f75b2869-fc25-4f56-b937-e97bbb712ede",
    "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "101"
-      ]
-     },
-     "execution_count": 246,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
    "source": [
     "len(samples)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 248,
-   "id": "2ea0b166-a20c-4d78-bffb-b792ca512d17",
    "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "104"
-      ]
-     },
-     "execution_count": 248,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
    "source": [
-    "samples_to_add = ['empty'] * (-len(samples) % 8)\n",
-    "samples.extend(samples_to_add)\n",
-    "len(samples)"
    ]
   },
   {
@@ -112,7 +134,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 204,
    "id": "3ffb1d09-bd1c-4f57-9ae5-3eda6f7d3a08",
    "metadata": {},
    "outputs": [],
@@ -148,21 +170,11 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "29613a9d-de7e-44e3-94f1-650085039204",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "versions = sorted(versions, key=lambda x: int(x.version[1:]))"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "d77159df-1a16-4996-aafd-1df82c5a3509",
    "metadata": {},
    "outputs": [],
    "source": [
-    "versions"
    ]
   },
   {
@@ -253,6 +265,8 @@
    "source": [
     "if last_version_inference is None:\n",
     "    assert version == 0\n",
     "else:\n",
     "    assert version == last_version_inference + 1"
    ]
@@ -338,7 +352,17 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 207,
    "id": "d1cc9993-1bfc-4ec6-a004-c056189c42ac",
    "metadata": {},
    "outputs": [],
@@ -360,6 +384,12 @@
     "    def p_decode(indices, params):\n",
     "        return vqgan.decode_code(indices, params=params)\n",
     "    \n",
     "    functions_pmapped = False"
    ]
   },
@@ -369,25 +399,282 @@
    "id": "7a24b903-777b-4e3d-817c-00ed613a7021",
    "metadata": {},
    "outputs": [],
-   "source": []
   },
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "e1c04761-1016-47e9-925c-3a9ec6fec95a",
    "metadata": {},
    "outputs": [],
    "source": [
-    "wandb.finish()"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "e79ac8f2-adc2-4a16-970c-dadcceadd566",
    "metadata": {},
    "outputs": [],
    "source": []
   }
  ],
  "metadata": {

  "cells": [
   {
    "cell_type": "code",
+   "execution_count": null,
    "id": "4ff2a984-b8b2-4a69-89cf-0d16da2393c8",
    "metadata": {},
    "outputs": [],
     "import csv\n",
     "import tempfile\n",
     "from functools import partial\n",
+    "import random\n",
+    "import numpy as np\n",
+    "from PIL import Image\n",
     "import jax\n",
+    "import jax.numpy as jnp\n",
+    "from flax.training.common_utils import shard, shard_prng_key\n",
+    "from flax.jax_utils import replicate\n",
     "import wandb\n",
     "from dalle_mini.model import CustomFlaxBartForConditionalGeneration\n",
     "from vqgan_jax.modeling_flax_vqgan import VQModel\n",
     "normalize_text = True"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "93b2e24b-f0e5-4abe-a3ec-0aa834cc3bf3",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "batch_size = 8\n",
+    "num_images = 128\n",
+    "top_k = 8\n",
+    "text_normalizer = TextNormalizer() if normalize_text else None"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "6a045827-3461-4499-8959-38d173bc4e5e",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "seed = random.randint(0, 2**32-1)\n",
+    "key = jax.random.PRNGKey(seed)"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,
   },
   {
    "cell_type": "code",
+   "execution_count": null,
+   "id": "4927529a-8828-4150-bc76-e1b60d8dee62",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "clip_params = replicate(clip.params)\n",
+    "vqgan_params = replicate(vqgan.params)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
    "id": "e57797ab-0b3a-4490-be58-03d8d1c23fe9",
    "metadata": {},
    "outputs": [],
     "    reader = csv.DictReader(f)\n",
     "    samples = []\n",
     "    for row in reader:\n",
+    "        samples.append(row)\n",
+    "    # make list multiple of batch_size by adding \"empty\"\n",
+    "    samples_to_add = [{'Caption':'empty', 'Theme':'empty'}] * (-len(samples) % batch_size)\n",
+    "    samples.extend(samples_to_add)\n",
+    "    # reshape\n",
+    "    samples = [samples[i:i+batch_size] for i in range(0, len(samples), batch_size)]"
    ]
   },
   {
    "cell_type": "code",
+   "execution_count": null,
    "id": "f75b2869-fc25-4f56-b937-e97bbb712ede",
    "metadata": {},
+   "outputs": [],
    "source": [
     "len(samples)"
    ]
   },
   {
    "cell_type": "code",
+   "execution_count": null,
+   "id": "c48525c9-447a-4430-81d7-4b699f545638",
    "metadata": {},
+   "outputs": [],
    "source": [
+    "samples[-1]"
    ]
   },
   {
   },
   {
    "cell_type": "code",
+   "execution_count": null,
    "id": "3ffb1d09-bd1c-4f57-9ae5-3eda6f7d3a08",
    "metadata": {},
    "outputs": [],
   {
    "cell_type": "code",
    "execution_count": null,
+   "id": "ead44aee-52d5-4ca2-8984-c4d267d9e72a",
    "metadata": {},
    "outputs": [],
    "source": [
+    "versions[0].version"
    ]
   },
   {
    "source": [
     "if last_version_inference is None:\n",
     "    assert version == 0\n",
+    "elif last_version_inference >= version:\n",
+    "    print(f'Version {version} has already been logged')\n",
     "else:\n",
     "    assert version == last_version_inference + 1"
    ]
   },
   {
    "cell_type": "code",
+   "execution_count": null,
+   "id": "320823c9-124a-4fc3-a12c-8c015a128285",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "model_params = replicate(model.params)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
    "id": "d1cc9993-1bfc-4ec6-a004-c056189c42ac",
    "metadata": {},
    "outputs": [],
     "    def p_decode(indices, params):\n",
     "        return vqgan.decode_code(indices, params=params)\n",
     "    \n",
+    "    @partial(jax.pmap, axis_name=\"batch\")\n",
+    "    def p_clip(inputs):\n",
+    "        # Runs CLIP on each device shard and returns the raw image-to-text\n",
+    "        # similarity logits; the ranking step sorts these values directly,\n",
+    "        # so no softmax is applied here.\n",
+    "        return clip(**inputs).logits_per_image\n",
+    "    \n",
     "    functions_pmapped = False"
    ]
   },
    "id": "7a24b903-777b-4e3d-817c-00ed613a7021",
    "metadata": {},
    "outputs": [],
+   "source": [
+    "# TODO: loop over samples\n",
+    "batch = samples[0]\n",
+    "prompts = [x['Caption'] for x in batch]\n",
+    "processed_prompts = [text_normalizer(x) for x in prompts] if normalize_text else prompts"
+   ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
+   "id": "d77aa785-dc05-4070-aba2-aa007524d20b",
    "metadata": {},
    "outputs": [],
    "source": [
+    "processed_prompts"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "95db38fb-8948-4814-98ae-c172ca7c6d0a",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "repeated_prompts = processed_prompts * jax.device_count()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "e948ba9e-3700-4e87-926f-580a10f3e5cd",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "tokenized_prompt = tokenizer(repeated_prompts, return_tensors='jax', padding='max_length', truncation=True, max_length=128).data\n",
+    "tokenized_prompt = shard(tokenized_prompt)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "30d96812-fc17-4acf-bb64-5fdb8d0cd313",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "tokenized_prompt['input_ids'].shape"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "92ea034b-2649-4d18-ab6d-877ed04ae5c4",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "images = []\n",
+    "for i in range(num_images // jax.device_count()):\n",
+    "    key, subkey = jax.random.split(key, 2)\n",
+    "    \n",
+    "    encoded_images = p_generate(tokenized_prompt, shard_prng_key(subkey), model_params)\n",
+    "    encoded_images = encoded_images.sequences[..., 1:]\n",
+    "    \n",
+    "    decoded_images = p_decode(encoded_images, vqgan_params)\n",
+    "    decoded_images = decoded_images.clip(0., 1.).reshape((-1, 256, 256, 3))\n",
+    "    \n",
+    "    for img in decoded_images:\n",
+    "        images.append(Image.fromarray(np.asarray(img * 255, dtype=np.uint8)))\n",
+    "    "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "84d52f30-44c9-4a74-9992-fb2578f19b90",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "len(images)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "beb594f9-5b91-47fe-98bd-41e68c6b1d73",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "images[0]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "bb135190-64e5-44af-b416-e688b034da44",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "images[1]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d78a0d92-72c2-4f82-a6ab-b3f5865dd863",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "clip_inputs = processor(text=prompts, images=images, return_tensors='np', padding='max_length', max_length=77, truncation=True).data"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
+   "id": "89ff78a6-bfa4-44d9-ad66-07a4a68b4352",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# each shard will have one prompt\n",
+    "clip_inputs['input_ids'].shape"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "2cda8984-049c-4c87-96ad-7b0412750656",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# each shard needs to have the images corresponding to a specific prompt\n",
+    "clip_inputs['pixel_values'].shape"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "0a044e8f-be29-404b-b6c7-8f2395c5efc6",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "images_per_prompt_indices = np.asarray(range(0, len(images), batch_size))\n",
+    "images_per_prompt_indices"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "7a6c61b3-12e0-45d8-b39a-830288324d3d",
    "metadata": {},
    "outputs": [],
    "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "7318e67e-4214-46f9-bf60-6d139d4bd00f",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# reorder so each shard will have correct images\n",
+    "clip_inputs['pixel_values'] = jnp.concatenate(list(clip_inputs['pixel_values'][images_per_prompt_indices + i] for i in range(batch_size)))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "90c949a2-8e2a-4905-b6d4-92038f1704b8",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "clip_inputs = shard(clip_inputs)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "58fa836e-5ebb-45e7-af77-ab10646dfbfb",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "logits = p_clip(clip_inputs)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "fd7a3f91-3a1f-4a0a-8b3e-3c926cd367fb",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "logits.shape"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "fa406db7-0a21-4e4b-9890-4c7aece4280c",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "logits = logits.reshape(-1, num_images)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "9c359a8c-2c27-4e68-8775-371857397723",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "logits.shape"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "a56b9f28-dd91-4382-bc47-11e89fda1254",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "logits"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "0bed8167-0a6d-46c1-badf-8bdc20b93c31",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "top_idx = logits.argsort()[:, -top_k:][..., ::-1]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "188c5333-6b8c-4a17-8cc8-15651c77ef99",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "len(images)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "babd22b3-e773-467d-8bbb-f0323f57a44b",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "results = []\n",
+    "columns = ['Caption', 'Theme'] + [f'Image {i+1}' for i in range(top_k)] + [f'Score {i+1}' for i in range(top_k)]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "75976c9f-dea5-48e3-8920-55a1bbfd91c2",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Build one results row per prompt: caption, theme, its top_k images, then their scores.\n",
+    "for i, (idx, scores, sample) in enumerate(zip(top_idx, logits, batch)):\n",
+    "    # prompt i's images sit at offsets i, i + batch_size, i + 2 * batch_size, ...\n",
+    "    cur_images = [images[x] for x in images_per_prompt_indices + i]\n",
+    "    top_images = [wandb.Image(cur_images[x]) for x in idx]\n",
+    "    # idx indexes this prompt's own score row, so read from `scores`, not the full logits matrix\n",
+    "    top_scores = [scores[x] for x in idx]\n",
+    "    results.append([sample['Caption'], sample['Theme']] + top_images + top_scores)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "e1c04761-1016-47e9-925c-3a9ec6fec95a",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "wandb.finish()"
+   ]
   }
  ],
  "metadata": {