Spaces:

flax-community
/

dalle-mini

Running

App Files Files Community

boris committed on Nov 30, 2021

Commit

741bf32

1 Parent(s): ae754a3

style: reformat

Browse files

Files changed (2) hide show

tools/inference/inference_pipeline.ipynb +0 -0
tools/inference/log_inference_samples.ipynb +121 -46

tools/inference/inference_pipeline.ipynb CHANGED Viewed

The diff for this file is too large to render. See raw diff

tools/inference/log_inference_samples.ipynb CHANGED Viewed

@@ -31,11 +31,14 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "run_ids = ['63otg87g']\n",
-    "ENTITY, PROJECT = 'dalle-mini', 'dalle-mini'  # used only for training run\n",
-    "VQGAN_REPO, VQGAN_COMMIT_ID = 'dalle-mini/vqgan_imagenet_f16_16384', 'e93a26e7707683d349bf5d5c41c5b0ef69b677a9'\n",
-    "latest_only = True    # log only latest or all versions\n",
-    "suffix = ''           # mainly for duplicate inference runs with a deleted version\n",
     "add_clip_32 = False"
    ]
   },
@@ -63,8 +66,8 @@
     "num_images = 128\n",
     "top_k = 8\n",
     "text_normalizer = TextNormalizer()\n",
-    "padding_item = 'NONE'\n",
-    "seed = random.randint(0, 2**32-1)\n",
     "key = jax.random.PRNGKey(seed)\n",
     "api = wandb.Api()"
    ]
@@ -100,12 +103,15 @@
     "def p_decode(indices, params):\n",
     "    return vqgan.decode_code(indices, params=params)\n",
     "\n",
     "@partial(jax.pmap, axis_name=\"batch\")\n",
     "def p_clip16(inputs, params):\n",
     "    logits = clip16(params=params, **inputs).logits_per_image\n",
     "    return logits\n",
     "\n",
     "if add_clip_32:\n",
     "    @partial(jax.pmap, axis_name=\"batch\")\n",
     "    def p_clip32(inputs, params):\n",
     "        logits = clip32(params=params, **inputs).logits_per_image\n",
@@ -119,13 +125,13 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "with open('samples.txt', encoding='utf8') as f:\n",
     "    samples = [l.strip() for l in f.readlines()]\n",
     "    # make list multiple of batch_size by adding elements\n",
     "    samples_to_add = [padding_item] * (-len(samples) % batch_size)\n",
     "    samples.extend(samples_to_add)\n",
     "    # reshape\n",
-    "    samples = [samples[i:i+batch_size] for i in range(0, len(samples), batch_size)]"
    ]
   },
   {
@@ -138,9 +144,17 @@
     "def get_artifact_versions(run_id, latest_only=False):\n",
     "    try:\n",
     "        if latest_only:\n",
-    "            return [api.artifact(type='bart_model', name=f'{ENTITY}/{PROJECT}/model-{run_id}:latest')]\n",
     "        else:\n",
-    "            return api.artifact_versions(type_name='bart_model', name=f'{ENTITY}/{PROJECT}/model-{run_id}', per_page=10000)\n",
     "    except:\n",
     "        return []"
    ]
@@ -153,7 +167,7 @@
    "outputs": [],
    "source": [
     "def get_training_config(run_id):\n",
-    "    training_run = api.run(f'{ENTITY}/{PROJECT}/{run_id}')\n",
     "    config = training_run.config\n",
     "    return config"
    ]
@@ -168,8 +182,8 @@
     "# retrieve inference run details\n",
     "def get_last_inference_version(run_id):\n",
     "    try:\n",
-    "        inference_run = api.run(f'dalle-mini/dalle-mini/{run_id}-clip16{suffix}')\n",
-    "        return inference_run.summary.get('version', None)\n",
     "    except:\n",
     "        return None"
    ]
@@ -183,7 +197,6 @@
    "source": [
     "# compile functions - needed only once per run\n",
     "def pmap_model_function(model):\n",
-    "    \n",
     "    @partial(jax.pmap, axis_name=\"batch\")\n",
     "    def _generate(tokenized_prompt, key, params):\n",
     "        return model.generate(\n",
@@ -195,7 +208,7 @@
     "            top_k=gen_top_k,\n",
     "            top_p=gen_top_p\n",
     "        )\n",
-    "    \n",
     "    return _generate"
    ]
   },
@@ -222,13 +235,21 @@
     "training_config = get_training_config(run_id)\n",
     "run = None\n",
     "p_generate = None\n",
-    "model_files = ['config.json', 'flax_model.msgpack', 'merges.txt', 'special_tokens_map.json', 'tokenizer.json', 'tokenizer_config.json', 'vocab.json']\n",
     "for artifact in artifact_versions:\n",
-    "    print(f'Processing artifact: {artifact.name}')\n",
     "    version = int(artifact.version[1:])\n",
     "    results16, results32 = [], []\n",
-    "    columns = ['Caption'] + [f'Image {i+1}' for i in range(top_k)]\n",
-    "    \n",
     "    if latest_only:\n",
     "        assert last_inference_version is None or version > last_inference_version\n",
     "    else:\n",
@@ -236,14 +257,23 @@
     "            # we should start from v0\n",
     "            assert version == 0\n",
     "        elif version <= last_inference_version:\n",
-    "            print(f'v{version} has already been logged (versions logged up to v{last_inference_version}')\n",
     "        else:\n",
     "            # check we are logging the correct version\n",
     "            assert version == last_inference_version + 1\n",
     "\n",
     "    # start/resume corresponding run\n",
     "    if run is None:\n",
-    "        run = wandb.init(job_type='inference', entity='dalle-mini', project='dalle-mini', config=training_config, id=f'{run_id}-clip16{suffix}', resume='allow')\n",
     "\n",
     "    # work in temporary directory\n",
     "    with tempfile.TemporaryDirectory() as tmp:\n",
@@ -264,64 +294,109 @@
     "\n",
     "        # process one batch of captions\n",
     "        for batch in tqdm(samples):\n",
-    "            processed_prompts = [text_normalizer(x) for x in batch] if model.config.normalize_text else list(batch)\n",
     "\n",
     "            # repeat the prompts to distribute over each device and tokenize\n",
     "            processed_prompts = processed_prompts * jax.device_count()\n",
-    "            tokenized_prompt = tokenizer(processed_prompts, return_tensors='jax', padding='max_length', truncation=True, max_length=128).data\n",
     "            tokenized_prompt = shard(tokenized_prompt)\n",
     "\n",
     "            # generate images\n",
     "            images = []\n",
-    "            pbar = tqdm(range(num_images // jax.device_count()), desc='Generating Images', leave=True)\n",
     "            for i in pbar:\n",
     "                key, subkey = jax.random.split(key)\n",
-    "                encoded_images = p_generate(tokenized_prompt, shard_prng_key(subkey), model_params)\n",
     "                encoded_images = encoded_images.sequences[..., 1:]\n",
     "                decoded_images = p_decode(encoded_images, vqgan_params)\n",
-    "                decoded_images = decoded_images.clip(0., 1.).reshape((-1, 256, 256, 3))\n",
     "                for img in decoded_images:\n",
-    "                    images.append(Image.fromarray(np.asarray(img * 255, dtype=np.uint8)))\n",
     "\n",
-    "            def add_clip_results(results, processor, p_clip, clip_params):  \n",
-    "                clip_inputs = processor(text=batch, images=images, return_tensors='np', padding='max_length', max_length=77, truncation=True).data\n",
     "                # each shard will have one prompt, images need to be reorganized to be associated to the correct shard\n",
-    "                images_per_prompt_indices = np.asarray(range(0, len(images), batch_size))\n",
-    "                clip_inputs['pixel_values'] = jnp.concatenate(list(clip_inputs['pixel_values'][images_per_prompt_indices + i] for i in range(batch_size)))\n",
     "                clip_inputs = shard(clip_inputs)\n",
     "                logits = p_clip(clip_inputs, clip_params)\n",
     "                logits = logits.reshape(-1, num_images)\n",
     "                top_scores = logits.argsort()[:, -top_k:][..., ::-1]\n",
     "                logits = jax.device_get(logits)\n",
     "                # add to results table\n",
-    "                for i, (idx, scores, sample) in enumerate(zip(top_scores, logits, batch)):\n",
-    "                    if sample == padding_item: continue\n",
     "                    cur_images = [images[x] for x in images_per_prompt_indices + i]\n",
-    "                    top_images = [wandb.Image(cur_images[x], caption=f'Score: {scores[x]:.2f}') for x in idx]\n",
     "                    results.append([sample] + top_images)\n",
-    "                    \n",
     "            # get clip scores\n",
-    "            pbar.set_description('Calculating CLIP 16 scores')\n",
     "            add_clip_results(results16, processor16, p_clip16, clip16_params)\n",
-    "                \n",
     "            # get clip 32 scores\n",
     "            if add_clip_32:\n",
-    "                pbar.set_description('Calculating CLIP 32 scores')\n",
     "                add_clip_results(results32, processor32, p_clip32, clip32_params)\n",
     "\n",
     "            pbar.close()\n",
     "\n",
-    "                \n",
-    "\n",
     "    # log results\n",
     "    table = wandb.Table(columns=columns, data=results16)\n",
-    "    run.log({'Samples': table, 'version': version})\n",
     "    wandb.finish()\n",
-    "    \n",
-    "    if add_clip_32:        \n",
-    "        run = wandb.init(job_type='inference', entity='dalle-mini', project='dalle-mini', config=training_config, id=f'{run_id}-clip32{suffix}', resume='allow')\n",
     "        table = wandb.Table(columns=columns, data=results32)\n",
-    "        run.log({'Samples': table, 'version': version})\n",
     "        wandb.finish()\n",
     "        run = None  # ensure we don't log on this run"
    ]

    "metadata": {},
    "outputs": [],
    "source": [
+    "run_ids = [\"63otg87g\"]\n",
+    "ENTITY, PROJECT = \"dalle-mini\", \"dalle-mini\"  # used only for training run\n",
+    "VQGAN_REPO, VQGAN_COMMIT_ID = (\n",
+    "    \"dalle-mini/vqgan_imagenet_f16_16384\",\n",
+    "    \"e93a26e7707683d349bf5d5c41c5b0ef69b677a9\",\n",
+    ")\n",
+    "latest_only = True  # log only latest or all versions\n",
+    "suffix = \"\"  # mainly for duplicate inference runs with a deleted version\n",
     "add_clip_32 = False"
    ]
   },
     "num_images = 128\n",
     "top_k = 8\n",
     "text_normalizer = TextNormalizer()\n",
+    "padding_item = \"NONE\"\n",
+    "seed = random.randint(0, 2 ** 32 - 1)\n",
     "key = jax.random.PRNGKey(seed)\n",
     "api = wandb.Api()"
    ]
     "def p_decode(indices, params):\n",
     "    return vqgan.decode_code(indices, params=params)\n",
     "\n",
+    "\n",
     "@partial(jax.pmap, axis_name=\"batch\")\n",
     "def p_clip16(inputs, params):\n",
     "    logits = clip16(params=params, **inputs).logits_per_image\n",
     "    return logits\n",
     "\n",
+    "\n",
     "if add_clip_32:\n",
+    "\n",
     "    @partial(jax.pmap, axis_name=\"batch\")\n",
     "    def p_clip32(inputs, params):\n",
     "        logits = clip32(params=params, **inputs).logits_per_image\n",
    "metadata": {},
    "outputs": [],
    "source": [
+    "with open(\"samples.txt\", encoding=\"utf8\") as f:\n",
     "    samples = [l.strip() for l in f.readlines()]\n",
     "    # make list multiple of batch_size by adding elements\n",
     "    samples_to_add = [padding_item] * (-len(samples) % batch_size)\n",
     "    samples.extend(samples_to_add)\n",
     "    # reshape\n",
+    "    samples = [samples[i : i + batch_size] for i in range(0, len(samples), batch_size)]"
    ]
   },
   {
     "def get_artifact_versions(run_id, latest_only=False):\n",
     "    try:\n",
     "        if latest_only:\n",
+    "            return [\n",
+    "                api.artifact(\n",
+    "                    type=\"bart_model\", name=f\"{ENTITY}/{PROJECT}/model-{run_id}:latest\"\n",
+    "                )\n",
+    "            ]\n",
     "        else:\n",
+    "            return api.artifact_versions(\n",
+    "                type_name=\"bart_model\",\n",
+    "                name=f\"{ENTITY}/{PROJECT}/model-{run_id}\",\n",
+    "                per_page=10000,\n",
+    "            )\n",
     "    except:\n",
     "        return []"
    ]
    "outputs": [],
    "source": [
     "def get_training_config(run_id):\n",
+    "    training_run = api.run(f\"{ENTITY}/{PROJECT}/{run_id}\")\n",
     "    config = training_run.config\n",
     "    return config"
    ]
     "# retrieve inference run details\n",
     "def get_last_inference_version(run_id):\n",
     "    try:\n",
+    "        inference_run = api.run(f\"dalle-mini/dalle-mini/{run_id}-clip16{suffix}\")\n",
+    "        return inference_run.summary.get(\"version\", None)\n",
     "    except:\n",
     "        return None"
    ]
    "source": [
     "# compile functions - needed only once per run\n",
     "def pmap_model_function(model):\n",
     "    @partial(jax.pmap, axis_name=\"batch\")\n",
     "    def _generate(tokenized_prompt, key, params):\n",
     "        return model.generate(\n",
     "            top_k=gen_top_k,\n",
     "            top_p=gen_top_p\n",
     "        )\n",
+    "\n",
     "    return _generate"
    ]
   },
     "training_config = get_training_config(run_id)\n",
     "run = None\n",
     "p_generate = None\n",
+    "model_files = [\n",
+    "    \"config.json\",\n",
+    "    \"flax_model.msgpack\",\n",
+    "    \"merges.txt\",\n",
+    "    \"special_tokens_map.json\",\n",
+    "    \"tokenizer.json\",\n",
+    "    \"tokenizer_config.json\",\n",
+    "    \"vocab.json\",\n",
+    "]\n",
     "for artifact in artifact_versions:\n",
+    "    print(f\"Processing artifact: {artifact.name}\")\n",
     "    version = int(artifact.version[1:])\n",
     "    results16, results32 = [], []\n",
+    "    columns = [\"Caption\"] + [f\"Image {i+1}\" for i in range(top_k)]\n",
+    "\n",
     "    if latest_only:\n",
     "        assert last_inference_version is None or version > last_inference_version\n",
     "    else:\n",
     "            # we should start from v0\n",
     "            assert version == 0\n",
     "        elif version <= last_inference_version:\n",
+    "            # NOTE(review): this branch only prints; execution still falls through to\n",
+    "            # the processing/logging below for this version - confirm whether a\n",
+    "            # `continue` is intended here.\n",
+    "            print(\n",
+    "                f\"v{version} has already been logged (versions logged up to v{last_inference_version})\"\n",
+    "            )\n",
     "        else:\n",
     "            # check we are logging the correct version\n",
     "            assert version == last_inference_version + 1\n",
     "\n",
     "    # start/resume corresponding run\n",
     "    if run is None:\n",
+    "        run = wandb.init(\n",
+    "            job_type=\"inference\",\n",
+    "            entity=\"dalle-mini\",\n",
+    "            project=\"dalle-mini\",\n",
+    "            config=training_config,\n",
+    "            id=f\"{run_id}-clip16{suffix}\",\n",
+    "            resume=\"allow\",\n",
+    "        )\n",
     "\n",
     "    # work in temporary directory\n",
     "    with tempfile.TemporaryDirectory() as tmp:\n",
     "\n",
     "        # process one batch of captions\n",
     "        for batch in tqdm(samples):\n",
+    "            processed_prompts = (\n",
+    "                [text_normalizer(x) for x in batch]\n",
+    "                if model.config.normalize_text\n",
+    "                else list(batch)\n",
+    "            )\n",
     "\n",
     "            # repeat the prompts to distribute over each device and tokenize\n",
     "            processed_prompts = processed_prompts * jax.device_count()\n",
+    "            tokenized_prompt = tokenizer(\n",
+    "                processed_prompts,\n",
+    "                return_tensors=\"jax\",\n",
+    "                padding=\"max_length\",\n",
+    "                truncation=True,\n",
+    "                max_length=128,\n",
+    "            ).data\n",
     "            tokenized_prompt = shard(tokenized_prompt)\n",
     "\n",
     "            # generate images\n",
     "            images = []\n",
+    "            pbar = tqdm(\n",
+    "                range(num_images // jax.device_count()),\n",
+    "                desc=\"Generating Images\",\n",
+    "                leave=True,\n",
+    "            )\n",
     "            for i in pbar:\n",
     "                key, subkey = jax.random.split(key)\n",
+    "                encoded_images = p_generate(\n",
+    "                    tokenized_prompt, shard_prng_key(subkey), model_params\n",
+    "                )\n",
     "                encoded_images = encoded_images.sequences[..., 1:]\n",
     "                decoded_images = p_decode(encoded_images, vqgan_params)\n",
+    "                decoded_images = decoded_images.clip(0.0, 1.0).reshape(\n",
+    "                    (-1, 256, 256, 3)\n",
+    "                )\n",
     "                for img in decoded_images:\n",
+    "                    images.append(\n",
+    "                        Image.fromarray(np.asarray(img * 255, dtype=np.uint8))\n",
+    "                    )\n",
     "\n",
+    "            def add_clip_results(results, processor, p_clip, clip_params):\n",
+    "                clip_inputs = processor(\n",
+    "                    text=batch,\n",
+    "                    images=images,\n",
+    "                    return_tensors=\"np\",\n",
+    "                    padding=\"max_length\",\n",
+    "                    max_length=77,\n",
+    "                    truncation=True,\n",
+    "                ).data\n",
     "                # each shard will have one prompt, images need to be reorganized to be associated to the correct shard\n",
+    "                images_per_prompt_indices = np.asarray(\n",
+    "                    range(0, len(images), batch_size)\n",
+    "                )\n",
+    "                clip_inputs[\"pixel_values\"] = jnp.concatenate(\n",
+    "                    list(\n",
+    "                        clip_inputs[\"pixel_values\"][images_per_prompt_indices + i]\n",
+    "                        for i in range(batch_size)\n",
+    "                    )\n",
+    "                )\n",
     "                clip_inputs = shard(clip_inputs)\n",
     "                logits = p_clip(clip_inputs, clip_params)\n",
     "                logits = logits.reshape(-1, num_images)\n",
     "                top_scores = logits.argsort()[:, -top_k:][..., ::-1]\n",
     "                logits = jax.device_get(logits)\n",
     "                # add to results table\n",
+    "                for i, (idx, scores, sample) in enumerate(\n",
+    "                    zip(top_scores, logits, batch)\n",
+    "                ):\n",
+    "                    if sample == padding_item:\n",
+    "                        continue\n",
     "                    cur_images = [images[x] for x in images_per_prompt_indices + i]\n",
+    "                    top_images = [\n",
+    "                        wandb.Image(cur_images[x], caption=f\"Score: {scores[x]:.2f}\")\n",
+    "                        for x in idx\n",
+    "                    ]\n",
     "                    results.append([sample] + top_images)\n",
+    "\n",
     "            # get clip scores\n",
+    "            pbar.set_description(\"Calculating CLIP 16 scores\")\n",
     "            add_clip_results(results16, processor16, p_clip16, clip16_params)\n",
+    "\n",
     "            # get clip 32 scores\n",
     "            if add_clip_32:\n",
+    "                pbar.set_description(\"Calculating CLIP 32 scores\")\n",
     "                add_clip_results(results32, processor32, p_clip32, clip32_params)\n",
     "\n",
     "            pbar.close()\n",
     "\n",
     "    # log results\n",
     "    table = wandb.Table(columns=columns, data=results16)\n",
+    "    run.log({\"Samples\": table, \"version\": version})\n",
     "    wandb.finish()\n",
+    "\n",
+    "    if add_clip_32:\n",
+    "        run = wandb.init(\n",
+    "            job_type=\"inference\",\n",
+    "            entity=\"dalle-mini\",\n",
+    "            project=\"dalle-mini\",\n",
+    "            config=training_config,\n",
+    "            id=f\"{run_id}-clip32{suffix}\",\n",
+    "            resume=\"allow\",\n",
+    "        )\n",
     "        table = wandb.Table(columns=columns, data=results32)\n",
+    "        run.log({\"Samples\": table, \"version\": version})\n",
     "        wandb.finish()\n",
     "        run = None  # ensure we don't log on this run"
    ]