{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Build and publish the dummy code quiz dataset\n",
    "\n",
    "This notebook:\n",
    "\n",
    "1. loads the quiz examples from a local JSON file,\n",
    "2. casts the `image` column to the `datasets.Image` feature,\n",
    "3. pushes the resulting dataset to the Hugging Face Hub, and\n",
    "4. loads the companion responses dataset for inspection.\n",
    "\n",
    "It is meant to be run top to bottom on a fresh kernel (Restart Kernel → Run All)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from datasets import Image, load_dataset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Configuration: everything a reader might want to change lives here.\n",
    "DATA_FILE = \"example.json\"\n",
    "QUIZ_REPO_ID = \"burtenshaw/dummy-code-quiz\"\n",
    "RESPONSES_REPO_ID = \"agents-course/dummy-code-quiz_responses\""
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Load the raw examples\n",
    "\n",
    "The JSON loader puts everything into a single `train` split."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "dataset_raw = load_dataset(\"json\", data_files=DATA_FILE)\n",
    "dataset_raw"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Cast the `image` column\n",
    "\n",
    "The `image` column is expected to hold an image file path, or an empty value when an example has no image.\n",
    "`cast_column` is the supported way to change a column's feature type; it returns a new `DatasetDict`, so `dataset_raw` stays untouched and this cell can be re-run safely.\n",
    "\n",
    "**NOTE(review):** the image paths come straight from the JSON file. If they are absolute local paths, the notebook only runs on the machine that owns those files — confirm and prefer paths relative to the notebook."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "dataset = dataset_raw.cast_column(\"image\", Image())\n",
    "\n",
    "# Fail fast if the cast did not take effect before anything is uploaded.\n",
    "assert isinstance(dataset[\"train\"].features[\"image\"], Image)\n",
    "\n",
    "dataset[\"train\"].features"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Inspect one example\n",
    "\n",
    "Sanity check of the first example's `image` value after the cast."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "dataset[\"train\"][0][\"image\"]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Push to the Hub\n",
    "\n",
    "`private=False` publishes the dataset publicly under `QUIZ_REPO_ID`.\n",
    "\n",
    "**NOTE(review):** this cell uploads on every run and presumably needs a Hugging Face login with write access to that repository — confirm before running the whole notebook."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "dataset.push_to_hub(QUIZ_REPO_ID, private=False)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Load the responses dataset\n",
    "\n",
    "Quiz submissions are read from a separate Hub dataset, `RESPONSES_REPO_ID`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "response_ds = load_dataset(RESPONSES_REPO_ID)\n",
    "response_ds"
   ]
  }
], "metadata": { "kernelspec": { "display_name": ".venv", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.10" } }, "nbformat": 4, "nbformat_minor": 2 }