{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"provenance":[],"gpuType":"T4","authorship_tag":"ABX9TyNmLoeMm2V9yoyQDaneLvbr"},"kernelspec":{"name":"python3","display_name":"Python 3"},"language_info":{"name":"python"},"accelerator":"GPU"},"cells":[
{"cell_type":"code","execution_count":1,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"B3ulPFQCjm-P","executionInfo":{"status":"ok","timestamp":1755174590649,"user_tz":-420,"elapsed":149,"user":{"displayName":"Laam Pham","userId":"04566654796696849937"}},"outputId":"c9406e64-d9ff-4a06-d16d-f63a8c3b1d6e"},"outputs":[{"output_type":"stream","name":"stdout","text":["Thu Aug 14 12:29:50 2025 \n","+-----------------------------------------------------------------------------------------+\n","| NVIDIA-SMI 550.54.15 Driver Version: 550.54.15 CUDA Version: 12.4 |\n","|-----------------------------------------+------------------------+----------------------+\n","| GPU Name Persistence-M | Bus-Id Disp.A | Volatile Uncorr. ECC |\n","| Fan Temp Perf Pwr:Usage/Cap | Memory-Usage | GPU-Util Compute M. |\n","| | | MIG M. |\n","|=========================================+========================+======================|\n","| 0 Tesla T4 Off | 00000000:00:04.0 Off | 0 |\n","| N/A 35C P8 9W / 70W | 0MiB / 15360MiB | 0% Default |\n","| | | N/A |\n","+-----------------------------------------+------------------------+----------------------+\n"," \n","+-----------------------------------------------------------------------------------------+\n","| Processes: |\n","| GPU GI CI PID Type Process name GPU Memory |\n","| ID ID Usage |\n","|=========================================================================================|\n","| No running processes found |\n","+-----------------------------------------------------------------------------------------+\n"]}],"source":["!nvidia-smi"]},
{"cell_type":"code","source":["import torch"],"metadata":{"id":"e5Wc9ksVjroy","executionInfo":{"status":"ok","timestamp":1755174600684,"user_tz":-420,"elapsed":5359,"user":{"displayName":"Laam Pham","userId":"04566654796696849937"}}},"execution_count":2,"outputs":[]},
{"cell_type":"code","source":["class PyTorchCUDA101:\n","    def __init__(self) -> None:\n","        if not torch.cuda.is_available():\n","            raise RuntimeError(\"CUDA not available\")\n","        self.device = torch.device(\"cuda\")\n","\n","        print(f\"Using GPU: {torch.cuda.get_device_name()}\")\n","        print(f\"CUDA version: {torch.version.cuda}\")\n","        print(f\"PyTorch version: {torch.__version__}\")\n","\n","        print(\"\\n\" + \"=\" * 60)\n","        print(\"GPU MEMORY BASELINE: Understanding CUDA Overhead\")\n","        print(\"=\" * 60)\n","\n","        torch.cuda.empty_cache()\n","        torch.cuda.reset_peak_memory_stats()\n","        baseline_mem = torch.cuda.memory_allocated() / 1024**2\n","\n","        # The first allocation forces CUDA context creation, but\n","        # memory_allocated() only tracks tensors owned by PyTorch's caching\n","        # allocator: a 1x1 float32 tensor occupies one 512-byte block, which\n","        # rounds to 0.0 MB. The context itself is invisible to this counter.\n","        min_tensor = torch.ones((1, 1), device=self.device)\n","        after_mem = torch.cuda.memory_allocated() / 1024**2\n","\n","        print(f\"šŸ“Š memory_allocated() before first tensor: {baseline_mem:.1f} MB\")\n","        print(f\"šŸ“Š memory_allocated() after first tensor: {after_mem:.1f} MB\")\n","        print(f\"šŸŽÆ Allocator delta: {after_mem - baseline_mem:.1f} MB\")\n","\n","        print(\"\\nšŸ’” Key Reality Check: the CUDA context alone can consume hundreds of MB\")\n","        print(\"   regardless of model size (~120 MiB on this T4), and it is invisible\")\n","        print(\"   to memory_allocated() - compare the nvidia-smi readings before and\")\n","        print(\"   after to see it. Buffers, intermediate results, and fragmentation\")\n","        print(\"   make precise accounting hard, so focus on relative improvements.\")"],"metadata":{"id":"9scPSfzCjulL","executionInfo":{"status":"ok","timestamp":1755175093077,"user_tz":-420,"elapsed":4,"user":{"displayName":"Laam Pham","userId":"04566654796696849937"}}},"execution_count":9,"outputs":[]},
{"cell_type":"code","source":["p = PyTorchCUDA101()"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"cRZ-5LW5komv","executionInfo":{"status":"ok","timestamp":1755175093836,"user_tz":-420,"elapsed":14,"user":{"displayName":"Laam Pham","userId":"04566654796696849937"}},"outputId":"01a5bd7c-3739-4eec-9247-9b98bdb82bba"},"execution_count":10,"outputs":[{"output_type":"stream","name":"stdout","text":["Using GPU: Tesla T4\n","CUDA version: 12.4\n","PyTorch version: 2.6.0+cu124\n","\n","============================================================\n","GPU MEMORY BASELINE: Understanding CUDA Overhead\n","============================================================\n","šŸ“Š memory_allocated() before first tensor: 0.0 MB\n","šŸ“Š memory_allocated() after first tensor: 0.0 MB\n","šŸŽÆ Allocator delta: 0.0 MB\n","\n","šŸ’” Key Reality Check: the CUDA context alone can consume hundreds of MB\n","   regardless of model size (~120 MiB on this T4), and it is invisible\n","   to memory_allocated() - compare the nvidia-smi readings before and\n","   after to see it. Buffers, intermediate results, and fragmentation\n","   make precise accounting hard, so focus on relative improvements.\n"]}]},
{"cell_type":"code","source":["!nvidia-smi"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"oTZ0xVRnkqr-","executionInfo":{"status":"ok","timestamp":1755175117694,"user_tz":-420,"elapsed":106,"user":{"displayName":"Laam Pham","userId":"04566654796696849937"}},"outputId":"9bae77de-5e0a-4f66-8a70-d51bc04e00fa"},"execution_count":11,"outputs":[{"output_type":"stream","name":"stdout","text":["Thu Aug 14 12:38:37 2025 \n","+-----------------------------------------------------------------------------------------+\n","| NVIDIA-SMI 550.54.15 Driver Version: 550.54.15 CUDA Version: 12.4 |\n","|-----------------------------------------+------------------------+----------------------+\n","| GPU Name Persistence-M | Bus-Id Disp.A | Volatile Uncorr. ECC |\n","| Fan Temp Perf Pwr:Usage/Cap | Memory-Usage | GPU-Util Compute M. |\n","| | | MIG M. |\n","|=========================================+========================+======================|\n","| 0 Tesla T4 Off | 00000000:00:04.0 Off | 0 |\n","| N/A 44C P0 25W / 70W | 120MiB / 15360MiB | 0% Default |\n","| | | N/A |\n","+-----------------------------------------+------------------------+----------------------+\n"," \n","+-----------------------------------------------------------------------------------------+\n","| Processes: |\n","| GPU GI CI PID Type Process name GPU Memory |\n","| ID ID Usage |\n","|=========================================================================================|\n","+-----------------------------------------------------------------------------------------+\n"]}]},
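{"cell_type":"code","source":["# A minimal sketch (not executed above): contrast PyTorch's allocator\n","# statistics with device-level usage. torch.cuda.mem_get_info() wraps\n","# cudaMemGetInfo, so its numbers include the CUDA context that\n","# memory_allocated() cannot see. The \"context overhead\" figure is only\n","# an approximation: other processes sharing the GPU also reduce free memory.\n","import torch\n","\n","free_b, total_b = torch.cuda.mem_get_info()\n","device_used = (total_b - free_b) / 1024**2            # everything on the GPU, context included\n","allocated = torch.cuda.memory_allocated() / 1024**2   # live PyTorch tensors only\n","reserved = torch.cuda.memory_reserved() / 1024**2     # caching allocator's pool (>= allocated)\n","\n","print(f\"Device-level used:        {device_used:.1f} MB\")\n","print(f\"Allocator allocated:      {allocated:.1f} MB\")\n","print(f\"Allocator reserved:       {reserved:.1f} MB\")\n","print(f\"Approx. context overhead: {device_used - reserved:.1f} MB\")"],"metadata":{},"execution_count":null,"outputs":[]},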
{"cell_type":"code","source":[],"metadata":{"id":"kDRiSDIvluHU"},"execution_count":null,"outputs":[]}]}