{ "cells": [ { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [], "source": [ "gpu_ram_utilization_bytes = torch.cuda.memory_allocated()\n", "gpu_ram_utilization_mb = gpu_ram_utilization_bytes / (1024 * 1024)\n", "gpu_ram_total_bytes = torch.cuda.get_device_properties(0).total_memory\n", "gpu_ram_percentage = (gpu_ram_utilization_bytes / gpu_ram_total_bytes) * 100" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "gpu_ram_utilization_mb, gpu_ram_total_bytes" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "ellNFnP7f2Wx", "outputId": "3adb85e1-f41a-433f-bd77-f1301abb7731" }, "outputs": [], "source": [ "import os\n", "os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"0\"\n", "\n", "import psutil\n", "import torch\n", "from datetime import datetime\n", "import time\n", "import matplotlib.pyplot as plt\n", "\n", "\n", "import torch\n", "import torch.nn as nn\n", "import torch.optim as optim\n", "from torch.utils.data import DataLoader\n", "from torchvision import datasets, transforms\n", "import torch.nn.functional as F\n", "\n", "\n", "\n", "timestamps = []\n", "cpu_ram_mb = []\n", "cpu_ram_percent = []\n", "gpu_ram_mb = []\n", "gpu_ram_percent = []\n", "\n", "\n", "\n", "# --- System Utilization ---------------------------------------------------------------------------\n", "def get_system_utilization():\n", " current_time = datetime.now().strftime(\"%Y-%m-%d %H:%M:%S\")\n", "\n", " cpu_ram = psutil.virtual_memory()\n", " cpu_ram_utilization_bytes = cpu_ram.used\n", " cpu_ram_utilization_mb = cpu_ram_utilization_bytes / (1024 * 1024)\n", " cpu_ram_percentage = cpu_ram.percent\n", "\n", " gpu_ram_utilization_mb = None\n", " gpu_ram_percentage = None\n", " if torch.cuda.is_available():\n", " gpu_ram_utilization_bytes = torch.cuda.memory_allocated()\n", " gpu_ram_utilization_mb = gpu_ram_utilization_bytes / (1024 * 1024)\n", " gpu_ram_total_bytes = torch.cuda.get_device_properties(0).total_memory\n", " gpu_ram_percentage = (gpu_ram_utilization_bytes / gpu_ram_total_bytes) * 100\n", "\n", " return {\n", " \"time\": current_time,\n", " \"cpu_ram_utilization_mb\": cpu_ram_utilization_mb,\n", " \"cpu_ram_percentage\": cpu_ram_percentage,\n", " \"gpu_ram_utilization_mb\": gpu_ram_utilization_mb,\n", " \"gpu_ram_percentage\": gpu_ram_percentage\n", " }\n", "\n", "\n", "\n", "def update_utilization_lists():\n", " global timestamps, cpu_ram_mb, cpu_ram_percent, gpu_ram_mb, gpu_ram_percent\n", "\n", " utilization = get_system_utilization()\n", "\n", " timestamps.append(utilization[\"time\"])\n", " cpu_ram_mb.append(utilization[\"cpu_ram_utilization_mb\"])\n", " cpu_ram_percent.append(utilization[\"cpu_ram_percentage\"])\n", " gpu_ram_mb.append(utilization[\"gpu_ram_utilization_mb\"])\n", " gpu_ram_percent.append(utilization[\"gpu_ram_percentage\"])\n", "\n", "\n", "\n", "# --- Define the VAE model -------------------------------------------------------------------------\n", "class VAE(nn.Module):\n", " update_utilization_lists()\n", " def __init__(self, latent_dim=20):\n", " super(VAE, self).__init__()\n", " self.latent_dim = latent_dim\n", "\n", " # Encoder\n", " update_utilization_lists()\n", " self.encoder = nn.Sequential(\n", " nn.Linear(28 * 28, 512),\n", " nn.ReLU(),\n", " nn.Linear(512, 256),\n", " nn.ReLU(),\n", " nn.Linear(256, 2 * latent_dim) # Output mean and log variance\n", " )\n", "\n", " # Decoder\n", " update_utilization_lists()\n", " self.decoder = 
{ "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "ellNFnP7f2Wx", "outputId": "3adb85e1-f41a-433f-bd77-f1301abb7731" }, "outputs": [], "source": [ "import os\n", "os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"0\"\n", "\n", "import psutil\n", "import torch\n", "import torch.nn as nn\n", "import torch.optim as optim\n", "import torch.nn.functional as F\n", "from torch.utils.data import DataLoader\n", "from torchvision import datasets, transforms\n", "from datetime import datetime\n", "import matplotlib.pyplot as plt\n", "\n", "\n", "timestamps = []\n", "cpu_ram_mb = []\n", "cpu_ram_percent = []\n", "gpu_ram_mb = []\n", "gpu_ram_percent = []\n", "\n", "\n", "# --- System Utilization ---------------------------------------------------------------------------\n", "def get_system_utilization():\n", "    \"\"\"Return a single timestamped sample of CPU and GPU RAM usage.\"\"\"\n", "    current_time = datetime.now().strftime(\"%Y-%m-%d %H:%M:%S\")\n", "\n", "    cpu_ram = psutil.virtual_memory()\n", "    cpu_ram_utilization_bytes = cpu_ram.used\n", "    cpu_ram_utilization_mb = cpu_ram_utilization_bytes / (1024 * 1024)\n", "    cpu_ram_percentage = cpu_ram.percent\n", "\n", "    gpu_ram_utilization_mb = None\n", "    gpu_ram_percentage = None\n", "    if torch.cuda.is_available():\n", "        gpu_ram_utilization_bytes = torch.cuda.memory_allocated()\n", "        gpu_ram_utilization_mb = gpu_ram_utilization_bytes / (1024 * 1024)\n", "        gpu_ram_total_bytes = torch.cuda.get_device_properties(0).total_memory\n", "        gpu_ram_percentage = (gpu_ram_utilization_bytes / gpu_ram_total_bytes) * 100\n", "\n", "    return {\n", "        \"time\": current_time,\n", "        \"cpu_ram_utilization_mb\": cpu_ram_utilization_mb,\n", "        \"cpu_ram_percentage\": cpu_ram_percentage,\n", "        \"gpu_ram_utilization_mb\": gpu_ram_utilization_mb,\n", "        \"gpu_ram_percentage\": gpu_ram_percentage\n", "    }\n", "\n", "\n", "def update_utilization_lists():\n", "    \"\"\"Append the current utilization sample to the global tracking lists.\"\"\"\n", "    utilization = get_system_utilization()\n", "\n", "    timestamps.append(utilization[\"time\"])\n", "    cpu_ram_mb.append(utilization[\"cpu_ram_utilization_mb\"])\n", "    cpu_ram_percent.append(utilization[\"cpu_ram_percentage\"])\n", "    gpu_ram_mb.append(utilization[\"gpu_ram_utilization_mb\"])\n", "    gpu_ram_percent.append(utilization[\"gpu_ram_percentage\"])\n", "\n", "\n", "# --- Define the VAE model -------------------------------------------------------------------------\n", "# update_utilization_lists() is called before each step so the logs trace memory through the model.\n", "class VAE(nn.Module):\n", "    def __init__(self, latent_dim=20):\n", "        super(VAE, self).__init__()\n", "        self.latent_dim = latent_dim\n", "\n", "        # Encoder: 784 -> 512 -> 256 -> 2 * latent_dim\n", "        update_utilization_lists()\n", "        self.encoder = nn.Sequential(\n", "            nn.Linear(28 * 28, 512),\n", "            nn.ReLU(),\n", "            nn.Linear(512, 256),\n", "            nn.ReLU(),\n", "            nn.Linear(256, 2 * latent_dim)  # Output mean and log variance\n", "        )\n", "\n", "        # Decoder: latent_dim -> 256 -> 512 -> 784, sigmoid for pixel probabilities\n", "        update_utilization_lists()\n", "        self.decoder = nn.Sequential(\n", "            nn.Linear(latent_dim, 256),\n", "            nn.ReLU(),\n", "            nn.Linear(256, 512),\n", "            nn.ReLU(),\n", "            nn.Linear(512, 28 * 28),\n", "            nn.Sigmoid()\n", "        )\n", "\n", "    def encode(self, x):\n", "        update_utilization_lists()\n", "        h = self.encoder(x)\n", "\n", "        update_utilization_lists()\n", "        mu, logvar = h.chunk(2, dim=-1)  # Split into mean and log variance\n", "\n", "        update_utilization_lists()\n", "        return mu, logvar\n", "\n", "    def reparameterize(self, mu, logvar):\n", "        update_utilization_lists()\n", "        std = torch.exp(0.5 * logvar)\n", "\n", "        update_utilization_lists()\n", "        eps = torch.randn_like(std)\n", "\n", "        update_utilization_lists()\n", "        return mu + eps * std\n", "\n", "    def decode(self, z):\n", "        update_utilization_lists()\n", "        decoded = self.decoder(z)\n", "\n", "        update_utilization_lists()\n", "        return decoded\n", "\n", "    def forward(self, x):\n", "        update_utilization_lists()\n", "        mu, logvar = self.encode(x.view(-1, 28 * 28))\n", "\n", "        z = self.reparameterize(mu, logvar)\n", "        return self.decode(z), mu, logvar\n", "\n", "\n", "# --- Loss function --------------------------------------------------------------------------------\n", "def loss_function(recon_x, x, mu, logvar):\n", "    update_utilization_lists()\n", "    BCE = F.binary_cross_entropy(recon_x, x.view(-1, 28 * 28), reduction='sum')\n", "\n", "    update_utilization_lists()\n", "    KLD = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp())\n", "\n", "    update_utilization_lists()\n", "    return BCE + KLD\n", "\n", "\n", "# --- Load MNIST dataset ---------------------------------------------------------------------------\n", "transform = transforms.Compose([transforms.ToTensor()])\n", "train_dataset = datasets.MNIST(root='/home/23m1521/datasets/MNIST', train=True, download=True, transform=transform)\n", "train_loader = DataLoader(train_dataset, batch_size=128, shuffle=True, num_workers=os.cpu_count())\n", "\n", "\n", "# --- Initialize model, optimizer ------------------------------------------------------------------\n", "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n", "model = VAE(latent_dim=20).to(device)\n", "optimizer = optim.Adam(model.parameters(), lr=1e-3)\n", "\n", "\n", "# --- Training loop --------------------------------------------------------------------------------\n", "def train(epoch):\n", "    update_utilization_lists()\n", "    model.train()\n", "\n", "    train_loss = 0\n", "    for batch_idx, (data, _) in enumerate(train_loader):\n", "        update_utilization_lists()\n", "\n", "        data = data.to(device)\n", "        update_utilization_lists()\n", "\n", "        optimizer.zero_grad()\n", "        update_utilization_lists()\n", "\n", "        recon_batch, mu, logvar = model(data)\n", "        update_utilization_lists()\n", "\n", "        loss = loss_function(recon_batch, data, mu, logvar)\n", "        update_utilization_lists()\n", "\n", "        loss.backward()\n", "        update_utilization_lists()\n", "\n", "        train_loss += loss.item()\n", "        update_utilization_lists()\n", "\n", "        optimizer.step()\n", "        update_utilization_lists()\n", "\n", "        if batch_idx % 100 == 0:\n", "            print(f'Train Epoch: {epoch} [{batch_idx * len(data)}/{len(train_loader.dataset)} '\n", "                  f'({100. * batch_idx / len(train_loader):.0f}%)]\\tLoss: {loss.item() / len(data):.6f}')\n", "\n", "    print(f'====> Epoch: {epoch} Average loss: {train_loss / len(train_loader.dataset):.4f}')\n", "\n", "\n", "# --- Train for 2 epochs ---------------------------------------------------------------------------\n", "for epoch in range(1, 3):\n", "    update_utilization_lists()\n", "    train(epoch)\n", "    update_utilization_lists()" ] },
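{ "cell_type": "markdown", "metadata": {}, "source": [ "Once training has finished, the decoder doubles as a generator: sampling `z ~ N(0, I)` from the prior and pushing it through `model.decode` yields new digit-like images. The cell below is a minimal sketch of that idea (not part of the instrumented training run above); the sample count of 8 is arbitrary." ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Sketch: generate images by decoding latents drawn from the standard-normal prior.\n", "model.eval()\n", "with torch.no_grad():\n", "    z = torch.randn(8, model.latent_dim, device=device)\n", "    samples = model.decode(z).view(-1, 28, 28).cpu()\n", "\n", "fig, axes = plt.subplots(1, 8, figsize=(16, 2))\n", "for ax, img in zip(axes, samples):\n", "    ax.imshow(img, cmap=\"gray\")\n", "    ax.axis(\"off\")\n", "plt.show()" ] },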
{ "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "6M9KOwxshmZF", "outputId": "274be81e-b8a7-4100-f6d8-235d5a8ffb6d" }, "outputs": [], "source": [ "print(\"CPU RAM (MB):\", min(cpu_ram_mb), max(cpu_ram_mb))\n", "print(\"CPU RAM (%):\", min(cpu_ram_percent), max(cpu_ram_percent))\n", "if torch.cuda.is_available():\n", "    print(\"GPU RAM (MB):\", min(gpu_ram_mb), max(gpu_ram_mb))\n", "    print(\"GPU RAM (%):\", min(gpu_ram_percent), max(gpu_ram_percent))" ] },
{ "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 400 }, "id": "mKdK390Ehq7u", "outputId": "524a035c-98c5-4c45-99c8-96a882007427" }, "outputs": [], "source": [ "plt.figure(figsize=(21, 8))\n", "\n", "# --- Plot CPU RAM Utilization (MB) ----------------------------------------------------------------\n", "plt.subplot(2, 2, 1)\n", "plt.plot(range(len(timestamps)), cpu_ram_mb, label=\"CPU RAM (MB)\")\n", "plt.title(\"CPU RAM Utilization (MB)\")\n", "plt.xlabel(\"Time\")\n", "plt.ylabel(\"MB\")\n", "plt.xticks(rotation=45)\n", "plt.grid(True)\n", "plt.legend()\n", "\n", "# --- Plot CPU RAM Utilization (%) -----------------------------------------------------------------\n", "plt.subplot(2, 2, 2)\n", "plt.plot(range(len(timestamps)), cpu_ram_percent, label=\"CPU RAM (%)\", color=\"orange\")\n", "plt.title(\"CPU RAM Utilization (%)\")\n", "plt.xlabel(\"Time\")\n", "plt.ylabel(\"Percentage\")\n", "plt.xticks(rotation=45)\n", "plt.grid(True)\n", "plt.legend()\n", "\n", "# --- Plot GPU RAM Utilization (MB and %) if GPU exists --------------------------------------------\n", "if torch.cuda.is_available():\n", "    plt.subplot(2, 2, 3)\n", "    plt.plot(range(len(timestamps)), gpu_ram_mb, label=\"GPU RAM (MB)\", color=\"green\")\n", "    plt.title(\"GPU RAM Utilization (MB)\")\n", "    plt.xlabel(\"Time\")\n", "    plt.ylabel(\"MB\")\n", "    plt.xticks(rotation=45)\n", "    plt.grid(True)\n", "    plt.legend()\n", "\n", "    plt.subplot(2, 2, 4)\n", "    plt.plot(range(len(timestamps)), gpu_ram_percent, label=\"GPU RAM (%)\", color=\"red\")\n", "    plt.title(\"GPU RAM Utilization (%)\")\n", "    plt.xlabel(\"Time\")\n", "    plt.ylabel(\"Percentage\")\n", "    plt.xticks(rotation=45)\n", "    plt.grid(True)\n", "    plt.legend()\n", "\n", "plt.tight_layout()\n", "plt.show()" ] },
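{ "cell_type": "markdown", "metadata": {}, "source": [ "The figures above are ephemeral, so it can help to persist the raw samples as well. The cell below is a small sketch that writes the collected lists to a CSV with `pandas` (assumed to be installed; the `utilization_log.csv` filename is just an illustration). Note the GPU columns contain `None` on CPU-only machines." ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "\n", "# Persist the monitoring samples; one row per update_utilization_lists() call.\n", "df = pd.DataFrame({\n", "    \"time\": timestamps,\n", "    \"cpu_ram_mb\": cpu_ram_mb,\n", "    \"cpu_ram_percent\": cpu_ram_percent,\n", "    \"gpu_ram_mb\": gpu_ram_mb,\n", "    \"gpu_ram_percent\": gpu_ram_percent,\n", "})\n", "df.to_csv(\"utilization_log.csv\", index=False)\n", "df.describe()" ] },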
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import plotly.graph_objects as go\n", "from plotly.subplots import make_subplots\n", "import torch\n", "\n", "# Create subplots\n", "fig = make_subplots(\n", "    rows=2, cols=2,\n", "    subplot_titles=(\"CPU RAM Utilization (MB)\", \"CPU RAM Utilization (%)\",\n", "                    \"GPU RAM Utilization (MB)\", \"GPU RAM Utilization (%)\")\n", ")\n", "\n", "# Plot CPU RAM Utilization (MB)\n", "fig.add_trace(\n", "    go.Scatter(x=list(range(len(timestamps))), y=cpu_ram_mb, mode='lines', name='CPU RAM (MB)'),\n", "    row=1, col=1\n", ")\n", "\n", "# Plot CPU RAM Utilization (%)\n", "fig.add_trace(\n", "    go.Scatter(x=list(range(len(timestamps))), y=cpu_ram_percent, mode='lines', name='CPU RAM (%)', line=dict(color='orange')),\n", "    row=1, col=2\n", ")\n", "\n", "# Plot GPU RAM Utilization (MB and %) if GPU exists\n", "if torch.cuda.is_available():\n", "    fig.add_trace(\n", "        go.Scatter(x=list(range(len(timestamps))), y=gpu_ram_mb, mode='lines', name='GPU RAM (MB)', line=dict(color='green')),\n", "        row=2, col=1\n", "    )\n", "\n", "    fig.add_trace(\n", "        go.Scatter(x=list(range(len(timestamps))), y=gpu_ram_percent, mode='lines', name='GPU RAM (%)', line=dict(color='red')),\n", "        row=2, col=2\n", "    )\n", "\n", "# Update layout\n", "fig.update_layout(\n", "    height=800, width=1200,\n", "    title_text=\"System Resource Utilization\",\n", "    showlegend=True\n", ")\n", "\n", "fig.update_xaxes(title_text=\"Time\", tickangle=45)\n", "fig.update_yaxes(title_text=\"MB or Percentage\")\n", "\n", "# Show plot\n", "fig.show()" ] },
{ "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 454 }, "id": "3MGfGd_Ojcrf", "outputId": "f1091984-2658-4053-ff08-c7c300c08d0e" }, "outputs": [], "source": [ "plt.figure(figsize=(21, 4))\n", "\n", "# Zoom into the first samples (capped at however many were actually collected)\n", "r = min(12000, len(cpu_ram_mb))\n", "x, y = range(r), cpu_ram_mb[:r]\n", "\n", "plt.plot(x, y, label=\"CPU RAM (MB)\")\n", "plt.title(\"CPU RAM Utilization (MB)\")\n", "plt.xlabel(\"Time\")\n", "plt.ylabel(\"MB\")\n", "plt.xticks(rotation=45)\n", "plt.grid(True)\n", "plt.legend()\n", "plt.tight_layout()\n", "plt.show()" ] }
], "metadata": { "colab": { "provenance": [] }, "kernelspec": { "display_name": "cuda_env2", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.12.2" } }, "nbformat": 4, "nbformat_minor": 0 }