big update
- .gitignore +2 -1
- LICENSE +21 -0
- README.md +8 -15
- config.py +8 -0
- run.py +12 -15
- src/FewShotEpisoder.py +3 -1
- src/{eval.py → evaluate.py} +14 -14
- src/model/ProtoNet.py +4 -4
- src/train.py +32 -18
.gitignore CHANGED
@@ -1,6 +1,7 @@
 # user-defined
-data
+**/data
 *.pth
+**/__pycache__/
 
 # pycharm
 .idea
LICENSE ADDED
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2025 한명환
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
README.md CHANGED
@@ -1,11 +1,3 @@
----
-license: mit
-datasets:
-- dpdl-benchmark/omniglot
-language:
-- ko
-pipeline_tag: image-classification
----
 `torch` `torchvision` `tqdm`
 
 This implementation is inspired by **"Prototypical Networks for Few-Shot Learning" (Snell et al., 2017)**.
@@ -17,7 +9,9 @@ This repository implements a Prototypical Network for few-shot image classification
 
 Few-shot learning aims to enable models to generalize to new classes with only a few labeled examples. Prototypical Networks achieve this by computing a prototype (mean embedding) for each class and classifying query samples based on their distances to these prototypes in the embedding space.
 
-
+> You can access the full documentation here: [gitbook](https://lif31up.gitbook.io/lif31up/meta-learning/prototypical-networks-for-few-shot-learning)
+
+> You can access the test result on colab here: [colab](https://colab.research.google.com/drive/1gsVtGvISCpXQZsKvFjLVocn89ovazusE?usp=sharing)
 
 ## Instruction
 Organize your dataset into a structure compatible with PyTorch's ImageFolder:
@@ -39,21 +33,20 @@ Run the training script with desired parameters:
 ```
 python run.py train --dataset path/to/your/dataset --save_to /path/to/save/model --n_way 5 --k_shot 2 --n_query 4 --epochs 1 --iters 4
 ```
-* `
+* `dataset`: Path to your dataset.
 * `save_to`: Path to save the trained model.
 * `n_way`: Number of classes in each episode.
 * `k_shot`: Number of support samples per class.
 * `n_query`: Number of query samples per class.
-
-
+
+> Change the training configuration in `config.py`.
 
 ### Evaluation
 ```
 python run.py --dataset path/to/your/dataset --model path/to/saved/model.pth
 ```
-* `
+* `dataset`: Path to your dataset.
 * `model`: Path to your model.
-* `n_way`: Number of classes in each episode.
 
 ### Download Omniglot Dataset
 ```
@@ -68,4 +61,4 @@ Prototypical Networks are a powerful approach for **few-shot learning**, where t
 * **Embedding Representation with CNN**: Each input image is passed through a convolutional encoder to obtain a feature embedding.
 * **Prototype Computation**: The prototype for each class is computed as the mean of the embeddings of support samples belonging to that class.
 * **Distance-Based Classification**: Query samples are classified based on the distance (using `torch.cdist`) to the nearest prototype.
-* **Optimization**: The network is trained to minimize the distance between query samples and their correct prototypes while maximizing the distance to incorrect ones.
+* **Optimization**: The network is trained to minimize the distance between query samples and their correct prototypes while maximizing the distance to incorrect ones.
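The four concept bullets at the tail of this README diff map directly onto a few lines of PyTorch. Below is a minimal, self-contained sketch of prototype computation and nearest-prototype classification; the function and tensor names are hypothetical, not code from this repository:

```python
import torch

def classify_by_prototype(support: torch.Tensor, support_labels: torch.Tensor,
                          query: torch.Tensor, n_way: int) -> torch.Tensor:
  """support: (n_way * k_shot, D) embeddings; query: (Q, D) embeddings."""
  # prototype per class = mean of that class's support embeddings
  prototypes = torch.stack([support[support_labels == c].mean(dim=0) for c in range(n_way)])
  dists = torch.cdist(query, prototypes, p=2)  # (Q, n_way) L2 distances
  return dists.argmin(dim=1)                   # nearest prototype wins

# toy usage: 5-way, 2-shot, 16-dim embeddings
support = torch.randn(10, 16)
support_labels = torch.arange(5).repeat_interleave(2)  # [0,0,1,1,...,4,4]
query = torch.randn(4, 16)
print(classify_by_prototype(support, support_labels, query, n_way=5))
```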
config.py ADDED
@@ -0,0 +1,8 @@
+HYPERPARAMETER_CONFIG = {
+  "lr": 0.001,
+  "weight_decay": 0.0001
+} # HYPERPARAMETER_CONFIG
+TRAINING_CONFIG = {
+  "iters": 10,
+  "epochs": 10,
+} # TRAINING_CONFIG
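These dicts are plain module-level constants; `src/train.py` below consumes them as optimizer kwargs and default loop bounds. A minimal sketch of that usage, where the `nn.Linear` placeholder is hypothetical and merely stands in for `ProtoNet`:

```python
import torch
from torch import nn
from config import HYPERPARAMETER_CONFIG, TRAINING_CONFIG

model = nn.Linear(8, 2)  # hypothetical placeholder module
optim = torch.optim.Adam(model.parameters(),
                         lr=HYPERPARAMETER_CONFIG["lr"],
                         weight_decay=HYPERPARAMETER_CONFIG["weight_decay"])
print(TRAINING_CONFIG["epochs"], TRAINING_CONFIG["iters"])  # default loop bounds for train()
```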
run.py CHANGED
@@ -1,33 +1,30 @@
 import argparse
-import src.train as train
-import src.eval as eval
 import torchvision as tv
+from src.train import train
+from src.evaluate import evaluate
 
 def main():
   # eval(default)
   parser = argparse.ArgumentParser(description="Few-shot learning using Prototypical Network")
-  parser.add_argument("--path", type=str, help="path of your model")
   parser.add_argument("--model", type=str, help="path of your model")
-  parser.add_argument("--
+  parser.add_argument("--dataset", type=str, help="path of your dataset")
 
   # train
   subparser = parser.add_subparsers(title="subcommands", dest="subcommand")
   parser_train = subparser.add_parser("train", help="train your model")
-  parser_train.add_argument("--
+  parser_train.add_argument("--dataset", type=str, help="path to your dataset")
   parser_train.add_argument("--save_to", type=str, help="path to save your model")
   parser_train.add_argument("--n_way", type=int, help="number of classes per episode")
   parser_train.add_argument("--k_shot", type=int, help="number of support samples per class")
   parser_train.add_argument("--n_query", type=int, help="number of query samples per class")
   parser_train.add_argument("--iters", type=int, help="number of iterations per episode")
   parser_train.add_argument("--epochs", type=int, help="number of training epochs")
-  parser_train.set_defaults(func=lambda kwargs: train
-
-
-
-
-
-    iters=kwargs.iters,
-    epochs=kwargs.epochs)
+  parser_train.set_defaults(func=lambda kwargs: train(
+    DATASET=kwargs.dataset,
+    SAVE_TO=kwargs.save_to,
+    N_WAY=kwargs.n_way,
+    K_SHOT=kwargs.k_shot,
+    N_QUERY=kwargs.n_query)
   ) # parser_train.set_defaults()
 
   # download dataset
@@ -35,10 +32,10 @@ def main():
   parser_download.add_argument("--path", type=str, help="path to download dataset")
   parser_download.set_defaults(func=lambda kwargs: tv.datasets.Omniglot(root=kwargs.path, background=True, download=True))
 
+  # parse logic
   args = parser.parse_args()
   if hasattr(args, 'func'): args.func(args)
-
-  else: print("invalid argument. exiting program.")
+  else: evaluate(MODEL=args.model, DATASET=args.dataset)
 # main():
 
 if __name__ == "__main__": main()
src/FewShotEpisoder.py CHANGED
@@ -7,7 +7,7 @@ import torch.nn.functional as F
 class FewShotDataset(Dataset):
   """ A custom Dataset class for Few-Shot Learning tasks.
   This dataset can operate in two modes: "support" (for prototype calculation) and "query" (for evaluation). """
-  def __init__(self, dataset
+  def __init__(self, dataset, indices: list, classes: list, transform: typing.Callable, mode="support"):
     """ Args:
       dataset (list): List of (feature, label) pairs.
       indices (list): List of indices to be used for the dataset.
@@ -59,6 +59,8 @@ class FewShotEpisoder:
     indices_c = {label: [] for label in range(len(self.classes))}
     for index, (_, label) in enumerate(self.dataset):
       if label in self.classes: indices_c[label].append(index)
+    for label, _indices_c in indices_c.items():
+      indices_c[label] = random.sample(_indices_c, self.k_shot + self.n_query)
     return indices_c
   # get_indices():
 
src/{eval.py → evaluate.py} RENAMED
@@ -1,25 +1,25 @@
 import torch
 from torch import nn
 from torch.utils.data import DataLoader
-from src.FewShotEpisoder import FewShotEpisoder
-from src.model.ProtoNet import ProtoNet
 import torchvision as tv
+from src.model.ProtoNet import ProtoNet
+from src.FewShotEpisoder import FewShotEpisoder
+
+def evaluate(MODEL: str, DATASET: str):
+  device = torch.device("cuda" if torch.cuda.is_available() else "cpu") # select device
 
-
-
+  # load model
+  data = torch.load(MODEL)
+  n_way, k_shot, n_query = data["framework"]
 
   # load model
-
-
-  transform = data["transform"]
-  model = ProtoNet().to(device)
-  model.load_state_dict(state)
+  model = ProtoNet(*data["model_config"].values()).to(device)
+  model.load_state_dict(data["state"])
   model.eval()
 
   # create FSL episode generator
-  imageset = tv.datasets.ImageFolder(root=
-
-  episoder = FewShotEpisoder(imageset, chosen_classes, 2, 1, transform)
+  imageset = tv.datasets.ImageFolder(root=DATASET)
+  episoder = FewShotEpisoder(imageset, data["chosen_classes"], k_shot, n_query, data["transform"])
 
   # compute prototype from support examples
   support_set, query_set = episoder.get_episode()
@@ -40,7 +40,7 @@ def main(model: str, path: str, n_way=5):
     loss = criterion(pred, label)
     total_loss += loss.item()
     if torch.argmax(pred) == torch.argmax(label): count += 1
-  print(f"
+  print(f"accuracy: {count / n_problem:.4f} ({count}/{n_problem})")
 # main()
 
-if __name__ == "__main__":
+if __name__ == "__main__": evaluate("./model/model.pth", "../data/omniglot-py/images_background/Futurama")
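Read together with `src/train.py` later in this commit, the rewritten `evaluate()` pins down a checkpoint contract: the `.pth` file must carry everything needed to rebuild the model and its episodes. A sketch of the keys this diff implies, with an illustrative path:

```python
import torch

data = torch.load("path/to/model.pth")      # illustrative path
state = data["state"]                        # model weights for load_state_dict()
model_config = data["model_config"]          # ProtoNet constructor args
transform = data["transform"]                # torchvision transform used at train time
chosen_classes = data["chosen_classes"]      # class indices the model was trained on
n_way, k_shot, n_query = data["framework"]   # episode shape
```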
src/model/ProtoNet.py CHANGED
@@ -1,13 +1,13 @@
 import torch
 from torch import nn
-import torch.nn.functional as F
+import torch.nn.functional as torch_f
 
 class ProtoNet(nn.Module):
-  def __init__(self, in_channels=3, hidden_channel=26):
+  def __init__(self, in_channels=3, hidden_channel=26, output_channel=3):
     super(ProtoNet, self).__init__()
     self.conv1 = nn.Conv2d(in_channels, hidden_channel, kernel_size=3, stride=1, padding=1)
     self.conv2 = nn.Conv2d(hidden_channel, hidden_channel, kernel_size=3, stride=1, padding=1)
-    self.conv3 = nn.Conv2d(hidden_channel,
+    self.conv3 = nn.Conv2d(hidden_channel, output_channel, kernel_size=3, stride=1, padding=1)
     self.relu = nn.ReLU()
     self.flatten = nn.Flatten()
     self.softmax = nn.LogSoftmax(dim=1)
@@ -21,7 +21,7 @@ class ProtoNet(nn.Module):
     if metric == "euclidean":
       dists = torch.cdist(x, self.prototypes, p=2) # L2 distance
     elif metric == "cosine":
-      dists = 1 -
+      dists = 1 - torch_f.cosine_similarity(x.unsqueeze(1), self.prototypes.unsqueeze(0), dim=2) # 1 - cosine similarity
     else:
       raise ValueError("Unsupported distance metric. Choose 'euclidean' or 'cosine'.")
     return dists
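The cosine branch above broadcasts the queries against the prototypes; a quick standalone shape check (random tensors, not repository code) confirms both metrics yield a `(num_queries, num_prototypes)` distance matrix:

```python
import torch
import torch.nn.functional as torch_f

x = torch.randn(8, 16)           # 8 query embeddings of dim 16
prototypes = torch.randn(5, 16)  # 5 class prototypes

euclidean = torch.cdist(x, prototypes, p=2)                      # (8, 5)
cosine = 1 - torch_f.cosine_similarity(x.unsqueeze(1),           # (8, 1, 16)
                                       prototypes.unsqueeze(0),  # (1, 5, 16)
                                       dim=2)                    # broadcast -> (8, 5)
assert euclidean.shape == cosine.shape == (8, 5)
```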
src/train.py CHANGED
@@ -1,32 +1,37 @@
 import torch.cuda
 import torchvision as tv
 from torch import nn
+from tqdm import tqdm
 from torch.utils.data import DataLoader
 from src.FewShotEpisoder import FewShotEpisoder
 from src.model.ProtoNet import ProtoNet
-from
+from config import TRAINING_CONFIG, HYPERPARAMETER_CONFIG
 
-def
+def train(DATASET: str, SAVE_TO: str, N_WAY: int, K_SHOT: int, N_QUERY: int, ITERS=TRAINING_CONFIG["iters"], EPOCHS=TRAINING_CONFIG["epochs"]):
   device = torch.device("cuda" if torch.cuda.is_available() else "cpu") # init device
 
-  #
+  # define transform
   transform = tv.transforms.Compose([
     tv.transforms.Resize((224, 224)),
     tv.transforms.ToTensor(),
    tv.transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
   ]) # transform
-  imageset = tv.datasets.ImageFolder(root=path)
-  chosen_classes = list(imageset.class_to_idx.values())[:n_way]
-  episoder = FewShotEpisoder(imageset, chosen_classes, k_shot, n_query, transform)
 
-  # init
-
-
+  # init episode generator
+  imageset = tv.datasets.ImageFolder(root=DATASET)
+  chosen_classes = list(imageset.class_to_idx.values())[:N_WAY]
+  episoder = FewShotEpisoder(imageset, chosen_classes, K_SHOT, N_QUERY, transform)
+
+  # init model
+  model_config = {"in_channels": 3, "hidden_channels": 26, "output_channels": 3}
+  model = ProtoNet(*model_config.values()).to(device)
+  optim = torch.optim.Adam(model.parameters(), lr=HYPERPARAMETER_CONFIG["lr"], weight_decay=HYPERPARAMETER_CONFIG["weight_decay"])
   criterion = nn.CrossEntropyLoss()
 
-
+  progress_bar, whole_loss = tqdm(range(EPOCHS)), float()
+  for _ in progress_bar:
     support_set, query_set = episoder.get_episode()
-    # compute prototype from support examples
+    # STAGE1: compute prototype from support examples
     prototypes = list()
     embedded_features_list = [[] for _ in range(len(support_set.classes))]
     for embedded_feature, label in support_set: embedded_features_list[label].append(embedded_feature)
@@ -36,23 +41,32 @@ def main(path, save_to, n_way=5, k_shot=5, n_query=2, iters=10, epochs=1):
     # for
     prototypes = torch.stack(prototypes)
     model.prototyping(prototypes)
-
-
+    # STAGE2: update parameters from the loss associated with prototypes
+    epochs_loss = 0.0
+    for _ in range(ITERS):
+      iter_loss = 0.0
       for feature, label in DataLoader(query_set, shuffle=True):
         loss = criterion(model.forward(feature), label)
-
+        iter_loss += loss.item()
        optim.zero_grad()
         loss.backward()
         optim.step()
-
-
+      epochs_loss += iter_loss / len(query_set)
+    # for # for
+    epochs_loss = epochs_loss / ITERS
+    progress_bar.set_postfix(loss=epochs_loss)
+  # for
 
   # saving the model's parameters and the other data
   features = {
     "state": model.state_dict(),
+    "model_config": model_config,
     "transform": transform,
+    "chosen_classes": chosen_classes,
+    "framework": (N_WAY, K_SHOT, N_QUERY)
   } # features
-  torch.save(features,
+  torch.save(features, SAVE_TO)
+  print(f"model saved to {SAVE_TO}")
 # main()
 
-if __name__ == "__main__":
+if __name__ == "__main__": train("../data/omniglot-py/images_background/Futurama", "./model/model.pth", 5, 5, 2, 5, 5)