python – How to use the TensorBoard Embedding Projector in PyTorch with a custom dataset and custom model


Currently I'm doing image embedding visualisation and I want to use the TensorBoard Projector's PCA and t-SNE views to see image embedding similarity. I followed a website's code to do the visualisation, but I am unable to get the expected result where the same images clump together; instead, different images clump together. I don't use labels or metadata as I'm doing unsupervised learning. For PCA I get random images grouped together in a sphere, and for t-SNE it shows nothing (just 0), or it gets stuck loading and makes my Google Colab unresponsive.

[screenshot: PCA projection]

[screenshot: t-SNE projection]

I used this article as my guide for the TensorBoard embedding visualisation:
https://medium.com/@kumon/visualizing-image-feature-vectors-through-tensorboard-b850ce1be7f1
However, it uses TensorFlow and a pretrained MobileNet model.

I am using PyTorch, so I modified the code from the article to work with PyTorch. The model I use is also a custom model: a Siamese network that is already trained to produce image feature embeddings of dimension 250, saved as a model weight file.

From the article, what the projector needs is a sprite.jpg, a feature_vecs.tsv and a projector_config.pbtxt.

This is the code I use to generate sprite.jpg. My custom dataset is in a RAR file with 14056 images.

import os
import rarfile
from PIL import Image
import torch
from torchvision import transforms
from torchvision.utils import make_grid

# Path to your rar file
rar_file_path="/content/drive/MyDrive/DATASET-V1.rar"

# Directory to extract the images
extracted_folder="/content/extracted_images/"

# Create a folder to extract the images if it doesn't exist
os.makedirs(extracted_folder, exist_ok=True)

# Extract the contents of the rar file
with rarfile.RarFile(rar_file_path, 'r') as rar_ref:
    rar_ref.extractall(extracted_folder)

# List all image files in the extracted directory including subfolders
image_files = []
for root, dirs, files in os.walk(extracted_folder):
    for file in files:
        if file.lower().endswith(('.png', '.jpg', '.jpeg', '.bmp')):
            image_files.append(os.path.join(root, file))

# Sort so the sprite cell order matches the feature-vector row order
# produced by the second script below
image_files.sort()

if len(image_files) == 0:
    print("No image files found in the extracted directory. Check the file extensions or the content of the extracted folder.")
else:
    # Define the number of images per row in the sprite image
    num_images_per_row = 100
    image_size = 50

    # Load and process the images using torchvision transforms
    transform = transforms.Compose([
        transforms.Resize((image_size, image_size)),
        transforms.ToTensor(),
    ])

    # Load and transform images
    images = []
    for image_file in image_files:
        try:
            img = Image.open(image_file).convert("RGB")
            img = transform(img)
            images.append(img)
        except Exception as e:
            print(f"Error loading {image_file}: {e}")

    if len(images) > 0:
        # Create a grid of images. padding=0 matters here: the projector
        # assumes each sprite cell is exactly image_size x image_size, and
        # make_grid's default padding of 2 pixels would misalign the cells.
        grid = make_grid(images, nrow=num_images_per_row, padding=0)

        # Convert the PyTorch tensor to a PIL image
        sprite_image = transforms.ToPILImage()(grid)

        # Save the sprite image
        sprite_image.save('sprite.jpg')
    else:
        print("No images loaded successfully. Check for errors in image loading.")

Next is the code I use to generate the feature vectors file (I save it as feature_vecs2.tsv).

import os
import torch
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, Dataset
from PIL import Image
import numpy as np
import torch.nn as nn
import rarfile

# Path to your rar file
rar_file_path="/content/drive/MyDrive/DATASET-V1.rar"

# Directory to extract the images
extracted_folder="/content/extracted_images/"

# Create a folder to extract the images if it doesn't exist
os.makedirs(extracted_folder, exist_ok=True)

# Extract the contents of the rar file
with rarfile.RarFile(rar_file_path, 'r') as rar_ref:
    rar_ref.extractall(extracted_folder)

# Define the path to your Siamese network's encoder and optimizer weights
encoder_weight_path = "/content/drive/MyDrive/test_o/siam2/enc_weight_20231109022242.pth"
optimizer_weight_path = "/content/drive/MyDrive/test_o/siam2/optim_weight_20231110073800.pth"

# Initialize the Siamese network model and load the weights
class SiameseNetwork(nn.Module):

    def __init__(self):
        super(SiameseNetwork, self).__init__()

        # Setting up the Sequential of CNN Layers
        self.cnn1 = nn.Sequential(
            nn.Conv2d(3, 96, kernel_size=11,stride=4),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(3, stride=2),

            nn.Conv2d(96, 256, kernel_size=5, stride=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2, stride=2),

            nn.Conv2d(256, 384, kernel_size=3,stride=1),
            nn.ReLU(inplace=True)
        )

        # Setting up the Fully Connected Layers
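        # With 256x256 inputs the conv stack yields: conv11/4 -> 62, pool3/2 -> 30,
        # conv5 -> 26, pool2/2 -> 13, conv3 -> 11, hence the 384*11*11 flattened size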
        self.fc1 = nn.Sequential(
            nn.Linear(384*11*11, 1024),
            nn.ReLU(inplace=True),

            nn.Linear(1024, 256),
            nn.ReLU(inplace=True),

            nn.Linear(256,250)
        )

    def forward_once(self, x):
        # This function is called for both images of a pair;
        # its output is used to determine the similarity
        output = self.cnn1(x)
        output = output.view(output.size()[0], -1)
        output = self.fc1(output)
        return output


siamese_model = SiameseNetwork()  # Instantiate the custom Siamese network

# Load the encoder weights into the model. The optimizer weights are only
# needed to resume training, not for inference, so they are not loaded here.
siamese_model.load_state_dict(torch.load(encoder_weight_path, map_location=torch.device('cpu')))
siamese_model.eval()  # switch to inference mode

# Define the preprocessing transformations
transform = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Define a custom dataset to load and preprocess the images from the extracted folder
class CustomDataset(Dataset):
    def __init__(self, image_dir, transform=None):
        self.image_dir = image_dir
        self.image_files = []
        for root, dirs, files in os.walk(image_dir):
            for file in files:
                if file.lower().endswith(('.png', '.jpg', '.jpeg', '.bmp')):
                    self.image_files.append(os.path.join(root, file))
        # Sort so feature rows line up with the sprite cells from the first script
        self.image_files.sort()
        self.transform = transform

    def __len__(self):
        return len(self.image_files)

    def __getitem__(self, idx):
        img_path = self.image_files[idx]
        image = Image.open(img_path).convert("RGB")
        if self.transform:
            image = self.transform(image)
        return image

# Create a DataLoader for the dataset
dataset = CustomDataset(extracted_folder, transform)
dataloader = DataLoader(dataset, batch_size=10, shuffle=False)

# Initialize a list to store the extracted features
features = []

# Extract features for each batch of images using the Siamese network's encoder
for batch in dataloader:
    # Forward pass to get the features (no gradients needed for inference)
    with torch.no_grad():
        features_batch = siamese_model.forward_once(batch)

    # Convert the PyTorch tensor to a NumPy array and collect it
    features.append(features_batch.cpu().numpy())

# Concatenate the features along the batch dimension
features = np.concatenate(features)

# Save the extracted features to a TSV file (one row per image)
np.savetxt('feature_vecs2.tsv', features, delimiter="\t")
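
As a quick consistency check (a minimal sketch), the number of feature rows must match the number of images packed into the sprite, with one 250-dimensional embedding per row:

print(features.shape)  # expect (14056, 250) for this dataset
assert features.shape[0] == len(dataset.image_files)
assert features.shape[1] == 250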

Next is the config file. For my projector_config.pbtxt I've edited it to point at sprite.jpg and feature_vecs2.tsv. I uploaded the file to Google Colab.

[screenshot: projector config file]
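
For reference, this is roughly what such a config looks like (a minimal sketch, assuming 50x50 sprite cells and paths relative to the log directory; the tensor_name is just a display label):

embeddings {
  tensor_name: "siamese_features"
  tensor_path: "feature_vecs2.tsv"
  sprite {
    image_path: "sprite.jpg"
    single_image_dim: 50
    single_image_dim: 50
  }
}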

So with all 3 files ready (sprite.jpg, feature_vecs2.tsv and projector_config.pbtxt, all placed in /content), I load them into TensorBoard. The code I run in Google Colab is:

%load_ext tensorboard
%tensorboard --logdir /content

[screenshot: PCA projection]

[screenshot: t-SNE projection]

I expect to get 10 clumps of images from the visualisation, but it shows all the images combined into one big sphere. I am unsure if I'm using TensorBoard with PyTorch the right way, as I am new to both. I would like to know where I need to modify the code so the visualisation shows similar images clumped together. Does anyone know how to use TensorBoard to project image embeddings for checking image similarity with PyTorch?
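
For completeness, since I'm unsure whether my hand-built files are the right approach: as far as I understand, torch.utils.tensorboard can also write all three projector files itself via SummaryWriter.add_embedding, which would avoid hand-editing projector_config.pbtxt (a minimal sketch, assuming the features array and the list of 50x50 thumbnails called images from the scripts above are still in memory):

from torch.utils.tensorboard import SummaryWriter

# Let PyTorch generate the sprite, TSV and projector config itself
writer = SummaryWriter('/content/runs/projector')
writer.add_embedding(
    torch.from_numpy(features),     # (14056, 250) feature matrix
    label_img=torch.stack(images),  # (14056, 3, 50, 50) thumbnails
    tag='siamese_features',
)
writer.close()

Pointing %tensorboard --logdir at /content/runs/projector should then pick it up. Is this the recommended way?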


