| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371 |
- import os
- import torch
- import clip
- import pandas as pd
- import plotly.express as px
- import numpy as np
- import matplotlib.pyplot as plt
- from PIL import Image
- from torchvision import transforms
- from sklearn.manifold import TSNE
- from tqdm import tqdm
- from sklearn.decomposition import PCA
- import warnings
- warnings.filterwarnings('ignore')
-
-
# Ensure models are in evaluation mode and use GPU if available.
device = "cuda" if torch.cuda.is_available() else "cpu"
print(device)

# Load the DINOv2 backbone (via torch.hub) and the CLIP image encoder.
# Both act as frozen feature extractors: eval() disables dropout/batch-norm
# updates, and embedding extraction below runs under torch.no_grad().
dino_variant = "dinov2_vitl14" # Change to the desired DINO variant
dino_model = torch.hub.load('facebookresearch/dinov2', dino_variant)
dino_model.eval().to(device)
# NOTE(review): the CLIP-specific preprocess returned by clip.load() is
# discarded (bound to _); the ImageNet-normalized transform below is applied
# to BOTH models instead — confirm this is intentional.
clip_model, _ = clip.load("ViT-L/14", device=device, jit=False)
clip_model.eval().to(device)

# Shared preprocessing: resize to 224x224, convert to tensor, and normalize
# with the standard ImageNet mean/std statistics.
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])
-
-
-
def load_finetuned_dino(model_variant, state_dict_path):
    """Load a DINOv2 backbone from torch.hub and overlay fine-tuned weights.

    Args:
        model_variant: torch.hub entry point name (e.g. "dinov2_vitl14").
        state_dict_path: path to a checkpoint loadable with torch.load.

    Returns:
        The model in eval mode on the global `device`, with the checkpoint
        weights applied.
    """
    # Load the base DINO model
    model = torch.hub.load('facebookresearch/dinov2', model_variant)
    model.eval().to(device)
    state_dict = torch.load(state_dict_path, map_location=device)
    # strict=False tolerates extra/missing keys (e.g. a classifier head saved
    # alongside the backbone), but load_state_dict reports them — surface
    # them so a silently mismatched checkpoint is visible to the user.
    result = model.load_state_dict(state_dict, strict=False)
    if result.missing_keys:
        print(f"Warning: missing keys when loading: {result.missing_keys}")
    if result.unexpected_keys:
        print(f"Warning: unexpected keys when loading: {result.unexpected_keys}")
    print(f"Loaded fine-tuned weights from {state_dict_path}")

    return model
-
-
def add_noise(image, std_dev=0.1):
    """Return a copy of *image* perturbed by zero-mean Gaussian noise.

    Args:
        image: input tensor (any shape); left unmodified.
        std_dev: standard deviation of the additive noise.
    """
    perturbation = torch.randn_like(image).mul(std_dev)
    return image.add(perturbation)
-
-
# Extract embeddings using the DINO model
def extract_dino_embeddings(image_path, model, add_noise_flag=False, noise_std=0.1):
    """Compute a DINO embedding for a single image file.

    Args:
        image_path: path to an image readable by PIL.
        model: DINO model; called directly on the preprocessed tensor.
        add_noise_flag: if True, embed a Gaussian-noise-perturbed copy
            instead of the clean image (RIGID-style perturbation).
        noise_std: std-dev of the noise when add_noise_flag is True.

    Returns:
        1-D numpy array (the squeezed model output, moved to CPU).
    """
    # (bug fix) Image.open keeps the underlying file handle open until the
    # Image is garbage-collected; the context manager closes it promptly.
    with Image.open(image_path) as img:
        image = img.convert("RGB")
    image_tensor = transform(image).unsqueeze(0).to(device)

    with torch.no_grad():
        if add_noise_flag:
            image_tensor = add_noise(image_tensor, std_dev=noise_std)
        embedding = model(image_tensor).squeeze()

    return embedding.cpu().numpy()
-
-
# Extract embeddings using the CLIP model
def extract_clip_embeddings(image_path, model):
    """Compute a CLIP image embedding for a single image file.

    Args:
        image_path: path to an image readable by PIL.
        model: CLIP model; its encode_image method is used.

    Returns:
        1-D numpy array (the squeezed image embedding, moved to CPU).
    """
    # (bug fix) close the file handle promptly instead of leaking it until
    # the PIL Image is garbage-collected.
    with Image.open(image_path) as img:
        image = img.convert("RGB")
    image_tensor = transform(image).unsqueeze(0).to(device)

    with torch.no_grad():
        embedding = model.encode_image(image_tensor).squeeze()

    return embedding.cpu().numpy()
-
-
# Extract embeddings for all images in a folder
def extract_all_embeddings(folder_path, model, method, add_noise_flag=False, noise_std=0.1):
    """Embed every regular file in *folder_path* with the given model.

    Args:
        folder_path: directory whose files are treated as images.
        model: the DINO or CLIP model matching *method*.
        method: "dino" or "clip" — selects the extraction function.
        add_noise_flag / noise_std: forwarded to the DINO extractor only.

    Returns:
        np.ndarray of shape (num_files, embedding_dim).

    Raises:
        ValueError: if *method* is not "dino" or "clip".
    """
    # (bug fix) an unrecognized method used to fall through silently and
    # append a stale (or undefined) `embedding`; fail fast instead.
    if method not in ("dino", "clip"):
        raise ValueError(f"Unknown embedding method: {method!r} (expected 'dino' or 'clip')")

    embeddings = []
    # sorted() makes the row order deterministic across runs/filesystems.
    for image_name in tqdm(sorted(os.listdir(folder_path)), desc=f"Processing {folder_path}"):
        image_path = os.path.join(folder_path, image_name)
        if not os.path.isfile(image_path):
            continue
        if method == "dino":
            embeddings.append(extract_dino_embeddings(image_path, model, add_noise_flag, noise_std))
        else:
            embeddings.append(extract_clip_embeddings(image_path, model))
    return np.array(embeddings)
-
-
# Visualize embeddings using t-SNE
def visualize_embeddings_2d(real_embeddings, fake_embeddings, title, out_path, perplexity=40):
    """Project real/fake embeddings to 2-D with t-SNE and save a PNG scatter.

    Args:
        real_embeddings: (n_real, d) array of embeddings for real images.
        fake_embeddings: (n_fake, d) array of embeddings for fake images.
        title: plot title.
        out_path: destination image path for plt.savefig.
        perplexity: t-SNE perplexity (must be smaller than the sample count).
    """
    stacked = np.vstack([real_embeddings, fake_embeddings])
    n_real = len(real_embeddings)

    projector = TSNE(n_components=2, random_state=42, perplexity=perplexity)
    projected = projector.fit_transform(stacked)

    plt.figure(figsize=(10, 8))
    # Rows are ordered real-then-fake, so a simple split recovers the groups.
    for group, color, points in (("Real", "blue", projected[:n_real]),
                                 ("Fake", "red", projected[n_real:])):
        plt.scatter(points[:, 0], points[:, 1], label=group, alpha=0.7, s=50, c=color)

    plt.title(title)
    plt.legend()
    plt.savefig(out_path)
    plt.close()
-
-
def visualize_embeddings_3d(real_embeddings, fake_embeddings, title, out_path, perplexity=40):
    """Project real/fake embeddings to 3-D with t-SNE and save an interactive
    Plotly scatter plot as an HTML file.

    Args:
        real_embeddings: (n_real, d) array of embeddings for real images.
        fake_embeddings: (n_fake, d) array of embeddings for fake images.
        title: plot title.
        out_path: destination path of the HTML file.
        perplexity: t-SNE perplexity (must be smaller than the sample count).
    """
    all_embeddings = np.vstack([real_embeddings, fake_embeddings])
    labels = ["Real"] * len(real_embeddings) + ["Fake"] * len(fake_embeddings)

    tsne = TSNE(n_components=3, random_state=42, perplexity=perplexity)
    tsne_results = tsne.fit_transform(all_embeddings)

    # Create a DataFrame for Plotly
    tsne_df = pd.DataFrame(tsne_results, columns=['x', 'y', 'z'])
    tsne_df['label'] = labels

    # Create the 3D scatter plot
    fig = px.scatter_3d(tsne_df, x='x', y='y', z='z', color='label', title=title,
                        color_discrete_map={"Real": "blue", "Fake": "red"},
                        labels={'label': 'Image Type'})

    fig.update_traces(marker=dict(size=5, opacity=0.8), selector=dict(mode='markers'))
    # Save the figure as an interactive HTML file
    fig.write_html(out_path)
    print(f"3D plot saved as {out_path}")
    # (bug fix) removed `fig.data = None` / `fig.layout = None`: assigning
    # None to these Plotly figure properties fails validation, and the local
    # figure is released when the function returns anyway.
-
-
def visualize_embeddings_4dirs_3d(real_train_folder, fake_train_folder, real_test_folder, fake_test_folder, title, out_path, method, perplexity=50, finetuned_dino_path=None):
    """3-D t-SNE of embeddings from four folders (real/fake x train/test).

    Args:
        real_train_folder, fake_train_folder, real_test_folder,
        fake_test_folder: image directories to embed.
        title: plot title.
        out_path: destination HTML file for the interactive Plotly figure.
        method: selects the embedding space; substring match (highest
            priority first): 'concat' (weighted CLIP+DINO blend), 'dino',
            'clip'. This preserves the original override order, where a
            later branch overwrote an earlier one.
        perplexity: t-SNE perplexity.
        finetuned_dino_path: optional checkpoint of fine-tuned DINO weights.

    Raises:
        ValueError: if *method* matches none of 'concat'/'dino'/'clip'.
    """
    variant = "dinov2_vitl14"  # desired DINOv2 backbone variant

    # (bug fix) the original assigned `dino_variant` only in the else-branch,
    # which made the name function-local and raised UnboundLocalError on the
    # fine-tuned path; use an explicit local `variant` for both branches.
    if finetuned_dino_path:
        print("Using fine-tuned DINO model.")
        dino_backbone = load_finetuned_dino(variant, finetuned_dino_path)
    else:
        print("Using original DINO model.")
        dino_backbone = torch.hub.load('facebookresearch/dinov2', variant)
    dino_backbone.eval().to(device)

    folders = (real_train_folder, fake_train_folder, real_test_folder, fake_test_folder)

    if 'concat' in method:
        # Weighted blend of CLIP and DINO embeddings per folder.
        clip_embs = [extract_all_embeddings(f, clip_model, method="clip") for f in folders]
        dino_embs = [extract_all_embeddings(f, dino_backbone, method="dino") for f in folders]
        per_folder = [dino_clip_concat(c, d) for c, d in zip(clip_embs, dino_embs)]
    elif 'dino' in method:
        per_folder = [extract_all_embeddings(f, dino_backbone, method="dino") for f in folders]
    elif 'clip' in method:
        per_folder = [extract_all_embeddings(f, clip_model, method="clip") for f in folders]
    else:
        # (bug fix) previously an unmatched method fell through to a
        # NameError on undefined embedding variables.
        raise ValueError(f"Unknown method: {method!r} (expected to contain 'clip', 'dino' or 'concat')")

    real_train_embeddings, fake_train_embeddings, real_test_embeddings, fake_test_embeddings = per_folder

    # Combine embeddings and create labels
    all_embeddings = np.vstack(per_folder)
    labels = (["Real Train"] * len(real_train_embeddings) +
              ["Fake Train"] * len(fake_train_embeddings) +
              ["Real Test"] * len(real_test_embeddings) +
              ["Fake Test"] * len(fake_test_embeddings))

    # Perform t-SNE on the combined embeddings
    tsne = TSNE(n_components=3, random_state=42, perplexity=perplexity)
    tsne_results = tsne.fit_transform(all_embeddings)

    # Create a DataFrame for Plotly
    tsne_df = pd.DataFrame(tsne_results, columns=['x', 'y', 'z'])
    tsne_df['label'] = labels

    # Create the 3D scatter plot
    fig = px.scatter_3d(tsne_df, x='x', y='y', z='z', color='label', title=title,
                        color_discrete_map={
                            "Real Train": "blue",
                            "Fake Train": "red",
                            "Real Test": "green",
                            "Fake Test": "orange"
                        },
                        labels={'label': 'Image Type'})

    fig.update_traces(marker=dict(size=5, opacity=0.8), selector=dict(mode='markers'))
    # Save the figure as an interactive HTML file
    fig.write_html(out_path)
    print(f"3D plot saved as {out_path}")
    # (bug fix) removed `fig.data = None` / `fig.layout = None` — assigning
    # None fails Plotly validation and the local is freed on return anyway.
-
-
# Compute cosine similarities between original and noisy embeddings
def compute_cosine_similarities(folder_path, model, noise_std=0.1):
    """For each image in *folder_path*, compare its clean DINO embedding with
    the embedding of a Gaussian-noise-perturbed copy (RIGID-style check).

    Args:
        folder_path: directory of image files (non-files are skipped).
        model: DINO model used for both the clean and noisy embeddings.
        noise_std: std-dev of the noise added to the perturbed copy.

    Returns:
        np.ndarray of cosine similarities, one per image.
    """
    # (bug fix) removed the dead `labels` list — it was never populated or
    # returned.
    similarities = []
    for image_name in tqdm(os.listdir(folder_path), desc=f"Processing {folder_path} for Cosine Similarities"):
        image_path = os.path.join(folder_path, image_name)
        if not os.path.isfile(image_path):
            continue
        clean = extract_dino_embeddings(image_path, model, add_noise_flag=False, noise_std=noise_std)
        noisy = extract_dino_embeddings(image_path, model, add_noise_flag=True, noise_std=noise_std)
        # Cosine similarity: dot product normalized by the vector norms.
        similarity = np.dot(clean, noisy) / (np.linalg.norm(clean) * np.linalg.norm(noisy))
        similarities.append(similarity)
    return np.array(similarities)
-
-
# Visualize cosine similarities using t-SNE
def visualize_cosine_similarities(real_similarities, fake_similarities, output_path, title, perplexity=40):
    """Embed scalar cosine similarities with 1-D t-SNE and save a strip plot.

    Args:
        real_similarities: 1-D array of similarities for real images.
        fake_similarities: 1-D array of similarities for fake images.
        output_path: destination image path for plt.savefig.
        title: plot title.
        perplexity: t-SNE perplexity.
    """
    combined = np.hstack([real_similarities, fake_similarities])
    n_real = len(real_similarities)

    projector = TSNE(n_components=1, perplexity=perplexity, random_state=42)
    points = projector.fit_transform(combined.reshape(-1, 1))

    plt.figure(figsize=(10, 8))
    # Values are ordered real-then-fake, so a split recovers the groups;
    # the y-coordinate is fixed at zero to draw a 1-D strip.
    for group, color, pts in (("Real", "blue", points[:n_real]),
                              ("Fake", "red", points[n_real:])):
        xs = pts[:, 0]
        plt.scatter(xs, np.zeros_like(xs), label=group, alpha=0.7, s=50, c=color)

    plt.title(title)
    plt.legend()
    plt.savefig(output_path)
    plt.close()
-
-
def align_embeddings(clip_embeddings, dino_embeddings):
    """Bring two embedding matrices to a common feature dimension.

    Whichever matrix has the larger feature dimension is reduced with PCA to
    match the smaller one; when the dimensions already agree, both inputs are
    returned untouched.

    Args:
        clip_embeddings: (n, d_clip) array.
        dino_embeddings: (n, d_dino) array.

    Returns:
        Tuple (clip, dino) of arrays with equal second dimensions.
    """
    clip_dim = clip_embeddings.shape[1]
    dino_dim = dino_embeddings.shape[1]

    if clip_dim == dino_dim:
        return clip_embeddings, dino_embeddings

    if clip_dim < dino_dim:
        reduced_dino = PCA(n_components=clip_dim).fit_transform(dino_embeddings)
        return clip_embeddings, reduced_dino

    reduced_clip = PCA(n_components=dino_dim).fit_transform(clip_embeddings)
    return reduced_clip, dino_embeddings
-
-
# Function to compute a weighted combination of embeddings
def dino_clip_concat(clip_embeddings, dino_embeddings, clip_weight=0.5, dino_weight=0.5):
    """Blend CLIP and DINO embeddings into one representation.

    NOTE(review): despite the name, this is a weighted element-wise SUM of
    the dimension-aligned embeddings, not a concatenation.

    Args:
        clip_embeddings: (n, d_clip) array.
        dino_embeddings: (n, d_dino) array.
        clip_weight: weight applied to the CLIP component.
        dino_weight: weight applied to the DINO component.

    Returns:
        Array of shape (n, min(d_clip, d_dino)).
    """
    aligned_clip, aligned_dino = align_embeddings(clip_embeddings, dino_embeddings)
    return clip_weight * aligned_clip + dino_weight * aligned_dino
-
-
def tsne_visualize(real_folder, fake_folder, output_dir, finetuned_dino_path=None, dim=2):
    """Produce CLIP, DINO and blended t-SNE plots for a real/fake image pair
    of folders.

    Saves three files into *output_dir*: `{dim}d_clip_embeddings`,
    `{dim}d_dino_embeddings` and `{dim}d_clip_dino_embeddings`, as PNG for
    dim=2 or interactive HTML for dim=3.

    Args:
        real_folder: directory of real images.
        fake_folder: directory of fake images.
        output_dir: created if missing; plots are written here.
        finetuned_dino_path: optional checkpoint of fine-tuned DINO weights.
        dim: 2 or 3, the t-SNE target dimensionality.

    Raises:
        ValueError: if *dim* is neither 2 nor 3.
    """
    # (bug fix) validate `dim` up front: previously an unsupported value left
    # `visualize_embeddings`/`file_type` undefined and crashed with a
    # NameError only after the (expensive) embedding extraction.
    if dim == 2:
        visualize_embeddings = visualize_embeddings_2d
        file_type = 'png'
    elif dim == 3:
        visualize_embeddings = visualize_embeddings_3d
        file_type = 'html'
    else:
        raise ValueError(f"dim must be 2 or 3, got {dim!r}")

    os.makedirs(output_dir, exist_ok=True)

    variant = "dinov2_vitl14"  # desired DINOv2 backbone variant

    # (bug fix) the original assigned `dino_variant` only in the else-branch,
    # making the name function-local and raising UnboundLocalError on the
    # fine-tuned path; use an explicit local `variant` for both branches.
    if finetuned_dino_path:
        print("Using fine-tuned DINO model.")
        dino_backbone = load_finetuned_dino(variant, finetuned_dino_path)
    else:
        print("Using original DINO model.")
        dino_backbone = torch.hub.load('facebookresearch/dinov2', variant)
    dino_backbone.eval().to(device)

    # CLIP-VITL14 embeddings
    real_clip_embeddings = extract_all_embeddings(real_folder, clip_model, method="clip")
    fake_clip_embeddings = extract_all_embeddings(fake_folder, clip_model, method="clip")
    visualize_embeddings(real_clip_embeddings, fake_clip_embeddings, "CLIP-VITL14 Embeddings",
                         os.path.join(output_dir, f"{dim}d_clip_embeddings.{file_type}"))

    # DINOv2-VITL14 embeddings
    real_dino_embeddings = extract_all_embeddings(real_folder, dino_backbone, method="dino")
    fake_dino_embeddings = extract_all_embeddings(fake_folder, dino_backbone, method="dino")
    visualize_embeddings(real_dino_embeddings, fake_dino_embeddings, "DINOv2-VITL14 Embeddings",
                         os.path.join(output_dir, f"{dim}d_dino_embeddings.{file_type}"))

    # Weighted combination of CLIP and DINO embeddings
    real_combined_embeddings = dino_clip_concat(real_clip_embeddings, real_dino_embeddings)
    fake_combined_embeddings = dino_clip_concat(fake_clip_embeddings, fake_dino_embeddings)
    visualize_embeddings(real_combined_embeddings, fake_combined_embeddings, "CLIP+DINO Combined Embeddings",
                         os.path.join(output_dir, f"{dim}d_clip_dino_embeddings.{file_type}"))
-
-
-
if __name__ == "__main__":
    # Script entry point. Dataset locations are hard-coded for the current
    # experiment; earlier experiment paths and alternative invocations are
    # kept below, commented out, for quick switching.

    # fake_test_folder = "/media/external_16TB_1/amirtaha_amanzadi/datasets/GenImage-tiny-all/1_fake"
    # real_test_folder = "/media/external_16TB_1/amirtaha_amanzadi/datasets/GenImage-tiny-all/0_real"

    # fake_folder = "../../datasets/GenImage-tiny-all/1_fake"
    # real_folder = "../../datasets/GenImage-tiny-all/0_real"

    # fake_folder = "../../datasets/ArtiFact_test_small/1_fake"
    # real_folder = "../../datasets/ArtiFact_test_small/0_real"

    # fake_train_folder = "/media/external_16TB_1/amirtaha_amanzadi/datasets/IMAGINET_train_all/1_fake"
    # real_train_folder = "/media/external_16TB_1/amirtaha_amanzadi/datasets/IMAGINET_train_all/0_real"

    # Current run: GenImage-tiny ImageNet validation split (real vs fake).
    fake_folder = "/media/external_16TB_1/amirtaha_amanzadi/datasets/GenImage-tiny-all/imagenet_all/val/1_fake"
    real_folder = "/media/external_16TB_1/amirtaha_amanzadi/datasets/GenImage-tiny-all/imagenet_all/val/0_real"

    output_dir = "./TSNE/GenImage-tiny" # Replace with the desired output directory for plots
    # tsne_visualize(real_folder, fake_folder, output_dir, finetuned_dino_path='./saved_models/dino_ep_17_acc_0.5598_ap_0.5360.pth')

    # tsne_visualize(real_folder, fake_folder, output_dir, dim=2)
    # Produce interactive 3-D HTML plots for the folder pair above.
    tsne_visualize(real_folder, fake_folder, output_dir, dim=3)


    # visualize_embeddings_4dirs_3d(
    #     real_train_folder, fake_train_folder, real_test_folder, fake_test_folder,
    #     title="3D t-SNE for Real/Fake Train and Test Data",
    #     out_path=os.path.join(output_dir, "3d_clip_ft.html"),
    #     method='clip'
    # )
    # visualize_embeddings_4dirs_3d(
    #     real_train_folder, fake_train_folder, real_test_folder, fake_test_folder,
    #     title="3D t-SNE for Real/Fake Train and Test Data",
    #     out_path=os.path.join(output_dir, "3d_dino_ft.html"),
    #     method='dino'
    # )
    # visualize_embeddings_4dirs_3d(
    #     real_train_folder, fake_train_folder, real_test_folder, fake_test_folder,
    #     title="3D t-SNE for Real/Fake Train and Test Data",
    #     out_path=os.path.join(output_dir, "3d_clip_dino_ft.html"),
    #     method='concat'
    # )
|