"""t-SNE visualization of real vs. fake image embeddings.

Extracts embeddings with DINOv2 and/or CLIP (optionally a fine-tuned DINOv2
checkpoint), then projects them to 2-D (matplotlib PNG) or 3-D (interactive
Plotly HTML) with t-SNE for real/fake separability inspection.
"""

import os
import warnings

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px
import torch
from PIL import Image
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
from torchvision import transforms
from tqdm import tqdm

import clip

warnings.filterwarnings('ignore')

# Ensure models are in evaluation mode and use GPU if available
device = "cuda" if torch.cuda.is_available() else "cpu"
print(device)

# Load DINOv2 model and CLIP model once at import time; both stay frozen (eval).
dino_variant = "dinov2_vitl14"  # Change to the desired DINO variant
dino_model = torch.hub.load('facebookresearch/dinov2', dino_variant)
dino_model.eval().to(device)

clip_model, _ = clip.load("ViT-L/14", device=device, jit=False)
clip_model.eval().to(device)

# Shared preprocessing for both backbones.
# NOTE(review): these are ImageNet normalization statistics; CLIP was trained
# with its own mean/std — confirm using ImageNet stats for CLIP is intentional.
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
])


def load_finetuned_dino(model_variant, state_dict_path):
    """Load a DINOv2 backbone and overlay fine-tuned weights.

    Args:
        model_variant: torch.hub variant name, e.g. "dinov2_vitl14".
        state_dict_path: path to a saved state dict (.pth file).

    Returns:
        The model in eval mode on the global ``device``.
    """
    model = torch.hub.load('facebookresearch/dinov2', model_variant)
    model.eval().to(device)
    state_dict = torch.load(state_dict_path, map_location=device)
    # strict=False tolerates extra/missing keys (e.g. a classification head
    # added during fine-tuning); mismatching keys are silently skipped.
    model.load_state_dict(state_dict, strict=False)
    print(f"Loaded fine-tuned weights from {state_dict_path}")
    return model


def add_noise(image, std_dev=0.1):
    """Return ``image`` plus zero-mean Gaussian noise with std ``std_dev``."""
    noise = torch.randn_like(image) * std_dev
    return image + noise


def extract_dino_embeddings(image_path, model, add_noise_flag=False, noise_std=0.1):
    """Extract a single image embedding with a DINO model.

    Args:
        image_path: path to the image file.
        model: DINOv2 backbone (callable on a (1, 3, 224, 224) tensor).
        add_noise_flag: if True, add Gaussian noise to the input first (RIGID).
        noise_std: std of the Gaussian noise.

    Returns:
        1-D numpy array with the embedding.
    """
    image = Image.open(image_path).convert("RGB")
    image_tensor = transform(image).unsqueeze(0).to(device)
    with torch.no_grad():
        if add_noise_flag:
            noisy_image_tensor = add_noise(image_tensor, std_dev=noise_std)
            embedding = model(noisy_image_tensor).squeeze()
        else:
            embedding = model(image_tensor).squeeze()
    return embedding.cpu().numpy()


def extract_clip_embeddings(image_path, model):
    """Extract a single image embedding with a CLIP model (encode_image)."""
    image = Image.open(image_path).convert("RGB")
    image_tensor = transform(image).unsqueeze(0).to(device)
    with torch.no_grad():
        embedding = model.encode_image(image_tensor).squeeze()
    return embedding.cpu().numpy()


def extract_all_embeddings(folder_path, model, method, add_noise_flag=False, noise_std=0.1):
    """Extract embeddings for every file directly inside ``folder_path``.

    Args:
        folder_path: directory containing image files (subdirs are skipped).
        model: backbone matching ``method``.
        method: "dino" or "clip".
        add_noise_flag / noise_std: forwarded to the DINO extractor only.

    Returns:
        (N, D) numpy array of embeddings.

    Raises:
        ValueError: if ``method`` is neither "dino" nor "clip".
    """
    if method not in ("dino", "clip"):
        # Previously an unknown method silently appended a stale embedding
        # (or raised NameError on the first file); fail fast instead.
        raise ValueError(f"Unknown embedding method: {method!r}")
    embeddings = []
    # sorted() makes the row order deterministic across runs/filesystems.
    for image_name in tqdm(sorted(os.listdir(folder_path)),
                           desc=f"Processing {folder_path}"):
        image_path = os.path.join(folder_path, image_name)
        if os.path.isfile(image_path):
            if method == "dino":
                embedding = extract_dino_embeddings(image_path, model,
                                                    add_noise_flag, noise_std)
            else:
                embedding = extract_clip_embeddings(image_path, model)
            embeddings.append(embedding)
    return np.array(embeddings)


def visualize_embeddings_2d(real_embeddings, fake_embeddings, title, out_path,
                            perplexity=40):
    """Project real+fake embeddings to 2-D with t-SNE and save a PNG scatter."""
    all_embeddings = np.vstack([real_embeddings, fake_embeddings])
    labels = ["Real"] * len(real_embeddings) + ["Fake"] * len(fake_embeddings)

    tsne = TSNE(n_components=2, random_state=42, perplexity=perplexity)
    tsne_results = tsne.fit_transform(all_embeddings)

    plt.figure(figsize=(10, 8))
    for label, color in zip(["Real", "Fake"], ["blue", "red"]):
        indices = [i for i, l in enumerate(labels) if l == label]
        plt.scatter(tsne_results[indices, 0], tsne_results[indices, 1],
                    label=label, alpha=0.7, s=50, c=color)
    plt.title(title)
    plt.legend()
    plt.savefig(out_path)
    plt.close()


def visualize_embeddings_3d(real_embeddings, fake_embeddings, title, out_path,
                            perplexity=40):
    """Project real+fake embeddings to 3-D with t-SNE and save an HTML plot."""
    all_embeddings = np.vstack([real_embeddings, fake_embeddings])
    labels = ["Real"] * len(real_embeddings) + ["Fake"] * len(fake_embeddings)

    tsne = TSNE(n_components=3, random_state=42, perplexity=perplexity)
    tsne_results = tsne.fit_transform(all_embeddings)

    # Create a DataFrame for Plotly
    tsne_df = pd.DataFrame(tsne_results, columns=['x', 'y', 'z'])
    tsne_df['label'] = labels

    # Create the 3D scatter plot
    fig = px.scatter_3d(tsne_df, x='x', y='y', z='z', color='label',
                        title=title,
                        color_discrete_map={"Real": "blue", "Fake": "red"},
                        labels={'label': 'Image Type'})
    fig.update_traces(marker=dict(size=5, opacity=0.8),
                      selector=dict(mode='markers'))

    # Save the figure as an interactive HTML file
    fig.write_html(out_path)
    print(f"3D plot saved as {out_path}")
    # Drop references to the (possibly large) traces to free memory sooner.
    fig.data = None
    fig.layout = None


def visualize_embeddings_4dirs_3d(real_train_folder, fake_train_folder,
                                  real_test_folder, fake_test_folder,
                                  title, out_path, method, perplexity=50,
                                  finetuned_dino_path=None):
    """3-D t-SNE of four folders (real/fake x train/test) in one HTML plot.

    Args:
        method: contains 'clip', 'dino', or 'concat' (weighted CLIP+DINO mix).
        finetuned_dino_path: optional checkpoint; if given, the DINO backbone
            is replaced by the fine-tuned weights.
    """
    # Use fine-tuned DINO if a path is provided.
    # BUG FIX: the original assigned `dino_variant` inside the else-branch,
    # making it function-local and raising UnboundLocalError whenever
    # `finetuned_dino_path` was supplied. Read the module-level variant instead.
    if finetuned_dino_path:
        print("Using fine-tuned DINO model.")
        local_dino_model = load_finetuned_dino(dino_variant, finetuned_dino_path)
    else:
        local_dino_model = torch.hub.load('facebookresearch/dinov2', dino_variant)
        local_dino_model.eval().to(device)
        print("Using original DINO model.")

    if 'clip' in method:
        # Extract embeddings for all folders CLIP
        real_train_embeddings = extract_all_embeddings(real_train_folder, clip_model, method="clip")
        fake_train_embeddings = extract_all_embeddings(fake_train_folder, clip_model, method="clip")
        real_test_embeddings = extract_all_embeddings(real_test_folder, clip_model, method="clip")
        fake_test_embeddings = extract_all_embeddings(fake_test_folder, clip_model, method="clip")

    if 'dino' in method:
        # Extract embeddings for all folders DINO
        real_train_embeddings = extract_all_embeddings(real_train_folder, local_dino_model, method="dino")
        fake_train_embeddings = extract_all_embeddings(fake_train_folder, local_dino_model, method="dino")
        real_test_embeddings = extract_all_embeddings(real_test_folder, local_dino_model, method="dino")
        fake_test_embeddings = extract_all_embeddings(fake_test_folder, local_dino_model, method="dino")

    if 'concat' in method:
        # CLIP
        clip_real_train_embeddings = extract_all_embeddings(real_train_folder, clip_model, method="clip")
        clip_fake_train_embeddings = extract_all_embeddings(fake_train_folder, clip_model, method="clip")
        clip_real_test_embeddings = extract_all_embeddings(real_test_folder, clip_model, method="clip")
        clip_fake_test_embeddings = extract_all_embeddings(fake_test_folder, clip_model, method="clip")
        # DINO
        dino_real_train_embeddings = extract_all_embeddings(real_train_folder, local_dino_model, method="dino")
        dino_fake_train_embeddings = extract_all_embeddings(fake_train_folder, local_dino_model, method="dino")
        dino_real_test_embeddings = extract_all_embeddings(real_test_folder, local_dino_model, method="dino")
        dino_fake_test_embeddings = extract_all_embeddings(fake_test_folder, local_dino_model, method="dino")

        real_train_embeddings = dino_clip_concat(clip_real_train_embeddings, dino_real_train_embeddings)
        fake_train_embeddings = dino_clip_concat(clip_fake_train_embeddings, dino_fake_train_embeddings)
        real_test_embeddings = dino_clip_concat(clip_real_test_embeddings, dino_real_test_embeddings)
        fake_test_embeddings = dino_clip_concat(clip_fake_test_embeddings, dino_fake_test_embeddings)

    # Combine embeddings and create labels
    all_embeddings = np.vstack([
        real_train_embeddings, fake_train_embeddings,
        real_test_embeddings, fake_test_embeddings,
    ])
    labels = (["Real Train"] * len(real_train_embeddings)
              + ["Fake Train"] * len(fake_train_embeddings)
              + ["Real Test"] * len(real_test_embeddings)
              + ["Fake Test"] * len(fake_test_embeddings))

    # Perform t-SNE on the combined embeddings
    tsne = TSNE(n_components=3, random_state=42, perplexity=perplexity)
    tsne_results = tsne.fit_transform(all_embeddings)

    # Create a DataFrame for Plotly
    tsne_df = pd.DataFrame(tsne_results, columns=['x', 'y', 'z'])
    tsne_df['label'] = labels

    # Create the 3D scatter plot
    fig = px.scatter_3d(tsne_df, x='x', y='y', z='z', color='label',
                        title=title,
                        color_discrete_map={
                            "Real Train": "blue",
                            "Fake Train": "red",
                            "Real Test": "green",
                            "Fake Test": "orange",
                        },
                        labels={'label': 'Image Type'})
    fig.update_traces(marker=dict(size=5, opacity=0.8),
                      selector=dict(mode='markers'))

    # Save the figure as an interactive HTML file
    fig.write_html(out_path)
    print(f"3D plot saved as {out_path}")
    fig.data = None
    fig.layout = None


def compute_cosine_similarities(folder_path, model, noise_std=0.1):
    """Cosine similarity between each image's clean and noisy DINO embedding.

    Returns a 1-D numpy array, one similarity per image file in the folder.
    """
    similarities = []
    for image_name in tqdm(os.listdir(folder_path),
                           desc=f"Processing {folder_path} for Cosine Similarities"):
        image_path = os.path.join(folder_path, image_name)
        if os.path.isfile(image_path):
            original_embedding = extract_dino_embeddings(
                image_path, model, add_noise_flag=False, noise_std=noise_std)
            noisy_embedding = extract_dino_embeddings(
                image_path, model, add_noise_flag=True, noise_std=noise_std)
            similarity = np.dot(original_embedding, noisy_embedding) / (
                np.linalg.norm(original_embedding) * np.linalg.norm(noisy_embedding)
            )
            similarities.append(similarity)
    return np.array(similarities)


def visualize_cosine_similarities(real_similarities, fake_similarities,
                                  output_path, title, perplexity=40):
    """Plot 1-D t-SNE of real vs. fake cosine-similarity scalars as a PNG.

    NOTE(review): running t-SNE on scalar values is unusual — a histogram of
    the raw similarities may be more informative; confirm this is intended.
    """
    all_similarities = np.hstack([real_similarities, fake_similarities])
    labels = ["Real"] * len(real_similarities) + ["Fake"] * len(fake_similarities)

    tsne = TSNE(n_components=1, perplexity=perplexity, random_state=42)
    tsne_results = tsne.fit_transform(all_similarities.reshape(-1, 1))

    plt.figure(figsize=(10, 8))
    for label, color in zip(["Real", "Fake"], ["blue", "red"]):
        indices = [i for i, l in enumerate(labels) if l == label]
        plt.scatter(tsne_results[indices, 0],
                    np.zeros_like(tsne_results[indices, 0]),
                    label=label, alpha=0.7, s=50, c=color)
    plt.title(title)
    plt.legend()
    plt.savefig(output_path)
    plt.close()


def align_embeddings(clip_embeddings, dino_embeddings):
    """PCA-reduce the wider of the two embedding matrices to the narrower width.

    Returns the pair (clip, dino) with equal second dimensions so they can be
    combined elementwise.
    """
    if clip_embeddings.shape[1] < dino_embeddings.shape[1]:
        pca = PCA(n_components=clip_embeddings.shape[1])
        dino_embeddings_aligned = pca.fit_transform(dino_embeddings)
        return clip_embeddings, dino_embeddings_aligned
    elif clip_embeddings.shape[1] > dino_embeddings.shape[1]:
        pca = PCA(n_components=dino_embeddings.shape[1])
        clip_embeddings_aligned = pca.fit_transform(clip_embeddings)
        return clip_embeddings_aligned, dino_embeddings
    else:
        return clip_embeddings, dino_embeddings


def dino_clip_concat(clip_embeddings, dino_embeddings,
                     clip_weight=0.5, dino_weight=0.5):
    """Weighted elementwise combination of dimension-aligned CLIP/DINO embeddings.

    Despite the name, this is a weighted *sum* (after PCA alignment), not a
    concatenation along the feature axis.
    """
    clip_embeddings, dino_embeddings = align_embeddings(clip_embeddings, dino_embeddings)
    return clip_weight * clip_embeddings + dino_weight * dino_embeddings


def tsne_visualize(real_folder, fake_folder, output_dir,
                   finetuned_dino_path=None, dim=2):
    """Produce CLIP, DINO, and combined t-SNE plots for a real/fake folder pair.

    Args:
        real_folder / fake_folder: directories of real and fake images.
        output_dir: destination for the generated plots (created if missing).
        finetuned_dino_path: optional fine-tuned DINOv2 checkpoint.
        dim: 2 (PNG via matplotlib) or 3 (HTML via Plotly).

    Raises:
        ValueError: if ``dim`` is not 2 or 3 (previously this crashed later
        with UnboundLocalError).
    """
    os.makedirs(output_dir, exist_ok=True)

    # Use fine-tuned DINO if a path is provided.
    # BUG FIX: same local-shadowing of `dino_variant` as in
    # visualize_embeddings_4dirs_3d; use the module-level variant.
    if finetuned_dino_path:
        print("Using fine-tuned DINO model.")
        local_dino_model = load_finetuned_dino(dino_variant, finetuned_dino_path)
    else:
        local_dino_model = torch.hub.load('facebookresearch/dinov2', dino_variant)
        local_dino_model.eval().to(device)
        print("Using original DINO model.")

    if dim == 2:
        visualize_embeddings = visualize_embeddings_2d
        file_type = 'png'
    elif dim == 3:
        visualize_embeddings = visualize_embeddings_3d
        file_type = 'html'
    else:
        raise ValueError(f"dim must be 2 or 3, got {dim!r}")

    # CLIP-VITL14 embeddings
    real_clip_embeddings = extract_all_embeddings(real_folder, clip_model, method="clip")
    fake_clip_embeddings = extract_all_embeddings(fake_folder, clip_model, method="clip")
    visualize_embeddings(real_clip_embeddings, fake_clip_embeddings,
                         "CLIP-VITL14 Embeddings",
                         os.path.join(output_dir, f"{dim}d_clip_embeddings.{file_type}"))

    # DINOv2-VITL14 embeddings
    real_dino_embeddings = extract_all_embeddings(real_folder, local_dino_model, method="dino")
    fake_dino_embeddings = extract_all_embeddings(fake_folder, local_dino_model, method="dino")
    visualize_embeddings(real_dino_embeddings, fake_dino_embeddings,
                         "DINOv2-VITL14 Embeddings",
                         os.path.join(output_dir, f"{dim}d_dino_embeddings.{file_type}"))

    # Weighted concatenation of CLIP and DINO embeddings
    real_combined_embeddings = dino_clip_concat(real_clip_embeddings, real_dino_embeddings)
    fake_combined_embeddings = dino_clip_concat(fake_clip_embeddings, fake_dino_embeddings)
    visualize_embeddings(real_combined_embeddings, fake_combined_embeddings,
                         "CLIP+DINO Combined Embeddings",
                         os.path.join(output_dir, f"{dim}d_clip_dino_embeddings.{file_type}"))

    # # RIGID: DINOv2 original vs noisy embeddings (real)
    # real_dino_noisy_embeddings = extract_all_embeddings(real_folder, local_dino_model, method="dino", add_noise_flag=True)
    # visualize_embeddings(real_dino_embeddings, real_dino_noisy_embeddings, "Real: DINOv2 Original vs Noisy", os.path.join(output_dir, f"real_dino_noisy.{file_type}"))

    # # RIGID: DINOv2 original vs noisy embeddings (fake)
    # fake_dino_noisy_embeddings = extract_all_embeddings(fake_folder, local_dino_model, method="dino", add_noise_flag=True)
    # visualize_embeddings(fake_dino_embeddings, fake_dino_noisy_embeddings, "Fake: DINOv2 Original vs Noisy", os.path.join(output_dir, f"fake_dino_noisy.{file_type}"))

    # # RIGID: Cosine similarities
    # real_cosine_similarities = compute_cosine_similarities(real_folder, local_dino_model)
    # fake_cosine_similarities = compute_cosine_similarities(fake_folder, local_dino_model)
    # visualize_cosine_similarities(real_cosine_similarities, fake_cosine_similarities, os.path.join(output_dir, "cosine_similarities.png"), "Cosine Similarities of Real vs Fake")


if __name__ == "__main__":
    fake_folder = "/media/external_16TB_1/amirtaha_amanzadi/datasets/GenImage-tiny-all/imagenet_all/val/1_fake"
    real_folder = "/media/external_16TB_1/amirtaha_amanzadi/datasets/GenImage-tiny-all/imagenet_all/val/0_real"
    output_dir = "./TSNE/GenImage-tiny"  # Replace with the desired output directory for plots

    # tsne_visualize(real_folder, fake_folder, output_dir, finetuned_dino_path='./saved_models/dino_ep_17_acc_0.5598_ap_0.5360.pth')
    # tsne_visualize(real_folder, fake_folder, output_dir, dim=2)
    tsne_visualize(real_folder, fake_folder, output_dir, dim=3)

    # Example: four-folder train/test visualization (requires the four paths).
    # visualize_embeddings_4dirs_3d(
    #     real_train_folder, fake_train_folder, real_test_folder, fake_test_folder,
    #     title="3D t-SNE for Real/Fake Train and Test Data",
    #     out_path=os.path.join(output_dir, "3d_clip_ft.html"),
    #     method='clip'
    # )
    # visualize_embeddings_4dirs_3d(
    #     real_train_folder, fake_train_folder, real_test_folder, fake_test_folder,
    #     title="3D t-SNE for Real/Fake Train and Test Data",
    #     out_path=os.path.join(output_dir, "3d_dino_ft.html"),
    #     method='dino'
    # )
    # visualize_embeddings_4dirs_3d(
    #     real_train_folder, fake_train_folder, real_test_folder, fake_test_folder,
    #     title="3D t-SNE for Real/Fake Train and Test Data",
    #     out_path=os.path.join(output_dir, "3d_clip_dino_ft.html"),
    #     method='concat'
    # )