In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold, StratifiedKFold
import librosa
import librosa.display
import IPython.display as ipd
import matplotlib.pyplot as plt
import numpy as np
import scipy.io
from tqdm import tqdm
import glob
import os
import json
import pickle
from einops import rearrange
from captum.attr import DeepLift
from captum.attr import visualization as viz

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
with open("data/normal_all_data.pkl", "rb") as f:
    all_data = pickle.load(f)
with open("data/all_label.pkl", "rb") as f:
    labels = pickle.load(f)
with open("data/vowel_label.pkl", "rb") as f:
    vowel_label = pickle.load(f)
with open("data/bilab_label.pkl", "rb") as f:
    bilab_label = pickle.load(f)
with open("data/nasal_label.pkl", "rb") as f:
    nasal_label = pickle.load(f)
with open("data/iy_label.pkl", "rb") as f:
    iy_label = pickle.load(f)
with open("data/uw_label.pkl", "rb") as f:
    uw_label = pickle.load(f)

print(all_data.shape)

(1913, 62, 20, 11)


In [220]:
#calculate MFCCs with windowing
n_mfcc = 20
framesize = 1 * 250
hop_size = int(framesize/2)

trials = []
for i, trial in enumerate(tqdm(data)):
    channels = []
    for j, channel in enumerate(trial):
        mfccs = librosa.feature.mfcc(y=channel, n_mfcc=n_mfcc, n_fft=framesize, hop_length=hop_size, sr=250)
        channels.append(np.array(mfccs))
    trials.append(np.array(channels)) 
mfc_data = np.array(trials)

print(mfc_data.shape)

  return f(*args, **kwargs)
  5%|███▉                                                                            | 95/1913 [00:23<07:36,  3.98it/s]

KeyboardInterrupt



In [7]:
#save as (windows MFCCs)
with open('data/11_20mfc.pkl', 'wb') as f:
    pickle.dump(mfc_data, f)

In [3]:
class Dataset():
    def __init__(self, data, label, oversample=True):
        self.data = data
        self.label = label
        self.over = oversample
        self.train = None
        self.val = None
        self.test = None
    
    def picturize(self):
        trials = []
        depth = self.data.shape[2]
        for trial in self.data:
            pic = np.zeros((7,9,depth,11))
            pic[0,2] = trial[3]
            pic[0,3] = trial[0]
            pic[0,4] = trial[1]
            pic[0,5] = trial[2]
            pic[0,6] = trial[4]
            pic[1,:] = trial[5:14]
            pic[2,:] = trial[14:23]
            pic[3,:] = trial[23:32]
            pic[4,:] = trial[32:41]
            pic[5,:] = trial[41:50]
            pic[6,0] = trial[50]
            pic[6,1] = trial[51]
            pic[6,2] = trial[52]
            pic[6,3] = trial[58]
            pic[6,4] = trial[53]
            pic[6,5] = trial[60]
            pic[6,6] = trial[54]
            pic[6,7] = trial[55]
            pic[6,8] = trial[56]
            trials.append(pic)
        self.data = np.array(trials)
        return self.data
    
    def split(self, train_idx, test_idx, val_size=0.1, norm=False):
        train_val_data = np.stack([self.data[index] for index in train_idx])
        train_val_label = [self.label[index] for index in train_idx]
        test_data = np.stack([self.data[index] for index in test_idx])
        test_label = [self.label[index] for index in test_idx]
        
        if norm:
            Max = np.max(train_val_data, axis=(0,1,2,4), keepdims=True)
            Min = np.min(train_val_data, axis=(0,1,2,4), keepdims=True)
            train_val_data = (train_val_data-Min)/(Max-Min)

            Max_test = np.max(test_data, axis=(0,1,2,4), keepdims=True)
            Min_test = np.min(test_data, axis=(0,1,2,4), keepdims=True)
            test_data = (test_data-Min)/(Max-Min)
            
        train_val = [[train_val_data[i], train_val_label[i]] for i in range(len(train_val_data))]
        self.test = [[test_data[i], test_label[i]] for i in range(len(test_data))]
        
        num_train_val = len(train_val)
        indices = list(range(num_train_val))
        np.random.shuffle(indices)
        split = int(np.floor(val_size*num_train_val))
        train, val = [train_val[i] for i in indices[split:]] ,[train_val[i] for i in indices[:split]]
        
        if self.over:
            train_labels = [data[1] for data in train]
            _, counts = np.unique(train_labels, return_counts=True)
            print(counts)
            if counts[1]>counts[0]:
                label0 = [data for data in train if data[1]==0]
                coef = int(counts[1]/counts[0])
                for i in range(coef):
                    train = train + label0
            elif counts[1]<counts[0]:
                label1 = [data for data in train if data[1]==1]
                coef = int(counts[0]/counts[1])
                for i in range(coef):
                    train = train + label1
        self.train = train
        self.val = val
        
        return self.train, self.val, self.test
    
    
    def show(self):
        print('data shape = ', self.data.shape)
        
        if self.train is None:
            print('train not creaeted!')
        else:
            print('train shape = ', len(self.train))
            
        if self.val is None:
            print('validation not creaeted!')
        else:
            print('validation shape = ', len(self.val))
            
        if self.test is None:
            print('test not creaeted!')
        else:
            print('test shape = ', len(self.test))

In [9]:
def train_model(train_loader, val_loader, epochs, lr, fold, steps):
    print('creating model...')
    model = cnn3d().float()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    criterion = nn.BCELoss()

    scheduler = torch.optim.lr_scheduler.OneCycleLR(optimizer, total_steps=steps, max_lr=lr*10)
    scheduler1 = torch.optim.lr_scheduler.StepLR(optimizer, step_size=2, gamma=0.1)
    l1_lambda = 0.0001
    
    min_val_loss = np.inf
    max_val_acc = 0
    for epoch in range(epochs):
        print('epoch: ', epoch+1)
        train_loss = 0
        train_correct = 0
        model.train()
        for iteration, (data,label) in enumerate(train_loader):
            optimizer.zero_grad()
            output = model(data.float())
            label = torch.reshape(label, (-1,1))
            label = label.float()
            loss = criterion(output, label)
            for W in model.parameters():
                loss = loss + l1_lambda*W.norm(1)
            loss.backward()
            optimizer.step()
            scheduler.step()
            targets = [1 if output[i].round()==label[i] else 0 for i in range(len(label))]
            #print([output[i].round().item() for i in range(len(label))])
            train_correct += sum(targets)
            train_loss += loss.item()*data.shape[0]
        #scheduler1.step()    
        train_acc = train_correct/len(train_loader.sampler)       
        train_loss = train_loss/len(train_loader.sampler)
        
        val_loss = 0
        val_correct = 0
        model.eval()
        for data, label in val_loader:
            output = model(data.float())
            label = torch.reshape(label, (-1,1))
            label = label.float()
            loss = criterion(output, label) 
            val_loss += loss.item()*data.shape[0]
            targets = [1 if output[i].round()==label[i] else 0 for i in range(len(label))]
            val_correct += sum(targets)
            
        val_loss = val_loss/len(val_loader.sampler)
        val_acc = val_correct/len(val_loader.sampler)
        if val_loss <= min_val_loss:
            print("validation loss decreased ({:.6f} ---> {:.6f}),   val_acc = {}".format(min_val_loss, val_loss, val_acc))
            torch.save(model.state_dict(), 'train/model'+str(fold)+'.pt')
            min_val_loss = val_loss
        torch.save(model.state_dict(), 'train/last_model'+str(fold)+'.pt')    
        print('epoch {}: train loss = {},   train acc = {},\nval_loss = {},     val_acc = {}\n'
              .format(epoch+1, train_loss, train_acc, val_loss, val_acc))
        
        if int(train_acc)==1:
            print('!!! overfitted !!!')
            break
        model.train()

In [10]:
def evaluate_model(test_loader, fold):
    model =cnn3d().float()
    model.load_state_dict(torch.load('train/model'+str(fold)+'.pt'))
    
    n_correct = 0
    model.eval()
    for data, label in test_loader:
        output = model(data.float())
        targets = [1 if output[i].round()==label[i] else 0 for i in range(len(label))]
        print(targets)
        n_correct += sum(targets)    
    test_accs = n_correct/len(test_loader.sampler)
    print('early stoping results:\n\t', test_accs)
    
    n_correct = 0
    model.eval()
    for data, label in train_loader:
        output = model(data.float())
        targets = [1 if output[i].round()==label[i] else 0 for i in range(len(label))]
        n_correct += sum(targets)
        
    train_accs = n_correct/len(train_loader.sampler)
    print('\t', train_accs)
    
    model = cnn3d().float()
    model.load_state_dict(torch.load('train/last_model'+str(fold)+'.pt'))
    
    n_correct = 0
    model.eval()
    for data, label in test_loader:
        output = model(data.float())
        targets = [1 if output[i].round()==label[i] else 0 for i in range(len(label))]
        print(targets)
        n_correct += sum(targets)
    test_accs_over = n_correct/len(test_loader.sampler)
    print('full train results:\n\t', test_accs_over)
    
    n_correct = 0
    model.eval()
    for data, label in train_loader:
        output = model(data.float())
        targets = [1 if output[i].round()==label[i] else 0 for i in range(len(label))]
        n_correct += sum(targets)
    train_accs_over = n_correct/len(train_loader.sampler)
    print('\t', train_accs_over)

In [11]:
def calculate_steps(train_loader, epochs):
    steps = 0
    for epoch in range(epochs):
        for data, label in train_loader:
            steps += 1
    return steps

In [43]:
class cnn3d(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv3d(20, 16, kernel_size=(3, 3, 3), padding=1)
        self.conv2 = nn.Conv3d(16, 32, kernel_size=(3, 3, 3), padding=0)
        self.pool = nn.MaxPool3d((2, 2, 2), stride=2)
        self.fc1 = nn.Linear(192, 128)
        self.fc2 = nn.Linear(128, 1)
        self.drop = nn.Dropout(0.25)
        self.batch1 = nn.BatchNorm3d(16)
        self.batch2 = nn.BatchNorm3d(32)
        self.batch3 = nn.BatchNorm1d(128)
        
    def forward(self, x):
        x = rearrange(x, 'n h w m t -> n m t h w')
        out = self.pool(F.relu(self.batch1(self.conv1(x))))
        out = F.relu(self.batch2(self.conv2(out)))
        out = out.view(out.size(0), -1)
        out = self.drop(F.relu(self.batch3(self.fc1(out))))
        out = F.sigmoid(self.fc2(out))
        return out

In [44]:
#test model
model = cnn3d()
sample = torch.rand((4,7,9,20,11))
model(sample)

tensor([[0.6052],
        [0.5052],
        [0.2035],
        [0.6347]], grad_fn=<SigmoidBackward0>)

In [45]:
#congig

val_size = 0.25
n_epochs = 100
batch_size = 128
print_every = 10
lr = 0.00001
k = 10
skf=StratifiedKFold(n_splits=k, shuffle=True, random_state=32)

In [46]:
print(all_data[:,:,3:,:].shape)

(1913, 62, 17, 11)


In [47]:
dataset = Dataset(all_data, vowel_label)
data = dataset.picturize()

for fold, (train_idx, test_idx) in enumerate(skf.split(data, labels)):
    print('------------fold {}-----------'.format(fold))
    train, val, test = dataset.split(train_idx, test_idx)
    train_label = [item[1] for item in train]
    print(sum(train_label)/len(train_label))
    dataset.show()
    train_loader = torch.utils.data.DataLoader(train, batch_size=batch_size, shuffle=True)
    val_loader = torch.utils.data.DataLoader(val, batch_size=batch_size, shuffle=True)
    test_loader = torch.utils.data.DataLoader(test, batch_size=batch_size, shuffle=True)
    
    print('calculating total steps...')
    steps = calculate_steps(train_loader, n_epochs)
    train_model(train_loader, val_loader, epochs=n_epochs, lr=lr, fold=fold, steps=steps)
    evaluate_model(test_loader, fold=fold)

------------fold 0-----------
[ 291 1258]
0.4636933284187247
data shape =  (1913, 7, 9, 20, 11)
train shape =  2713
validation shape =  172
test shape =  192
calculating total steps...
creating model...
epoch:  1




validation loss decreased (inf ---> 0.679123),   val_acc = 0.7093023255813954
epoch 1: train loss = 0.8683970651853318,   train acc = 0.5208256542572798,
val_loss = 0.6791226988614991,     val_acc = 0.7093023255813954

epoch:  2
epoch 2: train loss = 0.8702899437086019,   train acc = 0.524511610762993,
val_loss = 0.7166987934777903,     val_acc = 0.4186046511627907

epoch:  3
epoch 3: train loss = 0.8651134706141103,   train acc = 0.5344636933284187,
val_loss = 0.7987130630848019,     val_acc = 0.3430232558139535

epoch:  4
epoch 4: train loss = 0.8593147574778724,   train acc = 0.5407298193881312,
val_loss = 0.8141745814057284,     val_acc = 0.3023255813953488

epoch:  5
epoch 5: train loss = 0.8594019789780104,   train acc = 0.539255436785846,
val_loss = 0.81581727987112,     val_acc = 0.3081395348837209

epoch:  6
epoch 6: train loss = 0.855004380604371,   train acc = 0.5477331367489864,
val_loss = 0.8083413853201755,     val_acc = 0.3372093023255814

epoch:  7
epoch 7: train loss =

creating model...
epoch:  1
validation loss decreased (inf ---> 0.646471),   val_acc = 0.8255813953488372
epoch 1: train loss = 0.86992137868295,   train acc = 0.5330100708690787,
val_loss = 0.6464709279149078,     val_acc = 0.8255813953488372

epoch:  2
epoch 2: train loss = 0.8734584440939553,   train acc = 0.505408429690414,
val_loss = 0.6706514441689779,     val_acc = 0.6046511627906976

epoch:  3
epoch 3: train loss = 0.8688026436315193,   train acc = 0.5210742260350616,
val_loss = 0.6759761059006979,     val_acc = 0.5697674418604651

epoch:  4
epoch 4: train loss = 0.8661403424790527,   train acc = 0.5203282357329355,
val_loss = 0.6691561743270519,     val_acc = 0.5872093023255814

epoch:  5
epoch 5: train loss = 0.8571998774227212,   train acc = 0.5389779932860873,
val_loss = 0.6676994409672049,     val_acc = 0.5872093023255814

epoch:  6
epoch 6: train loss = 0.8471142522704463,   train acc = 0.5479298769116001,
val_loss = 0.6665740276491919,     val_acc = 0.5872093023255814

e

KeyboardInterrupt: 