mohsennezhad
/
ThyroidProject


			
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280
							import concurrent.futures
import math

import cv2
import matplotlib.pyplot as plt
import numpy as np

from config import Config
from database_crawlers.image_patcher.image_patcher import ImageAndSlidePatcher, ThyroidFragmentFilters
from utils import check_if_generator_is_empty


def imul(a, b):
    return math.ceil(a * b)


def calculate_acc_and_sensitivity(image_path, zarr_loader_mask, zarr_loader, frag_generator, scaled_masked_image,
                                  generated_mask_scale, laplacian_threshold, slide_patch_size,
                                  save_generated_image=True):
    def process_frag(args):
        next_test_item, frag_pos, condition = args
        frag_shape = next_test_item.shape
        mask_scaled_frag_shape = list((imul(frag_shape[i], mask_scale) for i in range(2)))

        mask_frag_pos = list((imul(frag_pos[i], mask_scale) for i in range(2)))
        mask_w1, mask_w2 = mask_frag_pos[0], mask_frag_pos[0] + mask_scaled_frag_shape[0]
        mask_h1, mask_h2 = mask_frag_pos[1], mask_frag_pos[1] + mask_scaled_frag_shape[1]
        mask_item = zarr_loader_mask[mask_w1:mask_w2, mask_h1:mask_h2]
        mask_item = cv2.resize(mask_item, dsize=(0, 0), fx=1 / mask_scale, fy=1 / mask_scale)

        fragment_size = next_test_item.shape
        scaled_frag_size = (imul(fragment_size[0], generated_mask_scale), imul(fragment_size[1], generated_mask_scale))
        scaled_frag = cv2.resize(next_test_item[:, :, :3], dsize=scaled_frag_size, interpolation=cv2.INTER_CUBIC)
        scaled_frag_size = scaled_frag.shape

        if next_test_item is not None:
            mask_item = mask_item[:, :, 0]
            masked = mask_item.mean() > 256 * .3
            if condition and masked:
                background_dict["TP"] += 1
            elif condition and not masked:
                background_dict["FP"] += 1
            elif not condition and masked:
                background_dict["FN"] += 1
                # show_and_wait(next_test_item)
                # show_and_wait(mask_item)
            elif not condition and not masked:
                background_dict["TN"] += 1
        else:
            return None
        if not condition:
            # background patches get dark
            scaled_frag = (scaled_frag * 0.3).astype(np.int8)
        scaled_pos = list((imul(frag_pos[i], generated_mask_scale) for i in range(2)))
        try:
            mask_g_w1, mask_g_w2 = scaled_pos[0], scaled_pos[0] + scaled_frag_size[0]
            mask_g_h1, mask_g_h2 = scaled_pos[1], scaled_pos[1] + scaled_frag_size[1]
            scaled_masked_image[mask_g_w1:mask_g_w2, mask_g_h1:mask_g_h2] = scaled_frag
        except Exception as e:
            print(e)
        return True

    mask_scale = zarr_loader_mask.shape[0] / zarr_loader.shape[0]

    filter_func_list = [ThyroidFragmentFilters.func_laplacian_threshold(laplacian_threshold)]
    background_dict = {"TP": 0, "FP": 0, "TN": 0, "FN": 0}
    total_frags = slide_patch_size if slide_patch_size else ImageAndSlidePatcher._get_number_of_initial_frags(
        zarr_loader)
    frag_filtered = ImageAndSlidePatcher._filter_frag_from_generator(frag_generator, filter_func_list,
                                                                     return_all_with_condition=True,
                                                                     all_frag_count=total_frags)
    with concurrent.futures.ThreadPoolExecutor(max_workers=Config.workers) as executor:
        futures = []
        patch_count = 0
        for args in frag_filtered:
            patch_count += 1
            future_res = executor.submit(process_frag, args)
            futures.append(future_res)
            if len(futures) >= Config.workers or patch_count == slide_patch_size:
                for future in concurrent.futures.as_completed(futures):
                    future.result()
                futures = []
                if patch_count == slide_patch_size:
                    break

    if save_generated_image:
        masked_image_path = ".".join(image_path.split(".")[:-1]) + "_generated_mask.jpg"
        cv2.imwrite(masked_image_path, scaled_masked_image)

    return background_dict


def score_calculator(accuracy, specificity, acc_w=0.75):
    return accuracy * acc_w + specificity * (1 - acc_w)


def get_zarr_loaders_and_generators():
    zarr_loaders_and_generators = []
    for _img_mask_path, _img_path in image_lists:
        _zarr_loader_mask = ImageAndSlidePatcher._zarr_loader(_img_mask_path)
        _zarr_loader = ImageAndSlidePatcher._zarr_loader(_img_path)
        _frag_generator = ImageAndSlidePatcher._generate_raw_fragments_from_image_array_or_zarr(_zarr_loader,
                                                                                                shuffle=True)
        _zarr_shape = _zarr_loader.shape

        _generated_mask_scale = 10 / 512
        _scaled_zarr_shape = (
            imul(_zarr_shape[0], _generated_mask_scale) + 5, imul(_zarr_shape[1], _generated_mask_scale) + 5, 3)
        _scaled_masked_image = np.zeros(_scaled_zarr_shape)

        zarr_loaders_and_generators.append([
            _zarr_loader_mask, _zarr_loader, _frag_generator, _scaled_masked_image, _generated_mask_scale
        ])
    return zarr_loaders_and_generators


def update_and_find_best_threshold(initial_thresh, learn_threshold_and_log_cf_matrix_per_patch=True):
    initial_threshold_jump_size_const = 120
    threshold_jump_size = initial_threshold_jump_size_const
    decay_const = 0.85
    decay_count = 0

    threshold_jump_increase = 1

    threshold_score = None
    # update after initial run
    laplacian_threshold = initial_thresh

    threshold_history = []
    score_history = []
    for epoch in range((Config.n_epoch_for_image_patcher if learn_threshold_and_log_cf_matrix_per_patch else 1)):
        print("New Epoch")
        zarr_loaders_and_generators = get_zarr_loaders_and_generators()
        whole_background_dict_per_slide = [{} for i in range(len(zarr_loaders_and_generators))]
        whole_background_dict = {}

        while sum([item is not None for item in zarr_loaders_and_generators]) >= 1:
            none_empty_generators = [i for i in range(len(zarr_loaders_and_generators)) if
                                     zarr_loaders_and_generators[i] is not None]

            if learn_threshold_and_log_cf_matrix_per_patch:
                whole_background_dict = {}
            if len(none_empty_generators) >= 6 or not learn_threshold_and_log_cf_matrix_per_patch:
                for slide_pick in none_empty_generators:
                    img_path = image_lists[slide_pick][1]
                    zarr_loader_mask = zarr_loaders_and_generators[slide_pick][0]
                    zarr_loader = zarr_loaders_and_generators[slide_pick][1]
                    frag_generator = zarr_loaders_and_generators[slide_pick][2]

                    generated_scaled_mask_image = zarr_loaders_and_generators[slide_pick][3]
                    generated_mask_scale = zarr_loaders_and_generators[slide_pick][4]

                    group_dict = calculate_acc_and_sensitivity(img_path,
                                                               zarr_loader_mask,
                                                               zarr_loader,
                                                               frag_generator,
                                                               generated_scaled_mask_image,
                                                               generated_mask_scale,
                                                               laplacian_threshold,
                                                               slide_patch_size=2000,
                                                               save_generated_image=not learn_threshold_and_log_cf_matrix_per_patch)
                    for i in range(len(zarr_loaders_and_generators)):
                        if zarr_loaders_and_generators[i]:
                            generator = check_if_generator_is_empty(zarr_loaders_and_generators[i][2])
                            if generator:
                                zarr_loaders_and_generators[i][2] = generator
                            else:
                                zarr_loaders_and_generators[i] = None

                    for key, value in group_dict.items():
                        whole_background_dict[key] = whole_background_dict.get(key, 0) + value
                        whole_background_dict_per_slide[slide_pick][key] = whole_background_dict_per_slide[
                                                                               slide_pick].get(key, 0) + value

                if learn_threshold_and_log_cf_matrix_per_patch:
                    e = .000001
                    total_preds = (sum(list(whole_background_dict.values())) + e)
                    acc = (whole_background_dict["TP"] + whole_background_dict["TN"]) / total_preds
                    positive_preds = (whole_background_dict["TP"] + whole_background_dict["FP"] + e)
                    precision = whole_background_dict["TP"] / positive_preds
                    next_score = score_calculator(acc, precision)
                    if threshold_score is None:
                        threshold_score = next_score
                    else:
                        threshold_history.append(laplacian_threshold)
                        score_history.append(next_score)
                        if next_score > threshold_score:
                            threshold_score = next_score

                            laplacian_threshold += threshold_jump_increase * threshold_jump_size
                        elif next_score <= threshold_score:
                            threshold_score = next_score

                            threshold_jump_increase *= -1
                            threshold_jump_size *= decay_const

                            laplacian_threshold += threshold_jump_increase * threshold_jump_size
                            decay_count += 1
                        save_threshold_and_score_chart(threshold_history, score_history)

                    acc = round(acc, 3)
                    precision = round(precision, 3)
                    threshold_score_rounded = round(threshold_score, 3)
                    print(
                        f"acc:{acc},precision:{precision},score:{threshold_score_rounded},table:{whole_background_dict}" +
                        f"thresh:{laplacian_threshold},jump_size:{threshold_jump_size}")
                else:
                    print(f"table:{whole_background_dict},table_per_slide:{whole_background_dict_per_slide}" +
                          f"threshold:{laplacian_threshold},jump_size:{threshold_jump_size}")
            else:
                break
    return laplacian_threshold


def save_threshold_and_score_chart(threshold_history, score_history):
    fig_save_path = "laplacian_threshold_history_chart.jpeg"
    plt.plot(range(len(threshold_history)), threshold_history)
    plt.xlabel('Batch')
    plt.ylabel('Laplacian threshold')
    plt.savefig(fig_save_path)
    plt.clf()

    fig_save_path = "laplacian_threshold_score_history_chart.jpeg"
    plt.plot(range(len(score_history)), score_history)
    plt.xlabel('Batch')
    plt.ylabel('Objective function - Sore')
    plt.savefig(fig_save_path)
    plt.clf()


if __name__ == '__main__':
    image_lists = [
        (  # "('0', '100', '0')"
            "./TCGA-BJ-A3F0-01A-01-TSA.728CE583-95BE-462B-AFDF-FC0B228DF3DE__3_masked.tiff",
            "./TCGA-BJ-A3F0-01A-01-TSA.728CE583-95BE-462B-AFDF-FC0B228DF3DE__3.svs"
        ),
        (  # "('0', '100', '0')"
            "./TCGA-DJ-A1QG-01A-01-TSA.04c62c21-dd45-49ea-a74f-53822defe097__2000_masked.tiff",
            "./TCGA-DJ-A1QG-01A-01-TSA.04c62c21-dd45-49ea-a74f-53822defe097__2000.svs"
        ),
        # (  # "('0', '100', '0')"
        #     "./TCGA-EL-A3ZQ-01A-01-TS1.344610D2-AB50-41C6-916E-FF0F08940BF1__2000_masked.tiff",
        #     "./TCGA-EL-A3ZQ-01A-01-TS1.344610D2-AB50-41C6-916E-FF0F08940BF1__2000.svs"
        # ),
        (  # "('45', '55', '0')"
            "./TCGA-ET-A39N-01A-01-TSA.C38FCE19-9558-4035-9F0B-AD05B9BE321D___198_masked.tiff",
            "./TCGA-ET-A39N-01A-01-TSA.C38FCE19-9558-4035-9F0B-AD05B9BE321D___198.svs"
        ),
        # (  # "('0', '40', '60')"
        #     "./TCGA-J8-A42S-01A-01-TSA.7B80CBEB-7B85-417E-AA0C-11C79DE40250__0_masked.tiff",
        #     "./TCGA-J8-A42S-01A-01-TSA.7B80CBEB-7B85-417E-AA0C-11C79DE40250__0.svs"
        # ),
        (  # "('0', '90', '10')"
            "./TCGA-ET-A39O-01A-01-TSA.3829C900-7597-4EA9-AFC7-AA238221CE69_7000_masked.tiff",
            "./TCGA-ET-A39O-01A-01-TSA.3829C900-7597-4EA9-AFC7-AA238221CE69_7000.svs"
        ),
        (  # "('100', '0', '0')"
            "./TCGA-EL-A4K7-11A-01-TS1.C08B59AA-87DF-4ABB-8B70-25FEF9893C7F__70_masked.tiff",
            "./TCGA-EL-A4K7-11A-01-TS1.C08B59AA-87DF-4ABB-8B70-25FEF9893C7F__70.svs"
        ),
        (  # "('100', '0', '0')"
            "./TCGA-EL-A3TB-11A-01-TS1.6E0966C9-1552-4B30-9008-8ACF737CA8C3__2000_masked.tiff",
            "./TCGA-EL-A3TB-11A-01-TS1.6E0966C9-1552-4B30-9008-8ACF737CA8C3__2000.svs"
        ),
    ]

    learned_threshold = update_and_find_best_threshold(500, learn_threshold_and_log_cf_matrix_per_patch=True)
    update_and_find_best_threshold(learned_threshold, learn_threshold_and_log_cf_matrix_per_patch=False)

# Start with 500 with jump size 120 and decay 0.85
# table:{'TP': 15018, 'FP': 412, 'TN': 66898, 'FN': 2389},
# table_per_slide:[
# {'TP': 460, 'FP': 0, 'TN': 19618, 'FN': 1426},
# {'TP': 4624, 'FP': 126, 'TN': 14100, 'FN': 226},
# {'TP': 1138, 'FP': 4, 'TN': 6671, 'FN': 492},
# {'TP': 7615, 'FP': 92, 'TN': 20871, 'FN': 234},
# {'TP': 78, 'FP': 18, 'TN': 1880, 'FN': 4},
# {'TP': 1103, 'FP': 172, 'TN': 3758, 'FN': 7}
# ]
# threshold:298.86314585743395,jump_size:120