Module src.ref.analysis_raw_images.data_raw_images
Expand source code
from matplotlib import pyplot as plt
import numpy as np
from skimage.external.tifffile import imread
from skimage import io
import os
from os.path import join as oj
from sklearn.feature_extraction.image import extract_patches_2d
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.neural_network import MLPRegressor
from sklearn.model_selection import cross_validate, train_test_split
from scipy import ndimage
auxilin_dir = '/accounts/grad/xsli/auxilin_data'
# auxilin_dir = '/scratch/users/vision/data/abc_data/auxilin_data/'
def get_data(cell_name, auxilin_dir=auxilin_dir, normalize=True):
'''Loads in X and Y for one cell
Returns
-------
X : np.ndarray
has shape (W, H, num_images)
Y : np.ndarray
has shape (W, H, num_images)
'''
cell_name = 'Cell1_1s'
data_dir = oj(auxilin_dir, 'A7D2', cell_name) # 'A7D2', 'EGFB-GAK-F6'
fname1 = os.listdir(oj(data_dir, 'TagRFP'))[0]
fname2 = os.listdir(oj(data_dir, 'EGFP'))[0]
X = imread(oj(data_dir, 'TagRFP', fname1)) #.astype(np.float32) # X = RFP(clathrin) (num_images x H x W)
Y = imread(oj(data_dir, 'EGFP', fname2)) #.astype(np.float32) # Y = EGFP (auxilin) (num_image x H x W)
if normalize:
X = (X - np.mean(X)) / np.std(X)
Y = (Y - np.mean(Y)) / np.std(Y)
return X, Y
def extract_single_pixel_features(X, Y, normalize=True):
'''Extract time-series for single pixels as features
'''
X_feat = X.transpose() # W x H x num_images
X_feat = X_feat.reshape(X_feat.shape[0] * X_feat.shape[1], -1) # num_pixels x num_images
X_feat = X_feat.reshape(-1, 1)
y_max = np.expand_dims(Y.sum(axis=0).flatten(), 1) # num_pixels x 1
y_max = (y_max - np.mean(y_max)) / np.std(y_max)
return X_feat, y_max
def extract_single_pixel_max_features(X, Y):
'''Take max over X (after some preprocessing) and use it to predict max over Y
'''
X_log = ndimage.gaussian_laplace(X, sigma=10) #Laplacian-of-Gaussian filter
X_max_log = X_log.max(axis=0)
X_max_log = X_max_log.flatten()
Y_max = np.expand_dims(Y.max(axis=0).flatten(), 1)
#plt.hist(Y_max)
return X_max_log, Y_max
def extract_patch_features(X, Y, patch_size=9):
'''Extract time-series for patches as features
Returns
-------
X : np.ndarray
has shape (num_patches, patch_size, patch_size, num_images)
Y : np.ndarray
has shape (num_images)
'''
X_feat = X.transpose() # W x H x num_images
X_patches = extract_patches_2d(X_feat, patch_size=(patch_size, patch_size), max_patches=None).astype(np.float32) # num_patches x patch_size x patch_size x num_images
# X_patches_flat = X_patches.reshape(X_patches.shape[0], -1)
# take only the center of the y matches
Y_max = np.max(Y, axis=0) # H x W
Y_patches = extract_patches_2d(Y_max.transpose(), patch_size=(patch_size, patch_size), max_patches=None) # num_patches x patch_size x patch_size x num_images
patch_center = patch_size // 2
Y_centers = Y_patches[:, patch_center, patch_center].astype(np.float32)
return X_patches, Y_centers
def extract_next_frame_prediction_data(X, Y, window_length=10, test_size=0.33, LoG_var=10):
'''
extract training and test samples for next frame prediction
Input:
X, Y: np.ndarray
clathrin and auxilin movies
window_length: int
num of previous images used for prediction
test_size: float (between 0 and 1)
% of images used as test data
LoG_var: float
var of log filter
Output:
X_train, Y_train, X_test, Y_test: np.array
train and test data, by default, the movies are partitioned into n = (len(X) - window_length) / window_length
non-overlapping segments; the last n*test_size segments are used for testing
'''
X_log = ndimage.gaussian_laplace(X, sigma=10)
n = int((len(X) - window_length) / window_length)
n_train = int(n * (1 - test_size))
X_train, Y_train, X_test, Y_test = [], [], [], []
for im in range(n_train):
Y_train += list(Y[im*window_length + window_length].flatten())
X_train += list(np.transpose([X_log[im*window_length + i].flatten() for i in range(window_length)]))
for im in range(n_train, n):
Y_test += list(Y[im*window_length + window_length].flatten())
X_test += list(np.transpose([X_log[im*window_length + i].flatten() for i in range(window_length)]))
return np.array(X_train), np.array(Y_train), np.array(X_test), np.array(Y_test)
Functions
def extract_next_frame_prediction_data(X, Y, window_length=10, test_size=0.33, LoG_var=10)
-
extract training and test samples for next frame prediction
Input
X
,Y
:np.ndarray
- clathrin and auxilin movies
window_length
- int num of previous images used for prediction
test_size
- float (between 0 and 1) % of images used as test data
LoG_var
- float var of log filter
Output
X_train
,Y_train
,X_test
,Y_test
:np.array
- train and test data, by default, the movies are partitioned into n = (len(X) - window_length) / window_length non-overlapping segments; the last n*test_size segments are used for testing
Expand source code
def extract_next_frame_prediction_data(X, Y, window_length=10, test_size=0.33, LoG_var=10): ''' extract training and test samples for next frame prediction Input: X, Y: np.ndarray clathrin and auxilin movies window_length: int num of previous images used for prediction test_size: float (between 0 and 1) % of images used as test data LoG_var: float var of log filter Output: X_train, Y_train, X_test, Y_test: np.array train and test data, by default, the movies are partitioned into n = (len(X) - window_length) / window_length non-overlapping segments; the last n*test_size segments are used for testing ''' X_log = ndimage.gaussian_laplace(X, sigma=10) n = int((len(X) - window_length) / window_length) n_train = int(n * (1 - test_size)) X_train, Y_train, X_test, Y_test = [], [], [], [] for im in range(n_train): Y_train += list(Y[im*window_length + window_length].flatten()) X_train += list(np.transpose([X_log[im*window_length + i].flatten() for i in range(window_length)])) for im in range(n_train, n): Y_test += list(Y[im*window_length + window_length].flatten()) X_test += list(np.transpose([X_log[im*window_length + i].flatten() for i in range(window_length)])) return np.array(X_train), np.array(Y_train), np.array(X_test), np.array(Y_test)
def extract_patch_features(X, Y, patch_size=9)
-
Extract time-series for patches as features
Returns
X
:np.ndarray
- has shape (num_patches, patch_size, patch_size, num_images)
Y
:np.ndarray
- has shape (num_images)
Expand source code
def extract_patch_features(X, Y, patch_size=9): '''Extract time-series for patches as features Returns ------- X : np.ndarray has shape (num_patches, patch_size, patch_size, num_images) Y : np.ndarray has shape (num_images) ''' X_feat = X.transpose() # W x H x num_images X_patches = extract_patches_2d(X_feat, patch_size=(patch_size, patch_size), max_patches=None).astype(np.float32) # num_patches x patch_size x patch_size x num_images # X_patches_flat = X_patches.reshape(X_patches.shape[0], -1) # take only the center of the y matches Y_max = np.max(Y, axis=0) # H x W Y_patches = extract_patches_2d(Y_max.transpose(), patch_size=(patch_size, patch_size), max_patches=None) # num_patches x patch_size x patch_size x num_images patch_center = patch_size // 2 Y_centers = Y_patches[:, patch_center, patch_center].astype(np.float32) return X_patches, Y_centers
def extract_single_pixel_features(X, Y, normalize=True)
-
Extract time-series for single pixels as features
Expand source code
def extract_single_pixel_features(X, Y, normalize=True): '''Extract time-series for single pixels as features ''' X_feat = X.transpose() # W x H x num_images X_feat = X_feat.reshape(X_feat.shape[0] * X_feat.shape[1], -1) # num_pixels x num_images X_feat = X_feat.reshape(-1, 1) y_max = np.expand_dims(Y.sum(axis=0).flatten(), 1) # num_pixels x 1 y_max = (y_max - np.mean(y_max)) / np.std(y_max) return X_feat, y_max
def extract_single_pixel_max_features(X, Y)
-
Take max over X (after some preprocessing) and use it to predict max over Y
Expand source code
def extract_single_pixel_max_features(X, Y): '''Take max over X (after some preprocessing) and use it to predict max over Y ''' X_log = ndimage.gaussian_laplace(X, sigma=10) #Laplacian-of-Gaussian filter X_max_log = X_log.max(axis=0) X_max_log = X_max_log.flatten() Y_max = np.expand_dims(Y.max(axis=0).flatten(), 1) #plt.hist(Y_max) return X_max_log, Y_max
def get_data(cell_name, auxilin_dir='/accounts/grad/xsli/auxilin_data', normalize=True)
-
Loads in X and Y for one cell
Returns
X
:np.ndarray
- has shape (W, H, num_images)
Y
:np.ndarray
- has shape (W, H, num_images)
Expand source code
def get_data(cell_name, auxilin_dir=auxilin_dir, normalize=True): '''Loads in X and Y for one cell Returns ------- X : np.ndarray has shape (W, H, num_images) Y : np.ndarray has shape (W, H, num_images) ''' cell_name = 'Cell1_1s' data_dir = oj(auxilin_dir, 'A7D2', cell_name) # 'A7D2', 'EGFB-GAK-F6' fname1 = os.listdir(oj(data_dir, 'TagRFP'))[0] fname2 = os.listdir(oj(data_dir, 'EGFP'))[0] X = imread(oj(data_dir, 'TagRFP', fname1)) #.astype(np.float32) # X = RFP(clathrin) (num_images x H x W) Y = imread(oj(data_dir, 'EGFP', fname2)) #.astype(np.float32) # Y = EGFP (auxilin) (num_image x H x W) if normalize: X = (X - np.mean(X)) / np.std(X) Y = (Y - np.mean(Y)) / np.std(Y) return X, Y