File size: 5,614 Bytes
45856e0 341a65f 45856e0 9b37b0e 45856e0 b9f7415 9b37b0e b91f3bc 9b37b0e b91f3bc 9b37b0e b91f3bc 9b37b0e 45856e0 9b37b0e b91f3bc 9b37b0e b91f3bc 9b37b0e 45856e0 e76f24f b91f3bc e76f24f b91f3bc e76f24f 45856e0 9b37b0e 45856e0 9b37b0e 45856e0 fb2fb4c 45856e0 341a65f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 |
import pandas as pd
import numpy as np
from .constants import TIME_ALIASES, LEFT_X_ALIASES, LEFT_Y_ALIASES, RIGHT_X_ALIASES, RIGHT_Y_ALIASES, X_ALIASES, \
Y_ALIASES
from .detectors import saccade_detection
from typing import Union
class Eye2SacExtractor:
    """Extract saccade events from a table of eye-tracking samples.

    The extractor accepts a DataFrame or a path to a ``.csv`` / ``.txt`` file,
    drops incomplete rows, resolves the time and x/y coordinate columns
    (falling back to known alias names) and hands the resulting arrays to
    ``saccade_detection``.
    """

    # Working copy of the sample table (rows containing NaN removed).
    data: Union[pd.DataFrame, None] = None
    # Horizontal gaze coordinate per sample (mean of both eyes if two given).
    x: Union[np.ndarray, None] = None
    # Vertical gaze coordinate per sample (mean of both eyes if two given).
    y: Union[np.ndarray, None] = None
    # Values of the time column, one per sample.
    time: Union[np.ndarray, None] = None

    def _load_data(self, file_path: str) -> pd.DataFrame:
        """Read a sample table from ``file_path``.

        ``.csv`` files may be comma- or semicolon-separated (detected
        automatically); ``.txt`` files are read as tab-separated. The
        extension check is case-insensitive.

        Raises:
            ValueError: if the extension is neither ``.csv`` nor ``.txt`` or
                the CSV separator cannot be determined.
        """
        lowered_path = file_path.lower()
        if lowered_path.endswith('.csv'):
            sep = self.__derive_separator(file_path)
            # ``sep`` must be passed by keyword: pandas >= 2.0 rejects it as
            # a positional argument.
            return pd.read_csv(file_path, sep=sep)
        if lowered_path.endswith('.txt'):
            return pd.read_csv(file_path, sep='\t')
        raise ValueError('File format not supported. Please provide a csv or txt file.')

    def _clean_data(self):
        """Remove every row that contains a missing value in any column.

        The result is stored as a new frame so that a DataFrame supplied by
        the caller is never modified in place.
        """
        self.data = self.data.dropna()

    def _map_relevant_data(self, time_header: str, x_headers: Union[str, list],
                           y_headers: Union[str, list]):
        """Populate ``self.time``, ``self.x`` and ``self.y`` from ``self.data``.

        Args:
            time_header: name of the time column.
            x_headers: one column name, or a sequence of one (single signal)
                or two (left eye, right eye) column names.
            y_headers: same convention as ``x_headers`` for the y coordinate.

        Raises:
            ValueError: if a column cannot be resolved or a header sequence
                has a length other than 1 or 2.
        """
        try:
            self.time = self._get_value_array(time_header, TIME_ALIASES)
            self.x = self._resolve_axis(
                x_headers, 'x_headers', X_ALIASES, LEFT_X_ALIASES, RIGHT_X_ALIASES)
            self.y = self._resolve_axis(
                y_headers, 'y_headers', Y_ALIASES, LEFT_Y_ALIASES, RIGHT_Y_ALIASES)
        except KeyError as err:
            raise ValueError(
                'Required data columns are missing or not in the correct naming format.'
            ) from err

    def _resolve_axis(self, headers, label: str, single_aliases: list,
                      left_aliases: list, right_aliases: list) -> np.ndarray:
        """Return the coordinate array for one axis.

        A string, or a one-element list/tuple (tolerated in case the caller
        wrapped a single header by accident), selects a single column. A
        two-element list/tuple is read as (left eye, right eye) and the two
        columns are averaged sample by sample.

        Raises:
            ValueError: if ``headers`` has any other type or length; ``label``
                is used in the message.
        """
        if isinstance(headers, str):
            return self._get_value_array(headers, single_aliases)
        if isinstance(headers, (list, tuple)):
            if len(headers) == 1:
                return self._get_value_array(headers[0], single_aliases)
            if len(headers) == 2:
                left = self._get_value_array(headers[0], left_aliases)
                right = self._get_value_array(headers[1], right_aliases)
                return np.mean([left, right], axis=0)
        raise ValueError(f'invalid size of {label}')

    def _get_value_array(self, header_name: str, known_names: list) -> np.ndarray:
        """Return the values of one column of ``self.data`` as a 1-D array.

        ``header_name`` is used when it is an existing column. Otherwise the
        first column (in frame order) whose lower-cased name equals one of
        ``known_names`` (compared case-insensitively) is used instead.

        Raises:
            ValueError: if neither ``header_name`` nor any alias matches.
        """
        if header_name in self.data.columns:
            return self.data[header_name].to_numpy().flatten()

        aliases = {str(name).lower() for name in known_names}
        for position, column in enumerate(self.data.columns):
            if str(column).lower() in aliases:
                print(f"Using alternative columns: {column}")
                # Select by position so the original (possibly mixed-case)
                # column is read and exactly one column is returned.
                return self.data.iloc[:, position].to_numpy().flatten()
        raise ValueError(f'Invalid data format: header {header_name} not found.')

    def extract_features(self, data: Union[pd.DataFrame, str], time_header: str,
                         x_headers: Union[str, list], y_headers: Union[str, list],
                         missing: float = 0.0, minlen: int = 5, maxvel: int = 40,
                         maxacc: int = 340) -> pd.DataFrame:
        """Load, clean and map the samples, then run saccade detection.

        Args:
            data: a DataFrame of samples or a path to a ``.csv``/``.txt`` file.
            time_header: name of the time column.
            x_headers: x column name, or a list of one or two column names.
            y_headers: y column name, or a list of one or two column names.
            missing, minlen, maxvel, maxacc: forwarded unchanged to
                ``saccade_detection``; the defaults mirror those of
                ``_extract_features``.

        Returns:
            One row per detected saccade (see ``_extract_features``).

        Raises:
            ValueError: if ``data`` is neither a DataFrame nor a string, or
                the file/columns cannot be resolved.
        """
        if isinstance(data, pd.DataFrame):
            self.data = data
        elif isinstance(data, str):
            self.data = self._load_data(data)
        else:
            raise ValueError('Data must be a pandas DataFrame or a file path to a csv or txt file.')
        self._clean_data()
        self._map_relevant_data(time_header, x_headers, y_headers)
        return self._extract_features(missing, minlen, maxvel, maxacc)

    def _extract_features(self, missing: float = 0.0, minlen: int = 5, maxvel: int = 40,
                          maxacc: int = 340) -> pd.DataFrame:
        """Run ``saccade_detection`` on the mapped arrays.

        Only the second value returned by ``saccade_detection`` is used; it is
        wrapped in a DataFrame with the columns ``starttime``, ``endtime``,
        ``duration``, ``startx``, ``starty``, ``endx`` and ``endy``.
        """
        _, esac = saccade_detection(self.x, self.y, self.time, missing=missing,
                                    minlen=minlen, maxvel=maxvel, maxacc=maxacc)
        return pd.DataFrame(
            esac,
            columns=['starttime', 'endtime', 'duration', 'startx', 'starty', 'endx', 'endy'],
        )

    def __derive_separator(self, file_path):
        """Decide whether a CSV file is comma- or semicolon-separated.

        Only the header row is parsed with each candidate; the candidate that
        yields more columns wins. I/O errors such as a missing file are not
        swallowed and propagate to the caller.

        Raises:
            ValueError: if both candidates yield the same number of columns.
        """
        column_counts = {}
        for candidate in (',', ';'):
            try:
                # nrows=0 parses the header only, avoiding a full read of the
                # file just to count columns.
                header = pd.read_csv(file_path, sep=candidate, nrows=0)
            except (pd.errors.ParserError, pd.errors.EmptyDataError, UnicodeDecodeError):
                column_counts[candidate] = 0
            else:
                column_counts[candidate] = len(header.columns)

        if column_counts[','] > column_counts[';']:
            return ','
        if column_counts[';'] > column_counts[',']:
            return ';'
        raise ValueError('Columns separator in CSV not supported. Make sure to use either , or ; as separator')
|