diff --git a/Makefile b/Makefile index 0ed9334..b57d6d7 100644 --- a/Makefile +++ b/Makefile @@ -1,2 +1,5 @@ link: - pip install -e ./ \ No newline at end of file + pip install -e ./ + +test: + pytest -v -log_cli=True --log-cli-level=INFO tests \ No newline at end of file diff --git a/src/paveit/labtest/base.py b/src/paveit/labtest/base.py index 508b47b..0c4222a 100644 --- a/src/paveit/labtest/base.py +++ b/src/paveit/labtest/base.py @@ -18,19 +18,22 @@ class DataSineLoad(): def __init__(self, filename: str, metadata: dict, - archive: bool = True, + logger=None, debug: bool = False, data: None | io.BytesIO = None): + self.filename = filename self.metadata = metadata if isinstance(data, io.BytesIO): self.data = data - self.archive_data = archive self.debug = debug - self._logger = logging.getLogger(__name__) + if logger == None: + self._logger = logging.getLogger(__name__) + else: + self._logger = logger self._logger.info( f'filename s3: {self.filename}, metadata: {self.metadata}') @@ -38,16 +41,23 @@ class DataSineLoad(): self._pre_run() def _set_parameter(self): + self._logger.debug('run _set_parameter') self.split_data_based_on_parameter = ['T', 'sigma', 'f'] self.col_as_int = ['N'] - self.col_as_float = ['T', 'F', 's_piston', 's_hor_1', 'f', 's_hor_1', 's_hor_2'] + self.col_as_float = [ + 'T', 'F', 's_piston', 's_hor_1', 'f', 's_hor_1', 's_hor_2' + ] + + self.val_col_names = [ + 'time', 'T', 'f', 'sigma', 'N', 'F', 's_hor_1', 's_hor_2' + ] + + self.columns_analyse = [ + 'F', 's_hor_sum', 's_hor_1', 's_hor_2', 's_piston' + ] - self.val_col_names = ['time', 'T', 'f', 'sigma', 'N', 'F', 's_hor_1', 's_hor_2'] - - self.columns_analyse = ['F','s_hor_sum','s_hor_1','s_hor_2','s_piston'] - # Header names after standardization; check if exists self.val_header_names = ['speciment_height', 'speciment_diameter'] @@ -56,7 +66,7 @@ class DataSineLoad(): self.meta_names_of_parameter = { 'sigma': ['Max. Spannung'] } #list of names - + self.data_column_names = { 'time': ['Time Series'], 'F': ['Load Series'], @@ -75,12 +85,12 @@ class DataSineLoad(): self.unit_t = 1 / 1000. #s def _connect_to_s3(self): - self._logger.info('connect to db') + self._logger.debug('run _connect to db') self.__minioClient = get_minio_client_processing() def _read_from_s3_to_bytesio(self): - self._logger.info('read bytes') + self._logger.debug('run _read bytes') try: self._connect_to_s3() @@ -92,55 +102,57 @@ class DataSineLoad(): response.release_conn() self.data = io.BytesIO(self.data) + self._logger.debug('data read from s3') def _calc_hash_of_bytesio(self): + self._logger.debug('run _calc_hash_of_bytesio') self.filehash = calc_hash_of_bytes(self.data) self.data.seek(0) self._logger.debug(f'Hash of file: {self.filehash}') def _process_data(self): - """ convert self.data (BytesIO) to pandas.DataFrame, update self.metadata with informations from file """ - + self._logger.debug('convert bytes to pandas.DataFrame') encoding = 'utf-8' self.data = pd.read_csv(self.data, encoding=encoding) def _standardize_data(self): - + self._logger.debug('run _standardize_data') + colnames = list(self.data.columns) - + for par, names in self.data_column_names.items(): for name in names: colnames = [sub.replace(name, par) for sub in colnames] self.data.columns = colnames - - print(self.data.head(5)) - def _standardize_meta(self): - + self._logger.debug('run _standardize_meta') + for par, names in self.meta_names_of_parameter.items(): for name in names: if name in self.metadata: - + self.metadata[par] = self.metadata[name] self.metadata.pop(name) - + break - + def _validate_data(self): + self._logger.debug('run _validate_data') for name in self.val_col_names: if not name in self.data.columns: raise - def _validate_meta(self): + self._logger.debug('run _validate_meta') + for name in self.val_header_names: if not name in self.metadata: raise @@ -160,14 +172,12 @@ class DataSineLoad(): return True def _post_select_importent_columns(self): - + # TODO: add more columns, check datamodel - + self.data = self.data[self.val_col_names] - def _post_calc_missiong_values(self): - cols = self.data.columns @@ -191,7 +201,8 @@ class DataSineLoad(): return True def _fit_split_data(self): - + self._logger.debug('run _fit_split_data') + data_gp = self.data.groupby(self.split_data_based_on_parameter) data_list = [] @@ -229,6 +240,9 @@ class DataSineLoad(): self.data = data_list #break + nchunks = len(self.data) + self._logger.debug(f'data splited in {nchunks} chunks') + def _fit_select_data(self): """ select N load cycles from original data @@ -236,9 +250,11 @@ class DataSineLoad(): (b) last N cycles """ - + + self._logger.debug('run _fit_select_data') + def sel_df(df, num=5): - + N = df['N'].unique() freq = float(df['f'].unique()[0]) @@ -264,21 +280,20 @@ class DataSineLoad(): else: Nfrom = None Nto = None - - + # Fall 1: nicht alle LW in Datei if (max(N) < Nto) & (len(N) >= num): df_sel = df[(df['N'] >= N[-num]) & (df['N'] <= N[-1])] - + # Fall 2: else: if Nfrom != None: if len(N) > Nto - Nfrom: df_sel = df[(df['N'] >= Nfrom) & (df['N'] <= Nto)] - + return df_sel - + if not isinstance(self.data, list): if self.number_of_load_cycles_for_analysis > 1: df_sel = [ @@ -292,7 +307,8 @@ class DataSineLoad(): df_sel = [] for d in self.data: if self.number_of_load_cycles_for_analysis > 1: - d_sel = sel_df(d,num=self.number_of_load_cycles_for_analysis) + d_sel = sel_df(d, + num=self.number_of_load_cycles_for_analysis) else: d_sel = d @@ -300,38 +316,35 @@ class DataSineLoad(): # replace data self.data = df_sel - + def _calc(self): - print(len(self.data)) - self.fit = [] for idx_data, data in enumerate(self.data): - + if data is None: continue if len(data) < 10: continue - data.index = data.index - data.index[0] - + res_temp = {} x = data.index.values - - freq = np.round(float(data['f'].unique()),2) + + freq = np.round(float(data['f'].unique()), 2) sigma = float(data['sigma'].unique()) temperature = float(data['T'].unique()) - + for idxcol, col in enumerate(self.columns_analyse): if not col in data.columns: continue y = data[col].values - - res = fit_cos(x,y, freq=freq) - + + res = fit_cos(x, y, freq=freq) + for key, value in res.items(): res_temp[f'fit_{col}_{key}'] = value - + res_temp[f'fit_{col}_max'] = max(y) res_temp[f'fit_{col}_min'] = min(y) @@ -343,9 +356,9 @@ class DataSineLoad(): deltaF = res_temp['fit_F_amp'] nu = calc_nu(temperature) res_temp['nu'] = nu - + h = float(self.metadata['speciment_height']) - + deltaU = res_temp['fit_s_hor_sum_amp'] res_temp['E'] = (deltaF * (0.274 + nu)) / (h * deltaU) @@ -353,19 +366,12 @@ class DataSineLoad(): self.fit.append(res_temp) self.fit = pd.DataFrame.from_records(self.fit) - - self.fit = self.fit.set_index(['T', 'f', 'sigma']) - - print(self.fit) - - - def _archive_binary_data(self): - self._logger.debug('send file to archive') - app.send_task( - 'ArchiveFile', - args=[self.filename, self.metadata, self.filehash, 'org', 'citt'], - queue='archive') + self.fit = self.fit.set_index(['T', 'f', 'sigma']) + + nsamples = len(self.fit) + self._logger.info(f'fitting finished, add {nsamples} samples') + self._logger.debug(self.fit['E']) def _pre_run(self): @@ -396,9 +402,4 @@ class DataSineLoad(): self._fit_select_data() self._calc() - #self._logger.debug(f'results: {res}') - - #if self.archive_data: - # self._archive_binary_data() - - #return res + #self._logger.info(f'results: {self.fit['E']}') \ No newline at end of file diff --git a/src/paveit/labtest/citt.py b/src/paveit/labtest/citt.py index 745f0b4..3931821 100644 --- a/src/paveit/labtest/citt.py +++ b/src/paveit/labtest/citt.py @@ -5,19 +5,21 @@ from csv import reader import numpy as np import pandas as pd from paveit.labtest import DataSineLoad -from torch import isin class CITTBase(DataSineLoad): + def _calc(self): return (self.df.mean().mean(), self.df.max().max()) + class CITT_KIT(DataSineLoad): + def _calc(self): return (self.df.mean().mean(), self.df.max().max()) def _process_data(self): - logger.debug('convert bytes to pandas.DataFrame') + self._logger.debug('convert bytes to pandas.DataFrame') self.data.seek(0) with io.TextIOWrapper(self.data, encoding='latin-1') as read_obj: @@ -82,7 +84,6 @@ class CITT_KIT(DataSineLoad): idx = t[(t['ZEIT'] >= tmin) & (t['ZEIT'] < tmax)].index N[idx] = i - t['N'] = N res.append(t) @@ -96,8 +97,9 @@ class CITT_KIT(DataSineLoad): #define in class self.data = res.reset_index() + class CITT_PTMDortmund(DataSineLoad): - + def _define_units(self): self.unit_s = 1 #mm @@ -106,15 +108,16 @@ class CITT_PTMDortmund(DataSineLoad): def update_parameter(self): - self.meta_names_of_parameter = {'sigma': ['Max. Spannung', 'Max Stress'], - 'f': ['Frequenz', 'Frequency'], - 'T': ['Versuchstemperatur', 'Target Test Temperature'], - 'Nfrom': ['Erster Aufzeichnungslastwechsel', 'Start Cycle'], - 'Nto': ['Letzer Aufzeichnungslastwechsel', 'Last Cycle'], - 't': ['Zeitfolgen', 'Time Series'], - 'speciment_diameter': ['Durchmesser (mm)', 'Diameter (mm)'], - 'speciment_height': ['Länge (mm)', 'Length (mm)'], - } #list of names + self.meta_names_of_parameter = { + 'sigma': ['Max. Spannung', 'Max Stress'], + 'f': ['Frequenz', 'Frequency'], + 'T': ['Versuchstemperatur', 'Target Test Temperature'], + 'Nfrom': ['Erster Aufzeichnungslastwechsel', 'Start Cycle'], + 'Nto': ['Letzer Aufzeichnungslastwechsel', 'Last Cycle'], + 't': ['Zeitfolgen', 'Time Series'], + 'speciment_diameter': ['Durchmesser (mm)', 'Diameter (mm)'], + 'speciment_height': ['Länge (mm)', 'Length (mm)'], + } #list of names self.data_column_names = { 'time': ['Time Series'], @@ -132,7 +135,6 @@ class CITT_PTMDortmund(DataSineLoad): diameter = [] height = [] - for sheetid in range(num_sheets): temp = pd.read_excel(self.data, sheetid, skiprows=97) temp = temp.drop(index=0) @@ -141,24 +143,23 @@ class CITT_PTMDortmund(DataSineLoad): for col in temp.columns: temp[col] = pd.to_numeric(temp[col]) - #read metadata from file - meta = pd.read_excel(self.data, sheetid, - skiprows=1, - nrows=80) + meta = pd.read_excel(self.data, sheetid, skiprows=1, nrows=80) meta = meta[meta.columns[[0, 2]]] - meta = meta.set_index( - meta.columns[0]) - + meta = meta.set_index(meta.columns[0]) + meta = meta.dropna(axis=0) meta = meta[meta.columns[0]] - + meta = meta.to_dict() - + #remove whitespace in dict keys: - meta = {x.strip(): v for x, v in meta.items() if isinstance(x, str)} + meta = { + x.strip(): v + for x, v in meta.items() if isinstance(x, str) + } frequency_test = None # add metadata to dataframe @@ -167,21 +168,21 @@ class CITT_PTMDortmund(DataSineLoad): v = None for name in names: try: - v = np.round(float(meta[name]),5) - + v = np.round(float(meta[name]), 5) + if par == 'f': - v = np.round(v,2) - + v = np.round(v, 2) + break except: pass - + assert v is not None temp[par] = v - + if par == 'f': frequency_test = v - + # read additional parameters names = self.meta_names_of_parameter['Nfrom'] for name in names: @@ -191,7 +192,7 @@ class CITT_PTMDortmund(DataSineLoad): except: Nfrom = None assert Nfrom is not None - + names = self.meta_names_of_parameter['Nto'] for name in names: try: @@ -209,32 +210,29 @@ class CITT_PTMDortmund(DataSineLoad): break except: time_idx = None - assert time_idx is not None - - temp['N'] = 0 + assert time_idx is not None - self._logger.info(f'cycles from {Nfrom} to {Nto}') + temp['N'] = 0 #BUG: Ist in Messdatei falsch definiert und wird von PTM angepasst. ''' #for cycle in range(Nfrom, Nto+1): - - dt = 1.0/frequency_test - + + dt = 1.0 / frequency_test + tmax = dt max_timeindex = max(time_idx) - + cycle = 0 while tmax < max_timeindex: # time window - tmin = (cycle) * dt + tmin = (cycle) * dt tmax = (cycle + 1) * dt #filter data - idx = temp[(time_idx >= tmin) - & (time_idx < tmax)].index - + idx = temp[(time_idx >= tmin) & (time_idx < tmax)].index + #set cycle number temp.loc[idx, 'N'] = cycle - + cycle += 1 # add diameter and height to list @@ -247,7 +245,7 @@ class CITT_PTMDortmund(DataSineLoad): v = None assert v is not None diameter.append(v) - + names = self.meta_names_of_parameter['speciment_height'] for name in names: try: @@ -257,7 +255,7 @@ class CITT_PTMDortmund(DataSineLoad): v = None assert v is not None height.append(v) - + #append data to final dataframe res.append(temp) @@ -270,13 +268,11 @@ class CITT_PTMDortmund(DataSineLoad): # self.metadata['speciment_diameter'] = np.mean(diameter) #if not 'speciment_height' in self.metadata: # self.metadata['speciment_height'] = np.mean(height) - - #define in class self.data = res.reset_index() self.metadata.update(meta) # log infos - self._logger.debug(self.metadata) - self._logger.debug(self.data.head()) \ No newline at end of file + self._logger.info(self.metadata) + self._logger.info(self.data.head()) \ No newline at end of file diff --git a/tests/analysis/citt_test.py b/tests/analysis/citt_test.py index 93f85c5..326b0b3 100644 --- a/tests/analysis/citt_test.py +++ b/tests/analysis/citt_test.py @@ -34,7 +34,7 @@ def test_citt_ptmdortmund(): res.run() fit = res.fit.reset_index() - + logger.info(fit.head()) assert len(fit) == 5 m = res_dict[filename] @@ -42,7 +42,9 @@ def test_citt_ptmdortmund(): for col in ['F', 's_hor_sum', 's_hor_1', 's_hor_2']: assert all(fit[f'fit_{col}_r2'] >= m['min_r2']) - sel = fit[(fit['f']==10.0) & (fit['sigma']==0.2) & (fit['T']==20.0)].iloc[0] + + + sel = fit[(fit['f']==10.0) & (fit['T']==20.0)].iloc[0] Emin = (1-m['max_diff'])*m['stiffness_10Hz'] Emax = (1+m['max_diff'])*m['stiffness_10Hz'] diff --git a/tests/data/citt/PTM_Dortmund/meta.toml b/tests/data/citt/PTM_Dortmund/meta.toml index 00a41e7..e79979c 100644 --- a/tests/data/citt/PTM_Dortmund/meta.toml +++ b/tests/data/citt/PTM_Dortmund/meta.toml @@ -1,4 +1,14 @@ ["sample_01.xlsm"] min_r2 = 0.993 max_diff = 0.005 #% -stiffness_10Hz = 2269.0 #MPa \ No newline at end of file +stiffness_10Hz = 2269.0 #MPa + +["sample_02.xlsm"] +min_r2 = 0.993 +max_diff = 0.005 #% +stiffness_10Hz = 2250.0 #MPa + +["sample_03.xlsm"] +min_r2 = 0.993 +max_diff = 0.005 #% +stiffness_10Hz = 2231.0 #MPa \ No newline at end of file diff --git a/tests/data/citt/PTM_Dortmund/sample_02.xlsm b/tests/data/citt/PTM_Dortmund/sample_02.xlsm new file mode 100755 index 0000000..a34c748 Binary files /dev/null and b/tests/data/citt/PTM_Dortmund/sample_02.xlsm differ diff --git a/tests/data/citt/PTM_Dortmund/sample_03.xlsm b/tests/data/citt/PTM_Dortmund/sample_03.xlsm new file mode 100755 index 0000000..95d32fe Binary files /dev/null and b/tests/data/citt/PTM_Dortmund/sample_03.xlsm differ