CITT Dortmund läuft, Tests hinzugefügt

This commit is contained in:
Markus Clauß
2023-02-28 16:11:55 +01:00
parent e861dbf10e
commit e5c9f6904c
7 changed files with 136 additions and 124 deletions

View File

@@ -1,2 +1,5 @@
link: link:
pip install -e ./ pip install -e ./
test:
pytest -v -log_cli=True --log-cli-level=INFO tests

View File

@@ -18,19 +18,22 @@ class DataSineLoad():
def __init__(self, def __init__(self,
filename: str, filename: str,
metadata: dict, metadata: dict,
archive: bool = True, logger=None,
debug: bool = False, debug: bool = False,
data: None | io.BytesIO = None): data: None | io.BytesIO = None):
self.filename = filename self.filename = filename
self.metadata = metadata self.metadata = metadata
if isinstance(data, io.BytesIO): if isinstance(data, io.BytesIO):
self.data = data self.data = data
self.archive_data = archive
self.debug = debug self.debug = debug
if logger == None:
self._logger = logging.getLogger(__name__) self._logger = logging.getLogger(__name__)
else:
self._logger = logger
self._logger.info( self._logger.info(
f'filename s3: {self.filename}, metadata: {self.metadata}') f'filename s3: {self.filename}, metadata: {self.metadata}')
@@ -38,15 +41,22 @@ class DataSineLoad():
self._pre_run() self._pre_run()
def _set_parameter(self): def _set_parameter(self):
self._logger.debug('run _set_parameter')
self.split_data_based_on_parameter = ['T', 'sigma', 'f'] self.split_data_based_on_parameter = ['T', 'sigma', 'f']
self.col_as_int = ['N'] self.col_as_int = ['N']
self.col_as_float = ['T', 'F', 's_piston', 's_hor_1', 'f', 's_hor_1', 's_hor_2'] self.col_as_float = [
'T', 'F', 's_piston', 's_hor_1', 'f', 's_hor_1', 's_hor_2'
]
self.val_col_names = ['time', 'T', 'f', 'sigma', 'N', 'F', 's_hor_1', 's_hor_2'] self.val_col_names = [
'time', 'T', 'f', 'sigma', 'N', 'F', 's_hor_1', 's_hor_2'
]
self.columns_analyse = ['F','s_hor_sum','s_hor_1','s_hor_2','s_piston'] self.columns_analyse = [
'F', 's_hor_sum', 's_hor_1', 's_hor_2', 's_piston'
]
# Header names after standardization; check if exists # Header names after standardization; check if exists
self.val_header_names = ['speciment_height', 'speciment_diameter'] self.val_header_names = ['speciment_height', 'speciment_diameter']
@@ -75,12 +85,12 @@ class DataSineLoad():
self.unit_t = 1 / 1000. #s self.unit_t = 1 / 1000. #s
def _connect_to_s3(self): def _connect_to_s3(self):
self._logger.info('connect to db') self._logger.debug('run _connect to db')
self.__minioClient = get_minio_client_processing() self.__minioClient = get_minio_client_processing()
def _read_from_s3_to_bytesio(self): def _read_from_s3_to_bytesio(self):
self._logger.info('read bytes') self._logger.debug('run _read bytes')
try: try:
self._connect_to_s3() self._connect_to_s3()
@@ -92,15 +102,16 @@ class DataSineLoad():
response.release_conn() response.release_conn()
self.data = io.BytesIO(self.data) self.data = io.BytesIO(self.data)
self._logger.debug('data read from s3')
def _calc_hash_of_bytesio(self): def _calc_hash_of_bytesio(self):
self._logger.debug('run _calc_hash_of_bytesio')
self.filehash = calc_hash_of_bytes(self.data) self.filehash = calc_hash_of_bytes(self.data)
self.data.seek(0) self.data.seek(0)
self._logger.debug(f'Hash of file: {self.filehash}') self._logger.debug(f'Hash of file: {self.filehash}')
def _process_data(self): def _process_data(self):
""" convert self.data (BytesIO) to pandas.DataFrame, update """ convert self.data (BytesIO) to pandas.DataFrame, update
self.metadata with informations from file """ self.metadata with informations from file """
@@ -110,6 +121,7 @@ class DataSineLoad():
self.data = pd.read_csv(self.data, encoding=encoding) self.data = pd.read_csv(self.data, encoding=encoding)
def _standardize_data(self): def _standardize_data(self):
self._logger.debug('run _standardize_data')
colnames = list(self.data.columns) colnames = list(self.data.columns)
@@ -119,10 +131,8 @@ class DataSineLoad():
self.data.columns = colnames self.data.columns = colnames
print(self.data.head(5))
def _standardize_meta(self): def _standardize_meta(self):
self._logger.debug('run _standardize_meta')
for par, names in self.meta_names_of_parameter.items(): for par, names in self.meta_names_of_parameter.items():
for name in names: for name in names:
@@ -134,13 +144,15 @@ class DataSineLoad():
break break
def _validate_data(self): def _validate_data(self):
self._logger.debug('run _validate_data')
for name in self.val_col_names: for name in self.val_col_names:
if not name in self.data.columns: if not name in self.data.columns:
raise raise
def _validate_meta(self): def _validate_meta(self):
self._logger.debug('run _validate_meta')
for name in self.val_header_names: for name in self.val_header_names:
if not name in self.metadata: if not name in self.metadata:
raise raise
@@ -165,10 +177,8 @@ class DataSineLoad():
self.data = self.data[self.val_col_names] self.data = self.data[self.val_col_names]
def _post_calc_missiong_values(self): def _post_calc_missiong_values(self):
cols = self.data.columns cols = self.data.columns
if not 's_hor_sum' in cols: if not 's_hor_sum' in cols:
@@ -191,6 +201,7 @@ class DataSineLoad():
return True return True
def _fit_split_data(self): def _fit_split_data(self):
self._logger.debug('run _fit_split_data')
data_gp = self.data.groupby(self.split_data_based_on_parameter) data_gp = self.data.groupby(self.split_data_based_on_parameter)
@@ -229,6 +240,9 @@ class DataSineLoad():
self.data = data_list self.data = data_list
#break #break
nchunks = len(self.data)
self._logger.debug(f'data splited in {nchunks} chunks')
def _fit_select_data(self): def _fit_select_data(self):
""" """
select N load cycles from original data select N load cycles from original data
@@ -237,6 +251,8 @@ class DataSineLoad():
""" """
self._logger.debug('run _fit_select_data')
def sel_df(df, num=5): def sel_df(df, num=5):
N = df['N'].unique() N = df['N'].unique()
@@ -265,7 +281,6 @@ class DataSineLoad():
Nfrom = None Nfrom = None
Nto = None Nto = None
# Fall 1: nicht alle LW in Datei # Fall 1: nicht alle LW in Datei
if (max(N) < Nto) & (len(N) >= num): if (max(N) < Nto) & (len(N) >= num):
df_sel = df[(df['N'] >= N[-num]) & (df['N'] <= N[-1])] df_sel = df[(df['N'] >= N[-num]) & (df['N'] <= N[-1])]
@@ -292,7 +307,8 @@ class DataSineLoad():
df_sel = [] df_sel = []
for d in self.data: for d in self.data:
if self.number_of_load_cycles_for_analysis > 1: if self.number_of_load_cycles_for_analysis > 1:
d_sel = sel_df(d,num=self.number_of_load_cycles_for_analysis) d_sel = sel_df(d,
num=self.number_of_load_cycles_for_analysis)
else: else:
d_sel = d d_sel = d
@@ -303,15 +319,12 @@ class DataSineLoad():
def _calc(self): def _calc(self):
print(len(self.data))
self.fit = [] self.fit = []
for idx_data, data in enumerate(self.data): for idx_data, data in enumerate(self.data):
if data is None: continue if data is None: continue
if len(data) < 10: continue if len(data) < 10: continue
data.index = data.index - data.index[0] data.index = data.index - data.index[0]
res_temp = {} res_temp = {}
@@ -356,16 +369,9 @@ class DataSineLoad():
self.fit = self.fit.set_index(['T', 'f', 'sigma']) self.fit = self.fit.set_index(['T', 'f', 'sigma'])
print(self.fit) nsamples = len(self.fit)
self._logger.info(f'fitting finished, add {nsamples} samples')
self._logger.debug(self.fit['E'])
def _archive_binary_data(self):
self._logger.debug('send file to archive')
app.send_task(
'ArchiveFile',
args=[self.filename, self.metadata, self.filehash, 'org', 'citt'],
queue='archive')
def _pre_run(self): def _pre_run(self):
@@ -396,9 +402,4 @@ class DataSineLoad():
self._fit_select_data() self._fit_select_data()
self._calc() self._calc()
#self._logger.debug(f'results: {res}') #self._logger.info(f'results: {self.fit['E']}')
#if self.archive_data:
# self._archive_binary_data()
#return res

View File

@@ -5,19 +5,21 @@ from csv import reader
import numpy as np import numpy as np
import pandas as pd import pandas as pd
from paveit.labtest import DataSineLoad from paveit.labtest import DataSineLoad
from torch import isin
class CITTBase(DataSineLoad): class CITTBase(DataSineLoad):
def _calc(self): def _calc(self):
return (self.df.mean().mean(), self.df.max().max()) return (self.df.mean().mean(), self.df.max().max())
class CITT_KIT(DataSineLoad): class CITT_KIT(DataSineLoad):
def _calc(self): def _calc(self):
return (self.df.mean().mean(), self.df.max().max()) return (self.df.mean().mean(), self.df.max().max())
def _process_data(self): def _process_data(self):
logger.debug('convert bytes to pandas.DataFrame') self._logger.debug('convert bytes to pandas.DataFrame')
self.data.seek(0) self.data.seek(0)
with io.TextIOWrapper(self.data, encoding='latin-1') as read_obj: with io.TextIOWrapper(self.data, encoding='latin-1') as read_obj:
@@ -82,7 +84,6 @@ class CITT_KIT(DataSineLoad):
idx = t[(t['ZEIT'] >= tmin) & (t['ZEIT'] < tmax)].index idx = t[(t['ZEIT'] >= tmin) & (t['ZEIT'] < tmax)].index
N[idx] = i N[idx] = i
t['N'] = N t['N'] = N
res.append(t) res.append(t)
@@ -96,6 +97,7 @@ class CITT_KIT(DataSineLoad):
#define in class #define in class
self.data = res.reset_index() self.data = res.reset_index()
class CITT_PTMDortmund(DataSineLoad): class CITT_PTMDortmund(DataSineLoad):
def _define_units(self): def _define_units(self):
@@ -106,7 +108,8 @@ class CITT_PTMDortmund(DataSineLoad):
def update_parameter(self): def update_parameter(self):
self.meta_names_of_parameter = {'sigma': ['Max. Spannung', 'Max Stress'], self.meta_names_of_parameter = {
'sigma': ['Max. Spannung', 'Max Stress'],
'f': ['Frequenz', 'Frequency'], 'f': ['Frequenz', 'Frequency'],
'T': ['Versuchstemperatur', 'Target Test Temperature'], 'T': ['Versuchstemperatur', 'Target Test Temperature'],
'Nfrom': ['Erster Aufzeichnungslastwechsel', 'Start Cycle'], 'Nfrom': ['Erster Aufzeichnungslastwechsel', 'Start Cycle'],
@@ -132,7 +135,6 @@ class CITT_PTMDortmund(DataSineLoad):
diameter = [] diameter = []
height = [] height = []
for sheetid in range(num_sheets): for sheetid in range(num_sheets):
temp = pd.read_excel(self.data, sheetid, skiprows=97) temp = pd.read_excel(self.data, sheetid, skiprows=97)
temp = temp.drop(index=0) temp = temp.drop(index=0)
@@ -141,16 +143,12 @@ class CITT_PTMDortmund(DataSineLoad):
for col in temp.columns: for col in temp.columns:
temp[col] = pd.to_numeric(temp[col]) temp[col] = pd.to_numeric(temp[col])
#read metadata from file #read metadata from file
meta = pd.read_excel(self.data, sheetid, meta = pd.read_excel(self.data, sheetid, skiprows=1, nrows=80)
skiprows=1,
nrows=80)
meta = meta[meta.columns[[0, 2]]] meta = meta[meta.columns[[0, 2]]]
meta = meta.set_index( meta = meta.set_index(meta.columns[0])
meta.columns[0])
meta = meta.dropna(axis=0) meta = meta.dropna(axis=0)
meta = meta[meta.columns[0]] meta = meta[meta.columns[0]]
@@ -158,7 +156,10 @@ class CITT_PTMDortmund(DataSineLoad):
meta = meta.to_dict() meta = meta.to_dict()
#remove whitespace in dict keys: #remove whitespace in dict keys:
meta = {x.strip(): v for x, v in meta.items() if isinstance(x, str)} meta = {
x.strip(): v
for x, v in meta.items() if isinstance(x, str)
}
frequency_test = None frequency_test = None
# add metadata to dataframe # add metadata to dataframe
@@ -213,8 +214,6 @@ class CITT_PTMDortmund(DataSineLoad):
temp['N'] = 0 temp['N'] = 0
self._logger.info(f'cycles from {Nfrom} to {Nto}')
#BUG: Ist in Messdatei falsch definiert und wird von PTM angepasst. ''' #BUG: Ist in Messdatei falsch definiert und wird von PTM angepasst. '''
#for cycle in range(Nfrom, Nto+1): #for cycle in range(Nfrom, Nto+1):
@@ -229,8 +228,7 @@ class CITT_PTMDortmund(DataSineLoad):
tmin = (cycle) * dt tmin = (cycle) * dt
tmax = (cycle + 1) * dt tmax = (cycle + 1) * dt
#filter data #filter data
idx = temp[(time_idx >= tmin) idx = temp[(time_idx >= tmin) & (time_idx < tmax)].index
& (time_idx < tmax)].index
#set cycle number #set cycle number
temp.loc[idx, 'N'] = cycle temp.loc[idx, 'N'] = cycle
@@ -271,12 +269,10 @@ class CITT_PTMDortmund(DataSineLoad):
#if not 'speciment_height' in self.metadata: #if not 'speciment_height' in self.metadata:
# self.metadata['speciment_height'] = np.mean(height) # self.metadata['speciment_height'] = np.mean(height)
#define in class #define in class
self.data = res.reset_index() self.data = res.reset_index()
self.metadata.update(meta) self.metadata.update(meta)
# log infos # log infos
self._logger.debug(self.metadata) self._logger.info(self.metadata)
self._logger.debug(self.data.head()) self._logger.info(self.data.head())

View File

@@ -34,7 +34,7 @@ def test_citt_ptmdortmund():
res.run() res.run()
fit = res.fit.reset_index() fit = res.fit.reset_index()
logger.info(fit.head())
assert len(fit) == 5 assert len(fit) == 5
m = res_dict[filename] m = res_dict[filename]
@@ -42,7 +42,9 @@ def test_citt_ptmdortmund():
for col in ['F', 's_hor_sum', 's_hor_1', 's_hor_2']: for col in ['F', 's_hor_sum', 's_hor_1', 's_hor_2']:
assert all(fit[f'fit_{col}_r2'] >= m['min_r2']) assert all(fit[f'fit_{col}_r2'] >= m['min_r2'])
sel = fit[(fit['f']==10.0) & (fit['sigma']==0.2) & (fit['T']==20.0)].iloc[0]
sel = fit[(fit['f']==10.0) & (fit['T']==20.0)].iloc[0]
Emin = (1-m['max_diff'])*m['stiffness_10Hz'] Emin = (1-m['max_diff'])*m['stiffness_10Hz']
Emax = (1+m['max_diff'])*m['stiffness_10Hz'] Emax = (1+m['max_diff'])*m['stiffness_10Hz']

View File

@@ -2,3 +2,13 @@
min_r2 = 0.993 min_r2 = 0.993
max_diff = 0.005 #% max_diff = 0.005 #%
stiffness_10Hz = 2269.0 #MPa stiffness_10Hz = 2269.0 #MPa
["sample_02.xlsm"]
min_r2 = 0.993
max_diff = 0.005 #%
stiffness_10Hz = 2250.0 #MPa
["sample_03.xlsm"]
min_r2 = 0.993
max_diff = 0.005 #%
stiffness_10Hz = 2231.0 #MPa

Binary file not shown.

Binary file not shown.