CITT Dortmund läuft, Tests hinzugefügt

This commit is contained in:
Markus Clauß
2023-02-28 16:11:55 +01:00
parent e861dbf10e
commit e5c9f6904c
7 changed files with 136 additions and 124 deletions

View File

@@ -1,2 +1,5 @@
link: link:
pip install -e ./ pip install -e ./
test:
pytest -v -log_cli=True --log-cli-level=INFO tests

View File

@@ -18,19 +18,22 @@ class DataSineLoad():
def __init__(self, def __init__(self,
filename: str, filename: str,
metadata: dict, metadata: dict,
archive: bool = True, logger=None,
debug: bool = False, debug: bool = False,
data: None | io.BytesIO = None): data: None | io.BytesIO = None):
self.filename = filename self.filename = filename
self.metadata = metadata self.metadata = metadata
if isinstance(data, io.BytesIO): if isinstance(data, io.BytesIO):
self.data = data self.data = data
self.archive_data = archive
self.debug = debug self.debug = debug
self._logger = logging.getLogger(__name__) if logger == None:
self._logger = logging.getLogger(__name__)
else:
self._logger = logger
self._logger.info( self._logger.info(
f'filename s3: {self.filename}, metadata: {self.metadata}') f'filename s3: {self.filename}, metadata: {self.metadata}')
@@ -38,16 +41,23 @@ class DataSineLoad():
self._pre_run() self._pre_run()
def _set_parameter(self): def _set_parameter(self):
self._logger.debug('run _set_parameter')
self.split_data_based_on_parameter = ['T', 'sigma', 'f'] self.split_data_based_on_parameter = ['T', 'sigma', 'f']
self.col_as_int = ['N'] self.col_as_int = ['N']
self.col_as_float = ['T', 'F', 's_piston', 's_hor_1', 'f', 's_hor_1', 's_hor_2'] self.col_as_float = [
'T', 'F', 's_piston', 's_hor_1', 'f', 's_hor_1', 's_hor_2'
]
self.val_col_names = [
'time', 'T', 'f', 'sigma', 'N', 'F', 's_hor_1', 's_hor_2'
]
self.columns_analyse = [
'F', 's_hor_sum', 's_hor_1', 's_hor_2', 's_piston'
]
self.val_col_names = ['time', 'T', 'f', 'sigma', 'N', 'F', 's_hor_1', 's_hor_2']
self.columns_analyse = ['F','s_hor_sum','s_hor_1','s_hor_2','s_piston']
# Header names after standardization; check if exists # Header names after standardization; check if exists
self.val_header_names = ['speciment_height', 'speciment_diameter'] self.val_header_names = ['speciment_height', 'speciment_diameter']
@@ -56,7 +66,7 @@ class DataSineLoad():
self.meta_names_of_parameter = { self.meta_names_of_parameter = {
'sigma': ['Max. Spannung'] 'sigma': ['Max. Spannung']
} #list of names } #list of names
self.data_column_names = { self.data_column_names = {
'time': ['Time Series'], 'time': ['Time Series'],
'F': ['Load Series'], 'F': ['Load Series'],
@@ -75,12 +85,12 @@ class DataSineLoad():
self.unit_t = 1 / 1000. #s self.unit_t = 1 / 1000. #s
def _connect_to_s3(self): def _connect_to_s3(self):
self._logger.info('connect to db') self._logger.debug('run _connect to db')
self.__minioClient = get_minio_client_processing() self.__minioClient = get_minio_client_processing()
def _read_from_s3_to_bytesio(self): def _read_from_s3_to_bytesio(self):
self._logger.info('read bytes') self._logger.debug('run _read bytes')
try: try:
self._connect_to_s3() self._connect_to_s3()
@@ -92,55 +102,57 @@ class DataSineLoad():
response.release_conn() response.release_conn()
self.data = io.BytesIO(self.data) self.data = io.BytesIO(self.data)
self._logger.debug('data read from s3')
def _calc_hash_of_bytesio(self): def _calc_hash_of_bytesio(self):
self._logger.debug('run _calc_hash_of_bytesio')
self.filehash = calc_hash_of_bytes(self.data) self.filehash = calc_hash_of_bytes(self.data)
self.data.seek(0) self.data.seek(0)
self._logger.debug(f'Hash of file: {self.filehash}') self._logger.debug(f'Hash of file: {self.filehash}')
def _process_data(self): def _process_data(self):
""" convert self.data (BytesIO) to pandas.DataFrame, update """ convert self.data (BytesIO) to pandas.DataFrame, update
self.metadata with informations from file """ self.metadata with informations from file """
self._logger.debug('convert bytes to pandas.DataFrame') self._logger.debug('convert bytes to pandas.DataFrame')
encoding = 'utf-8' encoding = 'utf-8'
self.data = pd.read_csv(self.data, encoding=encoding) self.data = pd.read_csv(self.data, encoding=encoding)
def _standardize_data(self): def _standardize_data(self):
self._logger.debug('run _standardize_data')
colnames = list(self.data.columns) colnames = list(self.data.columns)
for par, names in self.data_column_names.items(): for par, names in self.data_column_names.items():
for name in names: for name in names:
colnames = [sub.replace(name, par) for sub in colnames] colnames = [sub.replace(name, par) for sub in colnames]
self.data.columns = colnames self.data.columns = colnames
print(self.data.head(5))
def _standardize_meta(self): def _standardize_meta(self):
self._logger.debug('run _standardize_meta')
for par, names in self.meta_names_of_parameter.items(): for par, names in self.meta_names_of_parameter.items():
for name in names: for name in names:
if name in self.metadata: if name in self.metadata:
self.metadata[par] = self.metadata[name] self.metadata[par] = self.metadata[name]
self.metadata.pop(name) self.metadata.pop(name)
break break
def _validate_data(self): def _validate_data(self):
self._logger.debug('run _validate_data')
for name in self.val_col_names: for name in self.val_col_names:
if not name in self.data.columns: if not name in self.data.columns:
raise raise
def _validate_meta(self): def _validate_meta(self):
self._logger.debug('run _validate_meta')
for name in self.val_header_names: for name in self.val_header_names:
if not name in self.metadata: if not name in self.metadata:
raise raise
@@ -160,14 +172,12 @@ class DataSineLoad():
return True return True
def _post_select_importent_columns(self): def _post_select_importent_columns(self):
# TODO: add more columns, check datamodel # TODO: add more columns, check datamodel
self.data = self.data[self.val_col_names] self.data = self.data[self.val_col_names]
def _post_calc_missiong_values(self): def _post_calc_missiong_values(self):
cols = self.data.columns cols = self.data.columns
@@ -191,7 +201,8 @@ class DataSineLoad():
return True return True
def _fit_split_data(self): def _fit_split_data(self):
self._logger.debug('run _fit_split_data')
data_gp = self.data.groupby(self.split_data_based_on_parameter) data_gp = self.data.groupby(self.split_data_based_on_parameter)
data_list = [] data_list = []
@@ -229,6 +240,9 @@ class DataSineLoad():
self.data = data_list self.data = data_list
#break #break
nchunks = len(self.data)
self._logger.debug(f'data splited in {nchunks} chunks')
def _fit_select_data(self): def _fit_select_data(self):
""" """
select N load cycles from original data select N load cycles from original data
@@ -236,9 +250,11 @@ class DataSineLoad():
(b) last N cycles (b) last N cycles
""" """
self._logger.debug('run _fit_select_data')
def sel_df(df, num=5): def sel_df(df, num=5):
N = df['N'].unique() N = df['N'].unique()
freq = float(df['f'].unique()[0]) freq = float(df['f'].unique()[0])
@@ -264,21 +280,20 @@ class DataSineLoad():
else: else:
Nfrom = None Nfrom = None
Nto = None Nto = None
# Fall 1: nicht alle LW in Datei # Fall 1: nicht alle LW in Datei
if (max(N) < Nto) & (len(N) >= num): if (max(N) < Nto) & (len(N) >= num):
df_sel = df[(df['N'] >= N[-num]) & (df['N'] <= N[-1])] df_sel = df[(df['N'] >= N[-num]) & (df['N'] <= N[-1])]
# Fall 2: # Fall 2:
else: else:
if Nfrom != None: if Nfrom != None:
if len(N) > Nto - Nfrom: if len(N) > Nto - Nfrom:
df_sel = df[(df['N'] >= Nfrom) & (df['N'] <= Nto)] df_sel = df[(df['N'] >= Nfrom) & (df['N'] <= Nto)]
return df_sel return df_sel
if not isinstance(self.data, list): if not isinstance(self.data, list):
if self.number_of_load_cycles_for_analysis > 1: if self.number_of_load_cycles_for_analysis > 1:
df_sel = [ df_sel = [
@@ -292,7 +307,8 @@ class DataSineLoad():
df_sel = [] df_sel = []
for d in self.data: for d in self.data:
if self.number_of_load_cycles_for_analysis > 1: if self.number_of_load_cycles_for_analysis > 1:
d_sel = sel_df(d,num=self.number_of_load_cycles_for_analysis) d_sel = sel_df(d,
num=self.number_of_load_cycles_for_analysis)
else: else:
d_sel = d d_sel = d
@@ -300,38 +316,35 @@ class DataSineLoad():
# replace data # replace data
self.data = df_sel self.data = df_sel
def _calc(self): def _calc(self):
print(len(self.data))
self.fit = [] self.fit = []
for idx_data, data in enumerate(self.data): for idx_data, data in enumerate(self.data):
if data is None: continue if data is None: continue
if len(data) < 10: continue if len(data) < 10: continue
data.index = data.index - data.index[0] data.index = data.index - data.index[0]
res_temp = {} res_temp = {}
x = data.index.values x = data.index.values
freq = np.round(float(data['f'].unique()),2) freq = np.round(float(data['f'].unique()), 2)
sigma = float(data['sigma'].unique()) sigma = float(data['sigma'].unique())
temperature = float(data['T'].unique()) temperature = float(data['T'].unique())
for idxcol, col in enumerate(self.columns_analyse): for idxcol, col in enumerate(self.columns_analyse):
if not col in data.columns: continue if not col in data.columns: continue
y = data[col].values y = data[col].values
res = fit_cos(x,y, freq=freq) res = fit_cos(x, y, freq=freq)
for key, value in res.items(): for key, value in res.items():
res_temp[f'fit_{col}_{key}'] = value res_temp[f'fit_{col}_{key}'] = value
res_temp[f'fit_{col}_max'] = max(y) res_temp[f'fit_{col}_max'] = max(y)
res_temp[f'fit_{col}_min'] = min(y) res_temp[f'fit_{col}_min'] = min(y)
@@ -343,9 +356,9 @@ class DataSineLoad():
deltaF = res_temp['fit_F_amp'] deltaF = res_temp['fit_F_amp']
nu = calc_nu(temperature) nu = calc_nu(temperature)
res_temp['nu'] = nu res_temp['nu'] = nu
h = float(self.metadata['speciment_height']) h = float(self.metadata['speciment_height'])
deltaU = res_temp['fit_s_hor_sum_amp'] deltaU = res_temp['fit_s_hor_sum_amp']
res_temp['E'] = (deltaF * (0.274 + nu)) / (h * deltaU) res_temp['E'] = (deltaF * (0.274 + nu)) / (h * deltaU)
@@ -353,19 +366,12 @@ class DataSineLoad():
self.fit.append(res_temp) self.fit.append(res_temp)
self.fit = pd.DataFrame.from_records(self.fit) self.fit = pd.DataFrame.from_records(self.fit)
self.fit = self.fit.set_index(['T', 'f', 'sigma'])
print(self.fit)
def _archive_binary_data(self):
self._logger.debug('send file to archive') self.fit = self.fit.set_index(['T', 'f', 'sigma'])
app.send_task(
'ArchiveFile', nsamples = len(self.fit)
args=[self.filename, self.metadata, self.filehash, 'org', 'citt'], self._logger.info(f'fitting finished, add {nsamples} samples')
queue='archive') self._logger.debug(self.fit['E'])
def _pre_run(self): def _pre_run(self):
@@ -396,9 +402,4 @@ class DataSineLoad():
self._fit_select_data() self._fit_select_data()
self._calc() self._calc()
#self._logger.debug(f'results: {res}') #self._logger.info(f'results: {self.fit['E']}')
#if self.archive_data:
# self._archive_binary_data()
#return res

View File

@@ -5,19 +5,21 @@ from csv import reader
import numpy as np import numpy as np
import pandas as pd import pandas as pd
from paveit.labtest import DataSineLoad from paveit.labtest import DataSineLoad
from torch import isin
class CITTBase(DataSineLoad): class CITTBase(DataSineLoad):
def _calc(self): def _calc(self):
return (self.df.mean().mean(), self.df.max().max()) return (self.df.mean().mean(), self.df.max().max())
class CITT_KIT(DataSineLoad): class CITT_KIT(DataSineLoad):
def _calc(self): def _calc(self):
return (self.df.mean().mean(), self.df.max().max()) return (self.df.mean().mean(), self.df.max().max())
def _process_data(self): def _process_data(self):
logger.debug('convert bytes to pandas.DataFrame') self._logger.debug('convert bytes to pandas.DataFrame')
self.data.seek(0) self.data.seek(0)
with io.TextIOWrapper(self.data, encoding='latin-1') as read_obj: with io.TextIOWrapper(self.data, encoding='latin-1') as read_obj:
@@ -82,7 +84,6 @@ class CITT_KIT(DataSineLoad):
idx = t[(t['ZEIT'] >= tmin) & (t['ZEIT'] < tmax)].index idx = t[(t['ZEIT'] >= tmin) & (t['ZEIT'] < tmax)].index
N[idx] = i N[idx] = i
t['N'] = N t['N'] = N
res.append(t) res.append(t)
@@ -96,8 +97,9 @@ class CITT_KIT(DataSineLoad):
#define in class #define in class
self.data = res.reset_index() self.data = res.reset_index()
class CITT_PTMDortmund(DataSineLoad): class CITT_PTMDortmund(DataSineLoad):
def _define_units(self): def _define_units(self):
self.unit_s = 1 #mm self.unit_s = 1 #mm
@@ -106,15 +108,16 @@ class CITT_PTMDortmund(DataSineLoad):
def update_parameter(self): def update_parameter(self):
self.meta_names_of_parameter = {'sigma': ['Max. Spannung', 'Max Stress'], self.meta_names_of_parameter = {
'f': ['Frequenz', 'Frequency'], 'sigma': ['Max. Spannung', 'Max Stress'],
'T': ['Versuchstemperatur', 'Target Test Temperature'], 'f': ['Frequenz', 'Frequency'],
'Nfrom': ['Erster Aufzeichnungslastwechsel', 'Start Cycle'], 'T': ['Versuchstemperatur', 'Target Test Temperature'],
'Nto': ['Letzer Aufzeichnungslastwechsel', 'Last Cycle'], 'Nfrom': ['Erster Aufzeichnungslastwechsel', 'Start Cycle'],
't': ['Zeitfolgen', 'Time Series'], 'Nto': ['Letzer Aufzeichnungslastwechsel', 'Last Cycle'],
'speciment_diameter': ['Durchmesser (mm)', 'Diameter (mm)'], 't': ['Zeitfolgen', 'Time Series'],
'speciment_height': ['Länge (mm)', 'Length (mm)'], 'speciment_diameter': ['Durchmesser (mm)', 'Diameter (mm)'],
} #list of names 'speciment_height': ['Länge (mm)', 'Length (mm)'],
} #list of names
self.data_column_names = { self.data_column_names = {
'time': ['Time Series'], 'time': ['Time Series'],
@@ -132,7 +135,6 @@ class CITT_PTMDortmund(DataSineLoad):
diameter = [] diameter = []
height = [] height = []
for sheetid in range(num_sheets): for sheetid in range(num_sheets):
temp = pd.read_excel(self.data, sheetid, skiprows=97) temp = pd.read_excel(self.data, sheetid, skiprows=97)
temp = temp.drop(index=0) temp = temp.drop(index=0)
@@ -141,24 +143,23 @@ class CITT_PTMDortmund(DataSineLoad):
for col in temp.columns: for col in temp.columns:
temp[col] = pd.to_numeric(temp[col]) temp[col] = pd.to_numeric(temp[col])
#read metadata from file #read metadata from file
meta = pd.read_excel(self.data, sheetid, meta = pd.read_excel(self.data, sheetid, skiprows=1, nrows=80)
skiprows=1,
nrows=80)
meta = meta[meta.columns[[0, 2]]] meta = meta[meta.columns[[0, 2]]]
meta = meta.set_index( meta = meta.set_index(meta.columns[0])
meta.columns[0])
meta = meta.dropna(axis=0) meta = meta.dropna(axis=0)
meta = meta[meta.columns[0]] meta = meta[meta.columns[0]]
meta = meta.to_dict() meta = meta.to_dict()
#remove whitespace in dict keys: #remove whitespace in dict keys:
meta = {x.strip(): v for x, v in meta.items() if isinstance(x, str)} meta = {
x.strip(): v
for x, v in meta.items() if isinstance(x, str)
}
frequency_test = None frequency_test = None
# add metadata to dataframe # add metadata to dataframe
@@ -167,21 +168,21 @@ class CITT_PTMDortmund(DataSineLoad):
v = None v = None
for name in names: for name in names:
try: try:
v = np.round(float(meta[name]),5) v = np.round(float(meta[name]), 5)
if par == 'f': if par == 'f':
v = np.round(v,2) v = np.round(v, 2)
break break
except: except:
pass pass
assert v is not None assert v is not None
temp[par] = v temp[par] = v
if par == 'f': if par == 'f':
frequency_test = v frequency_test = v
# read additional parameters # read additional parameters
names = self.meta_names_of_parameter['Nfrom'] names = self.meta_names_of_parameter['Nfrom']
for name in names: for name in names:
@@ -191,7 +192,7 @@ class CITT_PTMDortmund(DataSineLoad):
except: except:
Nfrom = None Nfrom = None
assert Nfrom is not None assert Nfrom is not None
names = self.meta_names_of_parameter['Nto'] names = self.meta_names_of_parameter['Nto']
for name in names: for name in names:
try: try:
@@ -209,32 +210,29 @@ class CITT_PTMDortmund(DataSineLoad):
break break
except: except:
time_idx = None time_idx = None
assert time_idx is not None assert time_idx is not None
temp['N'] = 0
self._logger.info(f'cycles from {Nfrom} to {Nto}') temp['N'] = 0
#BUG: Ist in Messdatei falsch definiert und wird von PTM angepasst. ''' #BUG: Ist in Messdatei falsch definiert und wird von PTM angepasst. '''
#for cycle in range(Nfrom, Nto+1): #for cycle in range(Nfrom, Nto+1):
dt = 1.0/frequency_test dt = 1.0 / frequency_test
tmax = dt tmax = dt
max_timeindex = max(time_idx) max_timeindex = max(time_idx)
cycle = 0 cycle = 0
while tmax < max_timeindex: while tmax < max_timeindex:
# time window # time window
tmin = (cycle) * dt tmin = (cycle) * dt
tmax = (cycle + 1) * dt tmax = (cycle + 1) * dt
#filter data #filter data
idx = temp[(time_idx >= tmin) idx = temp[(time_idx >= tmin) & (time_idx < tmax)].index
& (time_idx < tmax)].index
#set cycle number #set cycle number
temp.loc[idx, 'N'] = cycle temp.loc[idx, 'N'] = cycle
cycle += 1 cycle += 1
# add diameter and height to list # add diameter and height to list
@@ -247,7 +245,7 @@ class CITT_PTMDortmund(DataSineLoad):
v = None v = None
assert v is not None assert v is not None
diameter.append(v) diameter.append(v)
names = self.meta_names_of_parameter['speciment_height'] names = self.meta_names_of_parameter['speciment_height']
for name in names: for name in names:
try: try:
@@ -257,7 +255,7 @@ class CITT_PTMDortmund(DataSineLoad):
v = None v = None
assert v is not None assert v is not None
height.append(v) height.append(v)
#append data to final dataframe #append data to final dataframe
res.append(temp) res.append(temp)
@@ -270,13 +268,11 @@ class CITT_PTMDortmund(DataSineLoad):
# self.metadata['speciment_diameter'] = np.mean(diameter) # self.metadata['speciment_diameter'] = np.mean(diameter)
#if not 'speciment_height' in self.metadata: #if not 'speciment_height' in self.metadata:
# self.metadata['speciment_height'] = np.mean(height) # self.metadata['speciment_height'] = np.mean(height)
#define in class #define in class
self.data = res.reset_index() self.data = res.reset_index()
self.metadata.update(meta) self.metadata.update(meta)
# log infos # log infos
self._logger.debug(self.metadata) self._logger.info(self.metadata)
self._logger.debug(self.data.head()) self._logger.info(self.data.head())

View File

@@ -34,7 +34,7 @@ def test_citt_ptmdortmund():
res.run() res.run()
fit = res.fit.reset_index() fit = res.fit.reset_index()
logger.info(fit.head())
assert len(fit) == 5 assert len(fit) == 5
m = res_dict[filename] m = res_dict[filename]
@@ -42,7 +42,9 @@ def test_citt_ptmdortmund():
for col in ['F', 's_hor_sum', 's_hor_1', 's_hor_2']: for col in ['F', 's_hor_sum', 's_hor_1', 's_hor_2']:
assert all(fit[f'fit_{col}_r2'] >= m['min_r2']) assert all(fit[f'fit_{col}_r2'] >= m['min_r2'])
sel = fit[(fit['f']==10.0) & (fit['sigma']==0.2) & (fit['T']==20.0)].iloc[0]
sel = fit[(fit['f']==10.0) & (fit['T']==20.0)].iloc[0]
Emin = (1-m['max_diff'])*m['stiffness_10Hz'] Emin = (1-m['max_diff'])*m['stiffness_10Hz']
Emax = (1+m['max_diff'])*m['stiffness_10Hz'] Emax = (1+m['max_diff'])*m['stiffness_10Hz']

View File

@@ -1,4 +1,14 @@
["sample_01.xlsm"] ["sample_01.xlsm"]
min_r2 = 0.993 min_r2 = 0.993
max_diff = 0.005 #% max_diff = 0.005 #%
stiffness_10Hz = 2269.0 #MPa stiffness_10Hz = 2269.0 #MPa
["sample_02.xlsm"]
min_r2 = 0.993
max_diff = 0.005 #%
stiffness_10Hz = 2250.0 #MPa
["sample_03.xlsm"]
min_r2 = 0.993
max_diff = 0.005 #%
stiffness_10Hz = 2231.0 #MPa

Binary file not shown.

Binary file not shown.