CITT Dortmund läuft, Tests hinzugefügt

2023-02-28 16:11:55 +01:00
parent e861dbf10e
commit e5c9f6904c
7 changed files with 136 additions and 124 deletions
--- a/3
+++ b/3
@@ -1,2 +1,5 @@
 link:
 	pip install -e ./
 # Run the test suite with live log output at INFO level.
 # `log_cli` is a pytest ini option, so it has to be set through `-o`;
 # `-log_cli=True` is not a pytest command-line flag.
 test:
 	pytest -v -o log_cli=true --log-cli-level=INFO tests
--- a/src/paveit/labtest/base.py
+++ b/src/paveit/labtest/base.py
@@ -18,19 +18,22 @@ class DataSineLoad():
    def __init__(self,
                 filename: str,
                 metadata: dict,
-                 archive: bool = True,
+                 logger=None,
                 debug: bool = False,
                 data: None | io.BytesIO = None):
        self.filename = filename
        self.metadata = metadata
        if isinstance(data, io.BytesIO):
            self.data = data
        self.archive_data = archive
        self.debug = debug
        if logger == None:
            self._logger = logging.getLogger(__name__)
        else:
            self._logger = logger
        self._logger.info(
            f'filename s3: {self.filename}, metadata: {self.metadata}')
@@ -38,15 +41,22 @@ class DataSineLoad():
        self._pre_run()
    def _set_parameter(self):
        self._logger.debug('run _set_parameter')
        self.split_data_based_on_parameter = ['T', 'sigma', 'f']
        self.col_as_int = ['N']
-        self.col_as_float = ['T', 'F', 's_piston', 's_hor_1', 'f', 's_hor_1', 's_hor_2']
+        self.col_as_float = [
            'T', 'F', 's_piston', 's_hor_1', 'f', 's_hor_1', 's_hor_2'
        ]
-        self.val_col_names = ['time', 'T', 'f', 'sigma', 'N', 'F', 's_hor_1', 's_hor_2']
+        self.val_col_names = [
            'time', 'T', 'f', 'sigma', 'N', 'F', 's_hor_1', 's_hor_2'
        ]
-        self.columns_analyse = ['F','s_hor_sum','s_hor_1','s_hor_2','s_piston']
+        self.columns_analyse = [
            'F', 's_hor_sum', 's_hor_1', 's_hor_2', 's_piston'
        ]
        # Header names after standardization; check if exists
        self.val_header_names = ['speciment_height', 'speciment_diameter']
@@ -75,12 +85,12 @@ class DataSineLoad():
        self.unit_t = 1 / 1000.  #s
    def _connect_to_s3(self):
-        self._logger.info('connect to db')
+        self._logger.debug('run _connect to db')
        self.__minioClient = get_minio_client_processing()
    def _read_from_s3_to_bytesio(self):
-        self._logger.info('read bytes')
+        self._logger.debug('run _read bytes')
        try:
            self._connect_to_s3()
@@ -92,15 +102,16 @@ class DataSineLoad():
            response.release_conn()
        self.data = io.BytesIO(self.data)
        self._logger.debug('data read from s3')
    def _calc_hash_of_bytesio(self):
        """Hash the in-memory file and remember the result.

        Passes ``self.data`` to ``calc_hash_of_bytes``, stores the returned
        value in ``self.filehash`` and seeks ``self.data`` back to offset 0.
        """
        log = self._logger
        log.debug('run _calc_hash_of_bytesio')

        buffer = self.data
        self.filehash = calc_hash_of_bytes(buffer)
        # rewind the stream (presumably the hash helper consumed it)
        buffer.seek(0)

        log.debug(f'Hash of file: {self.filehash}')
    def _process_data(self):
        """ convert self.data (BytesIO) to pandas.DataFrame, update
        self.metadata with informations from file """
@@ -110,6 +121,7 @@ class DataSineLoad():
        self.data = pd.read_csv(self.data, encoding=encoding)
    def _standardize_data(self):
        self._logger.debug('run _standardize_data')
        colnames = list(self.data.columns)
@@ -119,10 +131,8 @@ class DataSineLoad():
        self.data.columns = colnames
        print(self.data.head(5))
    def _standardize_meta(self):
        self._logger.debug('run _standardize_meta')
        for par, names in self.meta_names_of_parameter.items():
            for name in names:
@@ -134,13 +144,15 @@ class DataSineLoad():
                    break
    def _validate_data(self):
        self._logger.debug('run _validate_data')
        for name in self.val_col_names:
            if not name in self.data.columns:
                raise
    def _validate_meta(self):
        self._logger.debug('run _validate_meta')
        for name in self.val_header_names:
            if not name in self.metadata:
                raise
@@ -165,10 +177,8 @@ class DataSineLoad():
        self.data = self.data[self.val_col_names]
    def _post_calc_missiong_values(self):
        cols = self.data.columns
        if not 's_hor_sum' in cols:
@@ -191,6 +201,7 @@ class DataSineLoad():
        return True
    def _fit_split_data(self):
        self._logger.debug('run _fit_split_data')
        data_gp = self.data.groupby(self.split_data_based_on_parameter)
@@ -229,6 +240,9 @@ class DataSineLoad():
            self.data = data_list
            #break
        nchunks = len(self.data)
        self._logger.debug(f'data splited in {nchunks} chunks')
    def _fit_select_data(self):
        """ 
        select N load cycles from original data 
@@ -237,6 +251,8 @@ class DataSineLoad():
        """
        self._logger.debug('run _fit_select_data')
        def sel_df(df, num=5):
            N = df['N'].unique()
@@ -265,7 +281,6 @@ class DataSineLoad():
                Nfrom = None
                Nto = None
            # Fall 1: nicht alle LW in Datei
            if (max(N) < Nto) & (len(N) >= num):
                df_sel = df[(df['N'] >= N[-num]) & (df['N'] <= N[-1])]
@@ -292,7 +307,8 @@ class DataSineLoad():
            df_sel = []
            for d in self.data:
                if self.number_of_load_cycles_for_analysis > 1:
-                    d_sel = sel_df(d,num=self.number_of_load_cycles_for_analysis)
+                    d_sel = sel_df(d,
                                   num=self.number_of_load_cycles_for_analysis)
                else:
                    d_sel = d
@@ -303,15 +319,12 @@ class DataSineLoad():
    def _calc(self):
        print(len(self.data))
        self.fit = []
        for idx_data, data in enumerate(self.data):
            if data is None: continue
            if len(data) < 10: continue
            data.index = data.index - data.index[0]
            res_temp = {}
@@ -356,16 +369,9 @@ class DataSineLoad():
        self.fit = self.fit.set_index(['T', 'f', 'sigma'])
-        print(self.fit)
+        nsamples = len(self.fit)
-        
+        self._logger.info(f'fitting finished, add {nsamples} samples')
-        
+        self._logger.debug(self.fit['E'])
    def _archive_binary_data(self):
        """Queue the original file for archiving.

        Sends an ``'ArchiveFile'`` task to the ``'archive'`` queue through
        ``app.send_task``. The task arguments are the filename, the metadata,
        the file hash and the two fixed tags ``'org'`` and ``'citt'``.
        """
        self._logger.debug('send file to archive')

        task_args = [
            self.filename,
            self.metadata,
            self.filehash,
            'org',
            'citt',
        ]
        app.send_task('ArchiveFile', args=task_args, queue='archive')
    def _pre_run(self):
@@ -396,9 +402,4 @@ class DataSineLoad():
        self._fit_select_data()
        self._calc()
-        #self._logger.debug(f'results: {res}')
+        #self._logger.info(f'results: {self.fit['E']}')
        #if self.archive_data:
        #    self._archive_binary_data()
        #return res
--- a/src/paveit/labtest/citt.py
+++ b/src/paveit/labtest/citt.py
@@ -5,19 +5,21 @@ from csv import reader
 import numpy as np
 import pandas as pd
 from paveit.labtest import DataSineLoad
 from torch import isin
 class CITTBase(DataSineLoad):
    def _calc(self):
        return (self.df.mean().mean(), self.df.max().max())
 class CITT_KIT(DataSineLoad):
    def _calc(self):
        return (self.df.mean().mean(), self.df.max().max())
    def _process_data(self):
-        logger.debug('convert bytes to pandas.DataFrame')
+        self._logger.debug('convert bytes to pandas.DataFrame')
        self.data.seek(0)
        with io.TextIOWrapper(self.data, encoding='latin-1') as read_obj:
@@ -82,7 +84,6 @@ class CITT_KIT(DataSineLoad):
                idx = t[(t['ZEIT'] >= tmin) & (t['ZEIT'] < tmax)].index
                N[idx] = i
            t['N'] = N
            res.append(t)
@@ -96,6 +97,7 @@ class CITT_KIT(DataSineLoad):
        #define in class
        self.data = res.reset_index()
 class CITT_PTMDortmund(DataSineLoad):
    def _define_units(self):
@@ -106,7 +108,8 @@ class CITT_PTMDortmund(DataSineLoad):
    def update_parameter(self):
-        self.meta_names_of_parameter = {'sigma': ['Max. Spannung', 'Max Stress'],
+        self.meta_names_of_parameter = {
            'sigma': ['Max. Spannung', 'Max Stress'],
            'f': ['Frequenz', 'Frequency'],
            'T': ['Versuchstemperatur', 'Target Test Temperature'],
            'Nfrom': ['Erster Aufzeichnungslastwechsel', 'Start Cycle'],
@@ -132,7 +135,6 @@ class CITT_PTMDortmund(DataSineLoad):
        diameter = []
        height = []
        for sheetid in range(num_sheets):
            temp = pd.read_excel(self.data, sheetid, skiprows=97)
            temp = temp.drop(index=0)
@@ -141,16 +143,12 @@ class CITT_PTMDortmund(DataSineLoad):
            for col in temp.columns:
                temp[col] = pd.to_numeric(temp[col])
            #read metadata from file
-            meta = pd.read_excel(self.data, sheetid,
+            meta = pd.read_excel(self.data, sheetid, skiprows=1, nrows=80)
                                      skiprows=1,
                                      nrows=80)
            meta = meta[meta.columns[[0, 2]]]
-            meta = meta.set_index(
+            meta = meta.set_index(meta.columns[0])
                meta.columns[0])
            meta = meta.dropna(axis=0)
            meta = meta[meta.columns[0]]
@@ -158,7 +156,10 @@ class CITT_PTMDortmund(DataSineLoad):
            meta = meta.to_dict()
            #remove whitespace in dict keys:
-            meta = {x.strip(): v for x, v in meta.items() if isinstance(x, str)}
+            meta = {
                x.strip(): v
                for x, v in meta.items() if isinstance(x, str)
            }
            frequency_test = None
            # add metadata to dataframe
@@ -213,8 +214,6 @@ class CITT_PTMDortmund(DataSineLoad):
            temp['N'] = 0
            self._logger.info(f'cycles from {Nfrom} to {Nto}')
            #BUG: Ist in Messdatei falsch definiert und wird von PTM angepasst.            '''
            #for cycle in range(Nfrom, Nto+1):
@@ -229,8 +228,7 @@ class CITT_PTMDortmund(DataSineLoad):
                tmin = (cycle) * dt
                tmax = (cycle + 1) * dt
                #filter data
-                idx = temp[(time_idx >= tmin)
+                idx = temp[(time_idx >= tmin) & (time_idx < tmax)].index
                           & (time_idx < tmax)].index
                #set cycle number
                temp.loc[idx, 'N'] = cycle
@@ -271,12 +269,10 @@ class CITT_PTMDortmund(DataSineLoad):
        #if not 'speciment_height' in self.metadata:
        #    self.metadata['speciment_height'] = np.mean(height)
        #define in class
        self.data = res.reset_index()
        self.metadata.update(meta)
        # log infos
-        self._logger.debug(self.metadata)
+        self._logger.info(self.metadata)
-        self._logger.debug(self.data.head())
+        self._logger.info(self.data.head())
--- a/tests/analysis/citt_test.py
+++ b/tests/analysis/citt_test.py
@@ -34,7 +34,7 @@ def test_citt_ptmdortmund():
        res.run()
        fit = res.fit.reset_index()
-        
+        logger.info(fit.head())
        assert len(fit) == 5
        m = res_dict[filename]
@@ -42,7 +42,9 @@ def test_citt_ptmdortmund():
        for col in ['F', 's_hor_sum', 's_hor_1', 's_hor_2']:
            assert all(fit[f'fit_{col}_r2'] >= m['min_r2'])
-        sel = fit[(fit['f']==10.0) & (fit['sigma']==0.2) & (fit['T']==20.0)].iloc[0]
+        
        sel = fit[(fit['f']==10.0) & (fit['T']==20.0)].iloc[0]
        Emin = (1-m['max_diff'])*m['stiffness_10Hz']
        Emax = (1+m['max_diff'])*m['stiffness_10Hz']
--- a/tests/data/citt/PTM_Dortmund/meta.toml
+++ b/tests/data/citt/PTM_Dortmund/meta.toml
@@ -2,3 +2,13 @@
 min_r2 = 0.993
 max_diff = 0.005 #%
 stiffness_10Hz = 2269.0 #MPa
 ["sample_02.xlsm"]
 min_r2 = 0.993
 max_diff = 0.005 #%
 stiffness_10Hz = 2250.0 #MPa
 ["sample_03.xlsm"]
 min_r2 = 0.993
 max_diff = 0.005 #%
 stiffness_10Hz = 2231.0 #MPa
--- a/tests/data/citt/PTM_Dortmund/sample_02.xlsm
+++ b/tests/data/citt/PTM_Dortmund/sample_02.xlsm
--- a/tests/data/citt/PTM_Dortmund/sample_03.xlsm
+++ b/tests/data/citt/PTM_Dortmund/sample_03.xlsm