CITT Dortmund läuft, Tests hinzugefügt

2023-02-28 16:11:55 +01:00
parent e861dbf10e
commit e5c9f6904c
7 changed files with 136 additions and 124 deletions
--- a/src/paveit/labtest/base.py
+++ b/src/paveit/labtest/base.py
@@ -18,19 +18,22 @@ class DataSineLoad():
    def __init__(self,
                 filename: str,
                 metadata: dict,
-                 archive: bool = True,
+                 logger=None,
                 debug: bool = False,
                 data: None | io.BytesIO = None):
+
        self.filename = filename
        self.metadata = metadata

        if isinstance(data, io.BytesIO):
            self.data = data

-        self.archive_data = archive
        self.debug = debug

-        self._logger = logging.getLogger(__name__)
+        if logger == None:
+            self._logger = logging.getLogger(__name__)
+        else:
+            self._logger = logger

        self._logger.info(
            f'filename s3: {self.filename}, metadata: {self.metadata}')
@@ -38,16 +41,23 @@ class DataSineLoad():
        self._pre_run()

    def _set_parameter(self):
+        self._logger.debug('run _set_parameter')

        self.split_data_based_on_parameter = ['T', 'sigma', 'f']

        self.col_as_int = ['N']
-        self.col_as_float = ['T', 'F', 's_piston', 's_hor_1', 'f', 's_hor_1', 's_hor_2']
+        self.col_as_float = [
+            'T', 'F', 's_piston', 's_hor_1', 'f', 's_hor_1', 's_hor_2'
+        ]
+
+        self.val_col_names = [
+            'time', 'T', 'f', 'sigma', 'N', 'F', 's_hor_1', 's_hor_2'
+        ]
+
+        self.columns_analyse = [
+            'F', 's_hor_sum', 's_hor_1', 's_hor_2', 's_piston'
+        ]

-        self.val_col_names = ['time', 'T', 'f', 'sigma', 'N', 'F', 's_hor_1', 's_hor_2']
-        
-        self.columns_analyse = ['F','s_hor_sum','s_hor_1','s_hor_2','s_piston']
-        
        # Header names after standardization; check if exists
        self.val_header_names = ['speciment_height', 'speciment_diameter']

@@ -56,7 +66,7 @@ class DataSineLoad():
        self.meta_names_of_parameter = {
            'sigma': ['Max. Spannung']
        }  #list of names
-        
+
        self.data_column_names = {
            'time': ['Time Series'],
            'F': ['Load Series'],
@@ -75,12 +85,12 @@ class DataSineLoad():
        self.unit_t = 1 / 1000.  #s

    def _connect_to_s3(self):
-        self._logger.info('connect to db')
+        self._logger.debug('run _connect to db')

        self.__minioClient = get_minio_client_processing()

    def _read_from_s3_to_bytesio(self):
-        self._logger.info('read bytes')
+        self._logger.debug('run _read bytes')

        try:
            self._connect_to_s3()
@@ -92,55 +102,57 @@ class DataSineLoad():
            response.release_conn()

        self.data = io.BytesIO(self.data)
+        self._logger.debug('data read from s3')

    def _calc_hash_of_bytesio(self):
+        self._logger.debug('run _calc_hash_of_bytesio')

        self.filehash = calc_hash_of_bytes(self.data)
        self.data.seek(0)
        self._logger.debug(f'Hash of file: {self.filehash}')

    def _process_data(self):
-        
        """ convert self.data (BytesIO) to pandas.DataFrame, update
        self.metadata with informations from file """
-        
+
        self._logger.debug('convert bytes to pandas.DataFrame')

        encoding = 'utf-8'
        self.data = pd.read_csv(self.data, encoding=encoding)

    def _standardize_data(self):
-        
+        self._logger.debug('run _standardize_data')
+
        colnames = list(self.data.columns)
-        
+
        for par, names in self.data_column_names.items():
            for name in names:
                colnames = [sub.replace(name, par) for sub in colnames]

        self.data.columns = colnames
-        
-        print(self.data.head(5))
-        

    def _standardize_meta(self):
-        
+        self._logger.debug('run _standardize_meta')
+
        for par, names in self.meta_names_of_parameter.items():
            for name in names:
                if name in self.metadata:
-                    
+
                    self.metadata[par] = self.metadata[name]
                    self.metadata.pop(name)
-                    
+
                    break
-                    
+
    def _validate_data(self):
+        self._logger.debug('run _validate_data')

        for name in self.val_col_names:
            if not name in self.data.columns:
                raise
-    

    def _validate_meta(self):
+        self._logger.debug('run _validate_meta')
+
        for name in self.val_header_names:
            if not name in self.metadata:
                raise
@@ -160,14 +172,12 @@ class DataSineLoad():
        return True

    def _post_select_importent_columns(self):
-        
+
        # TODO: add more columns, check datamodel
-        
+
        self.data = self.data[self.val_col_names]
-        

    def _post_calc_missiong_values(self):
-        

        cols = self.data.columns

@@ -191,7 +201,8 @@ class DataSineLoad():
        return True

    def _fit_split_data(self):
-        
+        self._logger.debug('run _fit_split_data')
+
        data_gp = self.data.groupby(self.split_data_based_on_parameter)

        data_list = []
@@ -229,6 +240,9 @@ class DataSineLoad():
            self.data = data_list
            #break

+        nchunks = len(self.data)
+        self._logger.debug(f'data splited in {nchunks} chunks')
+
    def _fit_select_data(self):
        """ 
        select N load cycles from original data 
@@ -236,9 +250,11 @@ class DataSineLoad():
        (b) last N cycles
        
        """
-        
+
+        self._logger.debug('run _fit_select_data')
+
        def sel_df(df, num=5):
-            
+
            N = df['N'].unique()
            freq = float(df['f'].unique()[0])

@@ -264,21 +280,20 @@ class DataSineLoad():
            else:
                Nfrom = None
                Nto = None
-            
-                
+
            # Fall 1: nicht alle LW in Datei
            if (max(N) < Nto) & (len(N) >= num):
                df_sel = df[(df['N'] >= N[-num]) & (df['N'] <= N[-1])]
-            
+
            # Fall 2:
            else:

                if Nfrom != None:
                    if len(N) > Nto - Nfrom:
                        df_sel = df[(df['N'] >= Nfrom) & (df['N'] <= Nto)]
-                        
+
            return df_sel
-            
+
        if not isinstance(self.data, list):
            if self.number_of_load_cycles_for_analysis > 1:
                df_sel = [
@@ -292,7 +307,8 @@ class DataSineLoad():
            df_sel = []
            for d in self.data:
                if self.number_of_load_cycles_for_analysis > 1:
-                    d_sel = sel_df(d,num=self.number_of_load_cycles_for_analysis)
+                    d_sel = sel_df(d,
+                                   num=self.number_of_load_cycles_for_analysis)
                else:
                    d_sel = d

@@ -300,38 +316,35 @@ class DataSineLoad():

        # replace data
        self.data = df_sel
-        
+
    def _calc(self):

-        print(len(self.data))
-        
        self.fit = []
        for idx_data, data in enumerate(self.data):
-            
+
            if data is None: continue
            if len(data) < 10: continue
-            

            data.index = data.index - data.index[0]
-            
+
            res_temp = {}

            x = data.index.values
-            
-            freq = np.round(float(data['f'].unique()),2)
+
+            freq = np.round(float(data['f'].unique()), 2)
            sigma = float(data['sigma'].unique())
            temperature = float(data['T'].unique())
-            
+
            for idxcol, col in enumerate(self.columns_analyse):

                if not col in data.columns: continue
                y = data[col].values
-                
-                res = fit_cos(x,y, freq=freq)
-                
+
+                res = fit_cos(x, y, freq=freq)
+
                for key, value in res.items():
                    res_temp[f'fit_{col}_{key}'] = value
-                
+
                res_temp[f'fit_{col}_max'] = max(y)
                res_temp[f'fit_{col}_min'] = min(y)

@@ -343,9 +356,9 @@ class DataSineLoad():
            deltaF = res_temp['fit_F_amp']
            nu = calc_nu(temperature)
            res_temp['nu'] = nu
-            
+
            h = float(self.metadata['speciment_height'])
-            
+
            deltaU = res_temp['fit_s_hor_sum_amp']

            res_temp['E'] = (deltaF * (0.274 + nu)) / (h * deltaU)
@@ -353,19 +366,12 @@ class DataSineLoad():
            self.fit.append(res_temp)

        self.fit = pd.DataFrame.from_records(self.fit)
-        
-        self.fit = self.fit.set_index(['T', 'f', 'sigma'])
-        
-        print(self.fit)
-        
-        
-    def _archive_binary_data(self):

-        self._logger.debug('send file to archive')
-        app.send_task(
-            'ArchiveFile',
-            args=[self.filename, self.metadata, self.filehash, 'org', 'citt'],
-            queue='archive')
+        self.fit = self.fit.set_index(['T', 'f', 'sigma'])
+
+        nsamples = len(self.fit)
+        self._logger.info(f'fitting finished, add {nsamples} samples')
+        self._logger.debug(self.fit['E'])

    def _pre_run(self):

@@ -396,9 +402,4 @@ class DataSineLoad():
        self._fit_select_data()

        self._calc()
-        #self._logger.debug(f'results: {res}')
-
-        #if self.archive_data:
-        #    self._archive_binary_data()
-
-        #return res
+        #self._logger.info(f'results: {self.fit['E']}')
--- a/src/paveit/labtest/citt.py
+++ b/src/paveit/labtest/citt.py
@@ -5,19 +5,21 @@ from csv import reader
 import numpy as np
 import pandas as pd
 from paveit.labtest import DataSineLoad
-from torch import isin


 class CITTBase(DataSineLoad):
+
    def _calc(self):
        return (self.df.mean().mean(), self.df.max().max())

+
 class CITT_KIT(DataSineLoad):
+
    def _calc(self):
        return (self.df.mean().mean(), self.df.max().max())

    def _process_data(self):
-        logger.debug('convert bytes to pandas.DataFrame')
+        self._logger.debug('convert bytes to pandas.DataFrame')

        self.data.seek(0)
        with io.TextIOWrapper(self.data, encoding='latin-1') as read_obj:
@@ -82,7 +84,6 @@ class CITT_KIT(DataSineLoad):
                idx = t[(t['ZEIT'] >= tmin) & (t['ZEIT'] < tmax)].index
                N[idx] = i

-
            t['N'] = N

            res.append(t)
@@ -96,8 +97,9 @@ class CITT_KIT(DataSineLoad):
        #define in class
        self.data = res.reset_index()

+
 class CITT_PTMDortmund(DataSineLoad):
-    
+
    def _define_units(self):

        self.unit_s = 1  #mm
@@ -106,15 +108,16 @@ class CITT_PTMDortmund(DataSineLoad):

    def update_parameter(self):

-        self.meta_names_of_parameter = {'sigma': ['Max. Spannung', 'Max Stress'],
-                                                                       'f': ['Frequenz', 'Frequency'],
-                                                                       'T': ['Versuchstemperatur', 'Target Test Temperature'], 
-                                                                       'Nfrom': ['Erster Aufzeichnungslastwechsel', 'Start Cycle'],
-                                                                       'Nto': ['Letzer Aufzeichnungslastwechsel', 'Last Cycle'],
-                                                                       't': ['Zeitfolgen', 'Time Series'],
-                                                                       'speciment_diameter': ['Durchmesser (mm)', 'Diameter (mm)'],
-                                                                       'speciment_height': ['Länge (mm)', 'Length (mm)'],
-                                                                    } #list of names
+        self.meta_names_of_parameter = {
+            'sigma': ['Max. Spannung', 'Max Stress'],
+            'f': ['Frequenz', 'Frequency'],
+            'T': ['Versuchstemperatur', 'Target Test Temperature'],
+            'Nfrom': ['Erster Aufzeichnungslastwechsel', 'Start Cycle'],
+            'Nto': ['Letzer Aufzeichnungslastwechsel', 'Last Cycle'],
+            't': ['Zeitfolgen', 'Time Series'],
+            'speciment_diameter': ['Durchmesser (mm)', 'Diameter (mm)'],
+            'speciment_height': ['Länge (mm)', 'Length (mm)'],
+        }  #list of names

        self.data_column_names = {
            'time': ['Time Series'],
@@ -132,7 +135,6 @@ class CITT_PTMDortmund(DataSineLoad):
        diameter = []
        height = []

-
        for sheetid in range(num_sheets):
            temp = pd.read_excel(self.data, sheetid, skiprows=97)
            temp = temp.drop(index=0)
@@ -141,24 +143,23 @@ class CITT_PTMDortmund(DataSineLoad):
            for col in temp.columns:
                temp[col] = pd.to_numeric(temp[col])

-
            #read metadata from file

-            meta = pd.read_excel(self.data, sheetid,
-                                      skiprows=1,
-                                      nrows=80)
+            meta = pd.read_excel(self.data, sheetid, skiprows=1, nrows=80)

            meta = meta[meta.columns[[0, 2]]]
-            meta = meta.set_index(
-                meta.columns[0])
-            
+            meta = meta.set_index(meta.columns[0])
+
            meta = meta.dropna(axis=0)
            meta = meta[meta.columns[0]]
-            
+
            meta = meta.to_dict()
-            
+
            #remove whitespace in dict keys:
-            meta = {x.strip(): v for x, v in meta.items() if isinstance(x, str)}
+            meta = {
+                x.strip(): v
+                for x, v in meta.items() if isinstance(x, str)
+            }

            frequency_test = None
            # add metadata to dataframe
@@ -167,21 +168,21 @@ class CITT_PTMDortmund(DataSineLoad):
                v = None
                for name in names:
                    try:
-                        v = np.round(float(meta[name]),5)
-                        
+                        v = np.round(float(meta[name]), 5)
+
                        if par == 'f':
-                            v = np.round(v,2)
-                        
+                            v = np.round(v, 2)
+
                        break
                    except:
                        pass
-                
+
                assert v is not None
                temp[par] = v
-                
+
                if par == 'f':
                    frequency_test = v
-                
+
            # read additional parameters
            names = self.meta_names_of_parameter['Nfrom']
            for name in names:
@@ -191,7 +192,7 @@ class CITT_PTMDortmund(DataSineLoad):
                except:
                    Nfrom = None
            assert Nfrom is not None
-            
+
            names = self.meta_names_of_parameter['Nto']
            for name in names:
                try:
@@ -209,32 +210,29 @@ class CITT_PTMDortmund(DataSineLoad):
                    break
                except:
                    time_idx = None
-            assert time_idx is not None    
-                    
-            temp['N'] = 0
+            assert time_idx is not None

-            self._logger.info(f'cycles from {Nfrom} to {Nto}')
+            temp['N'] = 0

            #BUG: Ist in Messdatei falsch definiert und wird von PTM angepasst.            '''
            #for cycle in range(Nfrom, Nto+1):
-            
-            dt = 1.0/frequency_test
-            
+
+            dt = 1.0 / frequency_test
+
            tmax = dt
            max_timeindex = max(time_idx)
-            
+
            cycle = 0
            while tmax < max_timeindex:
                # time window
-                tmin = (cycle) * dt 
+                tmin = (cycle) * dt
                tmax = (cycle + 1) * dt
                #filter data
-                idx = temp[(time_idx >= tmin)
-                           & (time_idx < tmax)].index
-                                
+                idx = temp[(time_idx >= tmin) & (time_idx < tmax)].index
+
                #set cycle number
                temp.loc[idx, 'N'] = cycle
-                
+
                cycle += 1

            # add diameter and height to list
@@ -247,7 +245,7 @@ class CITT_PTMDortmund(DataSineLoad):
                    v = None
            assert v is not None
            diameter.append(v)
-            
+
            names = self.meta_names_of_parameter['speciment_height']
            for name in names:
                try:
@@ -257,7 +255,7 @@ class CITT_PTMDortmund(DataSineLoad):
                    v = None
            assert v is not None
            height.append(v)
-            
+
            #append data to final dataframe
            res.append(temp)

@@ -270,13 +268,11 @@ class CITT_PTMDortmund(DataSineLoad):
        #    self.metadata['speciment_diameter'] = np.mean(diameter)
        #if not 'speciment_height' in self.metadata:
        #    self.metadata['speciment_height'] = np.mean(height)
-            
-        

        #define in class
        self.data = res.reset_index()
        self.metadata.update(meta)

        # log infos
-        self._logger.debug(self.metadata)
-        self._logger.debug(self.data.head())
+        self._logger.info(self.metadata)
+        self._logger.info(self.data.head())