# coding: utf-8
import io

import pandas as pd

from paveit.helper import calc_hash_of_bytes, get_minio_client_processing
from worker import app, logger


class DataSineLoad:
    """
    Base class for lab tests with sine load

    Workflow (see :meth:`run`): download the file named ``filename`` from
    the ``processing`` bucket, hash it, parse it as CSV into a
    ``pandas.DataFrame``, compute a result and finally dispatch an
    ``ArchiveFile`` task for the original file.
    """

    def __init__(self, filename: str, metadata: dict):
        """Store the object name and its metadata and log both.

        Args:
            filename: Object name passed to ``get_object`` for the
                ``processing`` bucket.
            metadata: Metadata forwarded unchanged to the archive task.
        """
        self.filename = filename
        self.metadata = metadata

        self._logger = logger

        self._logger.info(f'filename s3: {self.filename}, metadata: {self.metadata}')

    def _connect_to_s3(self) -> None:
        """Create the client via ``get_minio_client_processing()``."""
        self._logger.info('connect to db')

        # NOTE(review): the double-underscore name is mangled to
        # ``_DataSineLoad__minioClient``. A subclass overriding this method
        # and assigning ``self.__minioClient`` would write a different
        # attribute than the one read in ``_read_from_s3_to_bytesio``.
        self.__minioClient = get_minio_client_processing()

    def _read_from_s3_to_bytesio(self) -> None:
        """Download ``self.filename`` and expose it as ``self.data``.

        ``self.data`` is an ``io.BytesIO`` positioned at offset 0 on
        success. Exceptions raised while connecting or fetching the object
        propagate unchanged.
        """
        self._logger.info('read bytes')

        # Acquire the client and the response *before* the try block: the
        # cleanup below needs ``response`` to exist. With these calls
        # inside the try, a failure here made the ``finally`` clause raise
        # UnboundLocalError and mask the real error.
        self._connect_to_s3()
        response = self.__minioClient.get_object('processing', self.filename)
        try:
            payload = response.data
        finally:
            # Always hand the connection back, even if reading failed.
            response.close()
            response.release_conn()

        self.data = io.BytesIO(payload)

    def _calc_hash_of_bytesio(self) -> None:
        """Hash ``self.data`` and store the result in ``self.filehash``."""
        self.filehash = calc_hash_of_bytes(self.data)
        # Rewind so the following CSV parse starts at the beginning
        # (presumably the hash helper consumes the stream - TODO confirm).
        self.data.seek(0)
        self._logger.debug(f'Hash of file: {self.filehash}')

    def _bytes_to_df(self) -> None:
        """Parse ``self.data`` as UTF-8 CSV into ``self.df``."""
        self._logger.debug('convert bytes to pandas.DataFrame')

        self.df = pd.read_csv(self.data, encoding='utf-8')

    def _calc(self):
        """Return the mean of the per-column means of ``self.df``."""
        self._logger.debug('calc data')
        column_means = self.df.mean()
        return column_means.mean()

    def _archive_binary_data(self) -> None:
        """Send an ``ArchiveFile`` task for the file to the ``archive`` queue."""
        self._logger.debug('send file to archive')
        app.send_task(
            'ArchiveFile',
            args=[
                self.filename,
                self.metadata,
                self.filehash,
                # NOTE(review): meaning of these two constants is defined by
                # the ArchiveFile task - confirm against its signature.
                'org',
                'citt',
            ],
            queue='archive',
        )

    def run(self):
        """Execute the full pipeline and return the value from ``_calc``.

        Order matters: the hash must be computed before archiving (it is an
        archive-task argument) and the archive task is only dispatched
        after the calculation succeeded.
        """
        self._logger.info('run task')

        self._read_from_s3_to_bytesio()
        self._calc_hash_of_bytesio()

        self._bytes_to_df()

        res = self._calc()
        self._logger.debug(f'results: {res}')

        self._archive_binary_data()

        return res