# RepTate: Rheology of Entangled Polymers: Toolkit for the Analysis of Theory and Experiments
# --------------------------------------------------------------------------------------------------------
#
# Authors:
# Jorge Ramirez, jorge.ramirez@upm.es
# Victor Boudara, victor.boudara@gmail.com
#
# Useful links:
# http://blogs.upm.es/compsoftmatter/software/reptate/
# https://github.com/jorge-ramirez-upm/RepTate
# http://reptate.readthedocs.io
#
# --------------------------------------------------------------------------------------------------------
#
# Copyright (2017-2023): Jorge Ramirez, Victor Boudara, Universidad Politécnica de Madrid, University of Leeds
#
# This file is part of RepTate.
#
# RepTate is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# RepTate is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with RepTate. If not, see <http://www.gnu.org/licenses/>.
#
# --------------------------------------------------------------------------------------------------------
"""Module FileType
Module for the basic definition of file types.
"""
import os
import numpy as np
# import logging
from openpyxl import load_workbook
from RepTate.core.File import File
class TXTColumnFile(object):
    """Basic class for text-column based data files

    Columns should be separated by spaces or tabs

    BASIC Structure of FILE::

        LINE CONTENTS
        0    Param1=434;Param2=4355;
        1    # Header line and/or comments [OPTIONAL, ANY NUMBER OF HEADER LINES IS POSSIBLE]
        2    col1 col2 col3 [NAMES OF COLUMNS, OPTIONAL]
        3    4343 434.5 535e-434 [DATA, ONLY NUMBERS ALLOWED]

    The following examples can be declared with the line::

        ftype=TXTColumnFile("LVE files", "tts", "LVE files", ['w','G\'','G\'\''], ['Mw','T'], ['rad/s','Pa','Pa'])

    EXAMPLE 1: columns line, no header lines::

        C1=8.77210163229153;C2=114.03;Rho0=0.928;C3=0.61;T=-35;CTg=14.65;dx12=0;isof=true;Mw=634.5;chem=PI;PDI=1.03;
        w G' G'' T g
        4.29882628773180E-0008 1.44001856995549E+0002 3.70207627600662E+0003 -3.30000000000000E-0003 0.00000000000000E+0000
        6.30767835406968E-0008 2.56947504513849E+0002 5.39032089470917E+0003 3.14760000000000E-0004 0.00000000000000E+0000
        9.25946098215800E-0008 4.87031807130633E+0002 7.86538338583378E+0003 -1.01000000000000E-0002 0.00000000000000E+0000

    EXAMPLE 2: Neither columns line nor header lines::

        C1=8.77210163229153;C2=114.03;Rho0=0.928;C3=0.61;T=-35;CTg=14.65;dx12=0;isof=true;Mw=23.4;chem=PI;PDI=1.03;
        2.42782390212358E-0003 2.11182193155015E+0001 1.72559181398615E+0003 1.25000000000000E-0003 0.00000000000000E+0000
        3.56351666244471E-0003 4.30476548641552E+0001 2.53490824331357E+0003 1.48400000000000E-0002 0.00000000000000E+0000

    EXAMPLE 3: 2 Header lines, no column line::

        T=160;chem=PE;
        # Header 1
        # Header 2
        4.23333e-05 1.05E+00 2.96E+01
        6.7e-05 2.02E+00 3.97E+01

    EXAMPLE 4: 2 Header lines + column line::

        T=160;chem=PE;
        # Header 1
        # Header 2
        w G' G''
        4.23333e-05 1.05E+00 2.96E+01
        6.7e-05 2.02E+00 3.97E+01
    """

    def __init__(
        self,
        name="TXTColumn",
        extension="txt",
        description="Generic text file with columns",
        col_names=None,
        basic_file_parameters=None,
        col_units=None,
    ):
        """
        **Constructor**

        Keyword Arguments:
            - name {str} -- Name of file type
            - extension {str} -- File extension
            - description {str} -- Description of file contents
            - col_names {list of str}: list with names of columns to read
            - basic_file_parameters {list of str}: list with file parameters that should always be included in the header line
            - col_units {list of str}: Default units of columns

        The three list arguments default to a fresh empty list for every
        instance (``None`` is the sentinel), so instances never share state
        through a mutable default. A list passed explicitly is stored as is.
        """
        self.name = name
        self.extension = extension
        self.description = description
        # Line indices found by the last call to read_file (0 means "not found")
        self.col_names_line = 0
        self.first_data_line = 0
        self.col_names = [] if col_names is None else col_names
        self.col_index = list(range(len(self.col_names)))
        # Those that will show by default in the dataset
        self.basic_file_parameters = (
            [] if basic_file_parameters is None else basic_file_parameters
        )
        self.col_units = [] if col_units is None else col_units
        # self.logger = logging.getLogger('ReptateLogger')

    def is_number(self, s):
        """Checks if the input string contains a number

        Returns True when ``float(s)`` succeeds, False otherwise (including
        for non-string, non-numeric objects such as ``None``).
        """
        try:
            float(s)
        except (TypeError, ValueError):
            return False
        return True

    def get_parameters(self, line, file):
        """Get the file parameters

        Parses a line of the form ``name1=value1;name2=value2;`` and stores
        the result in ``file.file_parameters`` (a new dict). Values that can
        be converted with ``float`` are stored as floats, everything else as
        strings. Items without an ``=`` sign are ignored.

        Arguments:
            - line {str} -- Parameter line (first line of the file)
            - file -- Object that receives the ``file_parameters`` attribute
        """
        # Remove ALL whitespace (spaces, tabs and the trailing newline) so a
        # last value without a closing ";" does not keep the line terminator.
        compact = "".join(line.split())
        file.file_parameters = {}
        for item in compact.split(";"):
            par = item.split("=")
            if len(par) > 1:
                key, value = par[0], par[1]
                if self.is_number(value):
                    file.file_parameters[key] = float(value)
                else:
                    file.file_parameters[key] = value

    def find_col_names_and_first_data_lines(self, lines, file):
        """Find column names and first row with data

        Scans ``lines`` starting at index 1 (index 0 is the parameter line)
        until the first line made only of numbers is found. Lines before it
        that are neither the column-names line nor blank are appended to
        ``file.header_lines``.

        Arguments:
            - lines {list of str} -- All the lines of the file
            - file -- Object with a ``header_lines`` list

        Returns:
            - (colnameline, firstdata) {tuple of int} -- Index of the line
              with the column names and of the first data line. A value of 0
              means that the corresponding line was not found.
        """
        colnameline = 0
        firstdata = 0
        for i in range(1, len(lines)):
            tokens = lines[i].split()
            if not tokens:
                # A blank line is neither header nor data; without this check
                # all() over no tokens would classify it as a data line.
                continue
            # Whole-token comparison, consistent with the exact-match lookup
            # of the column positions done in read_file.
            if self.col_names and all(name in tokens for name in self.col_names):
                # Column names line found
                colnameline = i
            elif all(self.is_number(tok) for tok in tokens):
                # Data lines have been found
                firstdata = i
                break
            else:
                # Otherwise, this must be a header line
                file.header_lines.append(lines[i])
        return colnameline, firstdata

    def read_file(self, filename, parent_dataset, axarr):
        """Gets all the data from the file

        Reads the parameter line, the optional header / column-names lines
        and the numeric data, and stores them in a new ``File`` object. The
        data table is sorted by increasing value of its first column.
        Entries that are missing or cannot be converted to a number are
        stored as NaN.

        Side effects: ``self.col_names_line``, ``self.first_data_line`` and
        ``self.col_index`` are updated with the layout of this file.

        Arguments:
            - filename {str} -- Path of the file to read
            - parent_dataset -- Passed through to the ``File`` constructor
            - axarr -- Passed through to the ``File`` constructor

        Returns:
            - The new ``File`` object, or None if ``filename`` is not a file
        """
        if not os.path.isfile(filename):
            print('File "%s" does not exists' % filename)
            return None
        file = File(filename, self, parent_dataset, axarr)
        with open(filename, "r", encoding="latin-1") as f:
            lines = f.readlines()

        # An empty file has no parameter line; parse an empty string so that
        # file.file_parameters is still initialised.
        self.get_parameters(lines[0] if lines else "", file)
        (
            self.col_names_line,
            self.first_data_line,
        ) = self.find_col_names_and_first_data_lines(lines, file)

        if self.col_names_line > 0:
            # Position of every requested column in the column-names line
            items = lines[self.col_names_line].split()
            self.col_index = [
                items.index(col) for col in self.col_names if col in items
            ]
        else:
            # No column-names line: assume the columns come in declared order
            self.col_index = list(range(len(self.col_names)))
        num_columns = len(self.col_index)

        # first_data_line == 0 means that no data line was found (line 0 is
        # the parameter line), so there is nothing to parse.
        data_lines = lines[self.first_data_line:] if self.first_data_line > 0 else []
        rawdata = []
        for line in data_lines:
            items = line.split()
            if not items:
                continue
            for j in self.col_index:
                try:
                    rawdata.append(float(items[j]))
                except (IndexError, ValueError):
                    rawdata.append(float("nan"))

        num_rows = len(rawdata) // num_columns if num_columns else 0
        data = np.array(rawdata, dtype=float).reshape(num_rows, num_columns)
        if num_rows > 0 and num_columns > 0:
            # Sort the rows by the values in the first column
            data = data[data[:, 0].argsort()]

        file.data_table.num_columns = num_columns
        file.data_table.num_rows = num_rows
        file.data_table.data = data
        return file
class ExcelFile(object):
    """Parse and read contents from Excel file

    Reading is interactive: the user is asked on the console which sheet
    holds the data and which sheet column corresponds to each of the
    expected column names.
    """

    # Size of the preview of the sheet echoed to the console
    PREVIEW_ROWS = 6
    PREVIEW_COLUMNS = 12
    # Data is read from this (1-based) sheet row onwards; the rows above it
    # are skipped -- presumably they hold titles/units (TODO confirm).
    FIRST_DATA_ROW = 3

    def __init__(
        self,
        name="Excel File",
        extension="xlsx",
        description="Generic Excel file",
        col_names=None,
        basic_file_parameters=None,
        col_units=None,
    ):
        """
        **Constructor**

        Keyword Arguments:
            - name {str} -- Name of file type
            - extension {str} -- File extension
            - description {str} -- Description of file contents
            - col_names {list of str}: list with names of columns to read
            - basic_file_parameters {list of str}: list with file parameters that show by default in the dataset
            - col_units {list of str}: Default units of columns

        The three list arguments default to a fresh empty list for every
        instance (``None`` is the sentinel), so instances never share state
        through a mutable default. A list passed explicitly is stored as is.
        """
        self.name = name
        self.extension = extension
        self.description = description
        self.col_names = [] if col_names is None else col_names
        self.col_index = list(range(len(self.col_names)))
        # Those that will show by default in the dataset
        self.basic_file_parameters = (
            [] if basic_file_parameters is None else basic_file_parameters
        )
        self.col_units = [] if col_units is None else col_units
        # self.logger = logging.getLogger('ReptateLogger')

    @staticmethod
    def _column_letter(index):
        """Return the Excel letter(s) of a 1-based column index (1 -> A, 27 -> AA)"""
        letters = ""
        while index > 0:
            index, remainder = divmod(index - 1, 26)
            letters = chr(ord("A") + remainder) + letters
        return letters

    @staticmethod
    def _to_float(value):
        """Convert a cell value to float; empty or non-numeric cells give NaN"""
        try:
            return float(value)
        except (TypeError, ValueError):
            return float("nan")

    def read_file(self, filename, parent_dataset, axarr):
        """Read Excel File

        Lists the sheets of the workbook, asks which one contains the data
        (repeating the question until a valid number is given), prints a
        short preview of that sheet and then asks, for every name in
        ``self.col_names``, the letter of the sheet column holding its data.
        Values are read from row ``FIRST_DATA_ROW`` down to the last row of
        the sheet; cells that are empty or not numeric are stored as NaN.

        Arguments:
            - filename {str} -- Path of the Excel file
            - parent_dataset -- Passed through to the ``File`` constructor
            - axarr -- Passed through to the ``File`` constructor

        Returns:
            - The new ``File`` object, or None if ``filename`` is not a file
        """
        if not os.path.isfile(filename):
            print('File "%s" does not exists' % filename)
            return None
        file = File(filename, self, parent_dataset, axarr)
        wb = load_workbook(filename)
        num_sheets = len(wb.sheetnames)
        for i, k in enumerate(wb.sheetnames):
            print("%d: %s" % (i, k))

        # Keep asking until a valid sheet number is given, instead of going
        # on with an out-of-range (or negative) index.
        while True:
            answer = input(
                "Select the Sheet that contains the data (number between 0 and %d) > "
                % (num_sheets - 1)
            )
            try:
                opt = int(answer)
            except ValueError:
                opt = -1
            if 0 <= opt < num_sheets:
                break
            print("Invalid option!")
        ws = wb[wb.sheetnames[opt]]

        # Letters of all the columns that exist in the selected sheet
        cexcelnames = [self._column_letter(k) for k in range(1, ws.max_column + 1)]

        # Preview: column letters followed by the first rows of the sheet
        for letter in cexcelnames[: self.PREVIEW_COLUMNS]:
            print("%10s" % letter, end=" ")
        print("")
        for i, row in enumerate(ws.rows):
            if i >= self.PREVIEW_ROWS:
                break
            for cell in row[: self.PREVIEW_COLUMNS]:
                a = cell.value
                if isinstance(a, float):
                    print("%10.5g" % a, end=" ")
                elif isinstance(a, str):
                    print("%10s" % a, end=" ")
                elif isinstance(a, int) and not isinstance(a, bool):
                    print("%10d" % a, end=" ")
                else:
                    # Keep the columns aligned for empty / other cell types
                    print("%10s" % "", end=" ")
            print("")

        first_row = self.FIRST_DATA_ROW
        # Never negative, even for sheets shorter than the skipped rows
        file.data_table.num_rows = max(ws.max_row - (first_row - 1), 0)
        file.data_table.num_columns = len(self.col_names)
        file.data_table.data = np.zeros(
            (file.data_table.num_rows, file.data_table.num_columns)
        )
        for j, n in enumerate(self.col_names):
            opt = ""
            while opt not in cexcelnames:
                # Accept lower-case letters and surrounding blanks
                opt = input("Column that contains the data for %s > " % n).strip().upper()
            for i in range(first_row, ws.max_row + 1):
                cell_name = "{}{}".format(opt, i)
                file.data_table.data[i - first_row, j] = self._to_float(
                    ws[cell_name].value
                )
        return file