Czy w miarę robię zgodnie ze sztuką?
Klasa ExcelTable
zawiera pary klucz-wartość (duplikaty klucza akceptowalne), gdzie kluczem są wiersze tabelki z excela a wartością kolumny innej tabelka też z excela. Klasa obsługuje głównie wyciąganie kolumny z tabeli wartości o nr odpowiadającym nr zadanego wiersza z tabeli kluczy. Pozostałe klasy są pomocnicze.
class Request:
def __init__(self, variable, code, period, currency):
self.variable = variable
self.code = code
self.period = period
self.currency = currency
def __str__(self):
return str(self.variable) + " " + str(self.code) + " " + str(self.period) + " " + str(self.currency)
class Result:
def __init__(self):
self.key = None
self.values = None
self.is_successfull = False
def __str__(self):
return str(self.is_successfull) + str(self.key) + "\n" + str(self.values)
.
class FindFun:
__BASIC__ = lambda df, req: ((df.VariableName == req.variable)
& (df.Code== req.code)
& (df.Period== req.period)).tolist()
__GROUP__ = lambda df, req: ((df.VariableName == req.variable)
& (df.Code.map(lambda x: x[:4]) == req.code[:4])
& (df.Period== req.period)).tolist()
.
class ExcelTable:
def __init__(self, key, values):
"""
i-row of `key` frame corresponds i-col of `values` frame
"""
self.key = key
self.values = self._init_values(values)
def _init_values(self, v):
"""
The name of the values' columns should be equal to key.VariableName if exists
"""
v.columns = self.key.VariableName if "VariableName" in self.key.columns else pd.Index([])
return v
@staticmethod
def create_empty():
key = pd.DataFrame()
values = pd.DataFrame(index=pd.Index(np.arange(START_YEAR, END_YEAR + 1)))
values.index.names = ['Year']
et = ExcelTable(key, values)
return et
def extrapolate(self):
"""
Linear extrapolation of the values' last rows if they are NaN.
"""
self.values = self.values.fillna(method='ffill')
return self
def get_key_by_idx(self, idx):
return self.key.iloc[idx, :]
def get_key_by_idx_first(self, idx):
return self.get_values_by_idx(idx.index(True))
def get_values_by_idx(self, idx):
return self.values.iloc[:, idx]
def get_values_by_idx_first(self, idx):
return self.get_values_by_idx(idx.index(True))
def find(self, request, find_fun = FindFun.__BASIC__):
"""
Gets the only occurence for the found idx.
Accepts only 1 found idx.
Args:
request: (Request)
find_fun: (function) optional
Returns:
(Result)
"""
found_idx = find_fun(self.key, request)
if sum(found_idx) > 1:
raise DuplicatedValues(
"Found more then 1 value corresponding to " +
request.variable + " " + request.code + " " + request.period +
"Check for duplicated in the input file."
)
result = Result()
if sum(found_idx) == 1:
result.is_successfull = True
result.key = get_key_by_idx(found_idx)
result.values = get_values_by_idx(found_idx)
return result
def find_first(self, request, find_fun = FindFun.__GROUP__):
"""
Gets the first occurence for the found idx.
Accepts more than 1 found idxes.
Args:
request: (Request)
find_fun: (function) optional
Returns:
(Result)
"""
found_idx = find_fun(self.key, request)
result = Result()
if sum(found_idx) >= 1:
result.is_successfull = True
result.key = get_key_by_idx_first(found_idx)
result.values = get_values_by_idx_first(found_idx)
return result
def put(key, values):
"""
Duplicates are acceptable.
"""
self.key = pd.concat([self.key, key])
values.name = key.VariableName
self.values = pd.concat([self.values, values])
return self
def to_excel(self, excel_out, sheet_name):
self.key.T.iloc[0:14].to_excel(excel_out, sheet_name=sheet_name, header=False)
self.values.to_excel(excel_out, sheet_name=sheet_name, startrow=15)