问题
A newbie, I am trying to increase familiarity with OOP, building a class structure to allow for a more organized way of coding. I have several files in my structure, formulas.py, df.py and main.py, and a test.py file. Basically, I am therefore targeting elimination of test.py and df.py. Also, in class Data_Load, I have get_EDA_shape, get_EDA_describe and get_EDA_info. These are also redundant, and should be replaced by the functionality presented in def common_stats().
Now I want to integrate with/make a part of the formulas.py class Data_Load:
- def common_stats() in the class Data_Load hierarchy
formulas.py (I know that it shows the same twice which is what I am trying to reduce):
import pandas as pd
from df import df
class Data_load:
def __init__(self, df):
self.df = pd.read_csv(df, delimiter=';')
def get_EDA_info(self):
return self.df.info()
def get_EDA_describe(self):
return self.df.describe()
def get_EDA_shape(self):
return self.df.shape
def common_stats(df, name): # How do I make this def part of the Data_Load class?
df.name = name
print('#{}\n'.format(df.name))
for col in df.columns:
# Stats
print('\nStats')
print('#{}\n'.format(col))
print('\nvalue_counts')
print(df[col].value_counts(dropna=False))
print('\n')
print(df.columns)
print('\nsort_values')
print(df[col].sort_values(ascending=True))
print('\nisnull')
print(df[col].isnull())
print('\nisna')
print(df[col].isna())
print('\ndescribe')
print(df[col].describe())
print('\ndtypes')
print(df[col].dtypes)
print('\nshape')
print(df[col].shape)
print('\nnunique')
print(df[col].nunique())
print('\nunique')
print(df[col].unique())
print('\nisnull().sum()')
print(df.isnull().sum())
def get_details(self):
return self.common_stats()
if __name__=='__main__':
common_stats(df, name='Analysis')
df.py:
import pandas as pd
df_ = r"C:\\...\\Python data\\test_line_ADFS.csv"
df = pd.read_csv(df_, sep=';',
usecols=['col1', 'col2', 'coln'])
...and main.py
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
warnings.simplefilter(action='ignore', category=DeprecationWarning)
warnings.simplefilter(action='ignore', category=RuntimeWarning)
warnings.simplefilter(action='ignore', category=UserWarning)
# from formulas import file_load
import pandas as pd
from formulas import Data_load
from formulas import common_stats, get_details
from df import df_, df
myData = Data_load(df_)
EDA_stats_1 = myData.get_EDA_shape()
EDA_stats_2 = common_stats(df, name='Analysis')
print("\nEDA_stats_1")
print(EDA_stats_1)
print("\nEDA_stats_2")
print(EDA_stats_2)
test.py - Also, how do I make this def part of the Data_Load class in formulas.py?
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
warnings.simplefilter(action='ignore', category=DeprecationWarning)
warnings.simplefilter(action='ignore', category=RuntimeWarning)
warnings.simplefilter(action='ignore', category=UserWarning)
from df import df
import pandas as pd
import numpy as np # for declaring an array
count = df['count'].astype(int)
def mean(count):
n = len(count)
mean = sum(count) / n
return mean
def variance(count):
n = len(count)
mean = sum(count) / n
deviations = [(x - mean) ** 2 for x in count]
variance = sum(deviations) / n
return variance
def stdev(count):
import math
var = variance(count)
std_dev = math.sqrt(var)
return std_dev
print("Variance of the sample is % s " % (variance(count)))
print("Standard Deviation of the sample is % s " % (stdev(count)))
print("Mean of the sample is % s " % (mean(count)))
Sample data (unique date=date of unique logon). Some students have more observations over time => several logons:
unique date User Name Start to_date
1 2020-01-06 00:22:00 192421 E19 2020-08-31
191 2020-05-21 08:41:00 193203 E19 2020-12-09
146 2020-05-20 17:51:00 193537 E20 2020-12-08
321 2020-05-22 20:19:00 193203 E19 2020-12-09
339 2020-05-23 09:02:00 193203 E19 2020-12-09
... ... ... ... ...
927 2020-05-29 21:19:00 165156 E20 2020-06-08
1051 2020-05-31 20:48:00 165156 E20 2020-06-08
88 2020-05-20 06:15:00 184806 E19 2020-05-26
239 2020-05-21 18:59:00 195350 E19 2020-05-25
547 2020-05-25 19:39:00 200580 F20 2020-05-26
来源:https://stackoverflow.com/questions/65538649/how-do-i-integrate-def-common-stats-into-class-data-load