I want to apply scaling (using StandardScaler() from sklearn.preprocessing) to a pandas DataFrame, but the scaler returns a numpy array, so I lose all the column names and indices. One way to handle this is to make the DataFrame-to-numpy conversion an explicit step in a Neuraxle pipeline:
from sklearn.preprocessing import StandardScaler
from neuraxle.pipeline import Pipeline
from neuraxle.base import NonFittableMixin, BaseStep

class PandasToNumpy(NonFittableMixin, BaseStep):
    def transform(self, data_inputs):
        # Drop the pandas wrapper and hand plain numpy values to the next step.
        return data_inputs.values

pipeline = Pipeline([
    PandasToNumpy(),
    StandardScaler(),
])
Then, you proceed as you intended:
features = df[["col1", "col2", "col3", "col4"]] # ... your df data
pipeline, scaled_features = pipeline.fit_transform(features)
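Note that scaled_features from this first pipeline is still a plain numpy array, since PandasToNumpy deliberately strips the pandas wrapper. If you want the labels back at this point, a minimal sketch is to rebuild the DataFrame from the scaled array (the scaled_df name below is just for illustration):

import pandas as pd

# Reattach the original index and column names to the scaled numpy array.
scaled_df = pd.DataFrame(scaled_features, index=features.index, columns=features.columns)

The second approach below builds this merge into the step itself.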
You could even do this with a wrapper that converts to numpy, applies the wrapped step, and then restores the DataFrame:
import pandas as pd
from sklearn.preprocessing import StandardScaler

from neuraxle.base import MetaStepMixin, BaseStep
from neuraxle.steps.sklearn import SKLearnWrapper

class PandasValuesChangerOf(MetaStepMixin, BaseStep):
    def __init__(self, wrapped):
        # Initialize both parents explicitly, as Neuraxle's mixin classes expect.
        BaseStep.__init__(self)
        MetaStepMixin.__init__(self, wrapped)

    def transform(self, data_inputs):
        new_data_inputs = self.wrapped.transform(data_inputs.values)
        new_data_inputs = self._merge(data_inputs, new_data_inputs)
        return new_data_inputs

    def fit_transform(self, data_inputs, expected_outputs=None):
        # Neuraxle steps return (fitted_step, outputs) from fit_transform.
        self.wrapped, new_data_inputs = self.wrapped.fit_transform(data_inputs.values)
        new_data_inputs = self._merge(data_inputs, new_data_inputs)
        return self, new_data_inputs

    def _merge(self, data_inputs, new_data_inputs):
        # Rebuild a DataFrame around the transformed values, keeping the
        # original index and column names.
        return pd.DataFrame(
            new_data_inputs,
            index=data_inputs.index,
            columns=data_inputs.columns
        )

# SKLearnWrapper adapts the sklearn scaler to Neuraxle's (step, outputs)
# fit_transform contract, which fit_transform above unpacks.
df_scaler = PandasValuesChangerOf(SKLearnWrapper(StandardScaler()))
Then, you proceed as you intended:
features = df[["col1", "col2", "col3", "col4"]] # ... your df data
df_scaler, scaled_features = df_scaler.fit_transform(features)
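As a quick sanity check (a sketch, assuming df holds numeric data in col1 through col4), the result keeps its labels and is standardized column-wise:

# scaled_features is now a DataFrame with the original columns and index.
print(scaled_features.columns.tolist())              # ['col1', 'col2', 'col3', 'col4']
print(scaled_features.index.equals(features.index))  # True

# StandardScaler standardizes each column to zero mean and unit (population) std.
print(scaled_features.mean().round(6))       # ~0 for every column
print(scaled_features.std(ddof=0).round(6))  # ~1 for every column

Since PandasValuesChangerOf is itself a BaseStep, df_scaler can also be placed inside a Pipeline alongside other steps.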