# Source code for mFlow.Blocks.imputer


import sys, os
from mFlow.Workflow.compute_graph import node

from sklearn.impute import SimpleImputer
import pandas as pd
import numpy as np



def Imputer(*args, **kwargs):
    """Build a compute-graph node named "Imputer" around ``__Imputer``.

    The positional and keyword arguments are not interpreted here; they are
    forwarded unchanged to ``node`` together with ``__Imputer`` as the
    function to run.
    NOTE(review): presumably ``node`` later calls ``__Imputer(*args,
    **kwargs)`` when the graph executes -- confirm against
    ``mFlow.Workflow.compute_graph.node``.

    Returns:
        Whatever ``node(...)`` returns for this function/argument bundle.
    """
    return node(function=__Imputer, args=args, kwargs=kwargs, name="Imputer")
def __Imputer(df, method="mean", show=False):
    """Fill NaN entries in the feature columns of a data frame.

    Every column except ``'target'`` is treated as a feature. If any feature
    value is NaN, a ``SimpleImputer`` with ``strategy=method`` is fitted on
    the feature matrix and used to fill the gaps; the ``'target'`` column
    (when present) is copied over untouched and ends up as the last column.
    If nothing is missing, the input frame is returned as-is.

    Args:
        df: Mapping with a ``"dataframe"`` key holding a pandas DataFrame.
        method: Strategy name handed to ``SimpleImputer`` (default "mean").
        show: When true, print progress messages to stdout.

    Returns:
        ``{"dataframe": frame}`` where ``frame`` is a new imputed DataFrame
        (same index, feature columns in their original order) or, when there
        were no missing values, the very same DataFrame that was passed in.
    """
    frame = df["dataframe"]

    # Drop 'target' instead of going through a set: a set loses the column
    # order, which made the layout of the imputed frame arbitrary.
    feature_frame = frame.drop(columns=["target"], errors="ignore")
    features = feature_frame.columns
    numeric = feature_frame.values
    h, w = numeric.shape

    if show:
        print(" Imputer: Running on matrix of size %dx%d" % (h, w))

    # np.isnan only accepts numeric arrays, so the features must be numeric.
    if not np.any(np.isnan(numeric)):
        if show:
            print(" Imputer: No missing values")
        return {"dataframe": frame}

    # The estimator is only built once we know there is something to impute.
    model = SimpleImputer(missing_values=np.nan, strategy=method)
    model.fit(numeric)
    imputed = model.transform(numeric)
    result = pd.DataFrame(data=imputed, columns=features, index=frame.index)

    if "target" in frame.columns:
        result["target"] = frame["target"]

    return {"dataframe": result}