from GML import FeatureEngineering
# Run GML's feature-engineering pipeline on `train` with 'Selling_Price' as
# the target: impute, encode, normalize, drop outliers, then create/select
# features.
#
# cat_cols / numeric_cols were previously passed as counts (3 and 11). The run
# output shows "'int' object is not iterable" during "Handling Missing Data",
# after which the unfilled NaNs were rejected by sklearn's input validation
# ("ValueError: Input contains NaN"). Pass explicit column-name lists instead.
# NOTE(review): assumes `train` is a pandas DataFrame and that GML expects
# iterables of column names for these two arguments -- confirm against the
# GML documentation.
target_column = 'Selling_Price'
categorical_columns = train.select_dtypes(
    include=['object', 'category']
).columns.tolist()
# The target is left out so that it is not imputed along with the features.
numeric_columns = [
    column
    for column in train.select_dtypes(include='number').columns
    if column != target_column
]

fe = FeatureEngineering(
    train,
    target_column,
    fill_missing_data=True,
    method_cat='Mode',
    cat_cols=categorical_columns,
    numeric_cols=numeric_columns,
    method_num='Mean',
    encode_data=True,
    normalize=True,
    remove_outliers=True,
    new_features=True,
    feateng_steps=2,  # feateng_steps = 0 for features selection without feature creation
)
X_new, y, test = fe.get_new_data()
==============================
Handling Missing Data
There is missing data
'int' object is not iterable
==============================
Encoding Data
Success
Data Encoded
==============================
Transforming Data
Data Transformed
==============================
Handling Outliers
Before outlier removal
interquartile range: 0.919066837284749
upper_inner_fence: 10.106504611810337
lower_inner_fence: 6.430237262671341
upper_outer_fence: 11.48510486773746
lower_outer_fence: 5.051637006744217
percentage of records out of inner fences: 5.62
percentage of records out of outer fences: 0.22
length of input dataframe: 6313
length of new dataframe after outlier removal: 5958
After outlier removal
==============================
Creating New Features with Features Selection
ValueError Traceback (most recent call last)
in
1 from GML import FeatureEngineering
2
----> 3 fe = FeatureEngineering(train,'Selling_Price', fill_missing_data=True,method_cat='Mode',cat_cols=3,
4 numeric_cols=11,
5 method_num='Mean', encode_data=True,
~\Anaconda3\lib\site-packages\GML\FEATURE_ENGINEERING.py in __init__(self, data, label, fill_missing_data, method_cat, method_num, drop, cat_cols, numeric_cols, thresh_cat, thresh_numeric, encode_data, method, thresh, normalize, method_transform, thresh_numeric_transform, remove_outliers, qu_fence, new_features, task, test_data, verbose, feateng_steps)
156 except:
157 pass
--> 158 X = afc.fit_transform(X, y)
159 if not test_data == None:
160 test_data = afc.transform(test_data)
~\Anaconda3\lib\site-packages\GML\AUTO_FEATURE_ENGINEERING\autofeat.py in fit_transform(self, X, y)
245 cols = [str(c) for c in X.columns] if isinstance(X, pd.DataFrame) else []
246 # check input variables
--> 247 X, target = check_X_y(X, y, y_numeric=self.problem_type == "regression", dtype=None)
248 if not cols:
249 # the additional zeros in the name are because of the variable check in _generate_features,
~\Anaconda3\lib\site-packages\sklearn\utils\validation.py in inner_f(*args, **kwargs)
70 FutureWarning)
71 kwargs.update({k: arg for k, arg in zip(sig.parameters, args)})
---> 72 return f(**kwargs)
73 return inner_f
74
~\Anaconda3\lib\site-packages\sklearn\utils\validation.py in check_X_y(X, y, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, multi_output, ensure_min_samples, ensure_min_features, y_numeric, estimator)
793 raise ValueError("y cannot be None")
794
--> 795 X = check_array(X, accept_sparse=accept_sparse,
796 accept_large_sparse=accept_large_sparse,
797 dtype=dtype, order=order, copy=copy,
~\Anaconda3\lib\site-packages\sklearn\utils\validation.py in inner_f(*args, **kwargs)
70 FutureWarning)
71 kwargs.update({k: arg for k, arg in zip(sig.parameters, args)})
---> 72 return f(**kwargs)
73 return inner_f
74
~\Anaconda3\lib\site-packages\sklearn\utils\validation.py in check_array(array, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, ensure_min_samples, ensure_min_features, estimator)
642
643 if force_all_finite:
--> 644 _assert_all_finite(array,
645 allow_nan=force_all_finite == 'allow-nan')
646
~\Anaconda3\lib\site-packages\sklearn\utils\validation.py in _assert_all_finite(X, allow_nan, msg_dtype)
102 elif X.dtype == np.dtype('object') and not allow_nan:
103 if _object_dtype_isnan(X).any():
--> 104 raise ValueError("Input contains NaN")
105
106
ValueError: Input contains NaN