全部, 尝试在pipleline中使用KNN imputer时遇到问题。我的工作流程如下所示。
我已经将数值变量和类别变量分开,并构建了一个如下所示的pipleline
numeric_transformer = Pipeline(steps=[
('imputer',KNN(k=3)),('scaler',StandardScaler())])
categorical_transformer = Pipeline(steps=[
('imputer',SimpleImputer(strategy='most_frequent',fill_value='missing')),('onehot',OneHotEncoder(handle_unknown='ignore'))])
preprocessor = ColumnTransformer(
transformers=[
('num',numeric_transformer,num_attr),('cat',categorical_transformer,cat_attr)])
我想使用KNN imputer来估算数字列中的缺失值。
我进行了逻辑回归
clf_logreg = Pipeline(steps=[('preprocessor',preprocessor),('classifier',LogisticRegression())])
clf_logreg.fit(X_train,Y_train)
上面的代码块工作正常,但是当我尝试预测X_train时,出现以下错误。请帮帮我。谢谢
train_pred_logreg = clf_logreg.predict(X_train)
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-121-f17e49913947> in <module>
1 #train_pred_logreg = clf_logreg.predict(X_train)
----> 2 test_pred_logreg = clf_logreg.predict(X_test)
3
4 print(confusion_matrix(y_true=Y_train,y_pred = train_pred_logreg))
5
/opt/conda/lib/python3.6/site-packages/sklearn/utils/metaestimators.py in <lambda>(*args,**kwargs)
114
115 # lambda,but not partial,allows help() to work with update_wrapper
--> 116 out = lambda *args,**kwargs: self.fn(obj,*args,**kwargs)
117 # update the docstring of the returned function
118 update_wrapper(out,self.fn)
/opt/conda/lib/python3.6/site-packages/sklearn/pipeline.py in predict(self,X,**predict_params)
419 Xt = X
420 for _,name,transform in self._iter(with_final=False):
--> 421 Xt = transform.transform(Xt)
422 return self.steps[-1][-1].predict(Xt,**predict_params)
423
/opt/conda/lib/python3.6/site-packages/sklearn/compose/_column_transformer.py in transform(self,X)
537 'remainder keyword')
538
--> 539 Xs = self._fit_transform(X,None,_transform_one,fitted=True)
540 self._validate_output(Xs)
541
/opt/conda/lib/python3.6/site-packages/sklearn/compose/_column_transformer.py in _fit_transform(self,y,func,fitted)
418 message=self._log_message(name,idx,len(transformers)))
419 for idx,(name,trans,column,weight) in enumerate(
--> 420 self._iter(fitted=fitted,replace_strings=True),1))
421 except ValueError as e:
422 if "Expected 2D array,got 1D array instead" in str(e):
/opt/conda/lib/python3.6/site-packages/joblib/parallel.py in __call__(self,iterable)
919 # remaining jobs.
920 self._iterating = False
--> 921 if self.dispatch_one_batch(iterator):
922 self._iterating = self._original_iterator is not None
923
/opt/conda/lib/python3.6/site-packages/joblib/parallel.py in dispatch_one_batch(self,iterator)
757 return False
758 else:
--> 759 self._dispatch(tasks)
760 return True
761
/opt/conda/lib/python3.6/site-packages/joblib/parallel.py in _dispatch(self,batch)
714 with self._lock:
715 job_idx = len(self._jobs)
--> 716 job = self._backend.apply_async(batch,callback=cb)
717 # A job can complete so quickly than its callback is
718 # called before we get here,causing self._jobs to
/opt/conda/lib/python3.6/site-packages/joblib/_parallel_backends.py in apply_async(self,callback)
180 def apply_async(self,callback=None):
181 """Schedule a func to be run"""
--> 182 result = ImmediateResult(func)
183 if callback:
184 callback(result)
/opt/conda/lib/python3.6/site-packages/joblib/_parallel_backends.py in __init__(self,batch)
547 # Don't delay the application,to avoid keeping the input
548 # arguments in memory
--> 549 self.results = batch()
550
551 def get(self):
/opt/conda/lib/python3.6/site-packages/joblib/parallel.py in __call__(self)
223 with parallel_backend(self._backend,n_jobs=self._n_jobs):
224 return [func(*args,**kwargs)
--> 225 for func,args,kwargs in self.items]
226
227 def __len__(self):
/opt/conda/lib/python3.6/site-packages/joblib/parallel.py in <listcomp>(.0)
223 with parallel_backend(self._backend,kwargs in self.items]
226
227 def __len__(self):
/opt/conda/lib/python3.6/site-packages/sklearn/pipeline.py in _transform_one(transformer,weight,**fit_params)
693
694 def _transform_one(transformer,**fit_params):
--> 695 res = transformer.transform(X)
696 # if we have a weight for this transformer,multiply output
697 if weight is None:
/opt/conda/lib/python3.6/site-packages/sklearn/pipeline.py in _transform(self,X)
538 Xt = X
539 for _,_,transform in self._iter():
--> 540 Xt = transform.transform(Xt)
541 return Xt
542
/opt/conda/lib/python3.6/site-packages/fancyimpute/solver.py in transform(self,y)
223 "doesn't support inductive mode. Only %s.fit_transform is "
224 "supported at this time." % (
--> 225 self.__class__.__name__,self.__class__.__name__))
ValueError: KNN.transform not implemented! This imputation algorithm likely doesn't support inductive mode. Only KNN.fit_transform is supported at this time.
当我尝试使用错误消息中所示的fit_transform时,出现以下错误
clf_logreg.fit_transform(X_train,Y_train)
AttributeError: 'LogisticRegression' object has no attribute 'transform'