Attempting to fit a grid estimator, recieving TypeError : '

问题

I've been attempting to fit a Grid Search K Nearest neighbors Classifier, but am receiving the following Error message

TypeError : '<' not supported between instances of 'str' and 'int'

X_train

   compact   sa     area    roofM3   h   o   glaz    glazing_area_distribution
0   0.66    759.5   318.5   220.50   3.5 2   0.40    3
1   0.76    661.5   416.5   122.50   7.0 3   0.10    1
2   0.66    759.5   318.5   220.50   3.5 3    0.10    1
3   0.74    686.0   245.0   220.50   3.5 5    0.10    4
4   0.64    784.0   343.0   220.50   3.5 2    0.40    4
... ... ... ... ... ... ... ... ...
609 0.98    514.5   294.0   110.25  7.0 4   0.40    2
X_train.describe()

count 614.000000   614.000000  614.000000  614.000000  614.000000  614.000000  614.000000  614.000000
mean   0.762606    673.271173  319.617264  176.826954  5.227199    3.495114    0.236645    2.802932
 std 0.106725    88.757699   43.705256   45.499990   1.751278    1.124751    0.133044    1.571128
 min 0.620000    514.500000  245.000000  110.250000  3.500000    2.000000    0.000000    0.000000
 25% 0.660000    612.500000  294.000000  122.500000  3.500000    2.000000    0.100000    1.000000
 75% 0.820000    759.500000  343.000000  220.500000  7.000000    4.000000    0.400000    4.000000
 max 0.980000    808.500000  416.500000  220.500000  7.000000    5.000000    0.400000    5.000000


y_train

0 15.16
1 32.12
2 11.69
3 10.14
4 19.06
   ...  
609 32.24

Attempting to create and fit a model

from sklearn.model_selection import KFold

model = KFold()
cv_object = KFold(n_splits=5, shuffle=True, random_state=50)


 grid_values = {'n_neighbors': ['1','2','3','4','5'],
      'weights': ['uniform', 'distance']
      }


from sklearn.model_selection import GridSearchCV

model = KNeighborsRegressor()

grid_estimator = GridSearchCV(KNeighborsRegressor(), cv=cv_object, 
param_grid=grid_values, 
scoring='neg_mean_absolute_error')


**grid_estimator.fit(X_train, y_train)**

I have attempted to transform my data with label and OneHotEncoder, and dropped the discrete features, but still receive the same error message:

TypeError                                  Traceback (most recent call last)
 <ipython-input-143-3054acff691c> in <module> 
     19 ohe = OneHotEncoder (df.iloc [-4, -2], 
      20                      sparse = False, handle_unknown = "ignore")
 ---> 21  df_processed_np = ohe . fit_transform ( df )

~ \ anaconda3 \ lib \ site-packages \ sklearn \ preprocessing \ _encoders.py in fit_transform (self, X, y) 
    408          "" "
     409          self . _validate_keywords ( ) 
-> 410 return super ( ) . fit_transform ( X , y )     411     412 def transform ( self , X ) :         
 
     

~ \ anaconda3 \ lib \ site-packages \ sklearn \ base.py in fit_transform (self, X, y, ** fit_params) 
    688          if y is  None : 
    689              # fit method of arity 1 (unsupervised transformation) 
-> 690 return self . fit ( X , ** fit_params ) . transform ( X )     691 else :     692 # fit method of arity 2 (supervised transformation)              
         
             

~ \ anaconda3 \ lib \ site-packages \ sklearn \ preprocessing \ _encoders.py in fit (self, X, y) 
    383          "" "
     384          self . _validate_keywords ( ) 
-> 385          self . _fit ( X , handle_unknown = self . handle_unknown ) 
    386          self . drop_idx_ = self . _compute_drop_idx ( ) 
    387          return self

~ \ anaconda3 \ lib \ site-packages \ sklearn \ preprocessing \ _encoders.py in _fit (self, X, handle_unknown) 
     75  
     76          if self . categories ! =  'auto' : 
---> 77 if len ( self . categories ) ! = n_features :      78                  raise ValueError ("Shape mismatch: if categories is an array,"
      79                                   "it has to be of shape (n_features,) . ")              


TypeError : object of type 'numpy.int64' has no len ()

Attempt to use labelEncoder and OneHotEncoder:

from sklearn.preprocessing import LabelEncoder, OneHotEncoder



cat_columns = df[['orientation', 'glazing_area_distribution']]
label_encoders = {}
for col in cat_columns:
    print("Encoding {}".format(col))
    new_le = LabelEncoder()
    df[col] = new_le.fit_transform(df[col])
    label_encoders[col] = new_le
    
df.index.to_frame().reset_index(drop=True)
    

#cat_columns_idx = [df.get_loc(col) 
#                   for col in cat_columns]

ohe = OneHotEncoder(df.iloc[-4,-2], 
                    sparse=False, handle_unknown="ignore")
df_processed_np = ohe.fit_transform(df)

Error after Categorical/discrete columns were dropped:

  warnings.warn ("Estimator fit failed. The score on this train-test"
C: \ Users \ SamWinter \ anaconda3 \ lib \ site-packages \ sklearn \ model_selection \ _validation.py: 548: FitFailedWarning: Estimator fit failed. The score on this train-test partition for these parameters will be set to nan. Details:
Traceback (most recent call last):
  File "C: \ Users \ SamWinter \ anaconda3 \ lib \ site-packages \ sklearn \ model_selection \ _validation.py", line 531, in _fit_and_score
    estimator.fit (X_train, y_train, ** fit_params)
  File "C: \ Users \ SamWinter \ anaconda3 \ lib \ site-packages \ sklearn \ neighbors \ _base.py", line 1110, in fit
    return self._fit (X)
  File "C: \ Users \ SamWinter \ anaconda3 \ lib \ site-packages \ sklearn \ neighbors \ _base.py", line 439, in _fit
    self.n_neighbors <self._fit_X.shape [0] // 2) and
TypeError: '<' not supported between instances of 'str' and 'int'

  warnings.warn ("Estimator fit failed. The score on this train-test"
-------------------------------------------------- ------------------------- 
TypeError                                  Traceback (most recent call last)
 <ipython-input-168-5fa0b664280a> in <module> 
     18  #random_search = RandomizedSearchCV (k_model, param_distributions = param_grid, 
     19    # n_iter = 10, cv = 5, scoring = 'accuracy') 
---> 20  gridsearch . fit ( X_train2 , y_train )

~ \ anaconda3 \ lib \ site-packages \ sklearn \ utils \ validation.py in inner_f (* args, ** kwargs) 
     70                            FutureWarning)
      71          kwargs . update ( { k : arg for k , arg in zip ( sig . parameters , args ) } ) 
---> 72 return f ( ** kwargs )      73 return inner_f
      74         
      

~ \ anaconda3 \ lib \ site-packages \ sklearn \ model_selection \ _search.py in fit (self, X, y, groups, ** fit_params) 
    763              refit_start_time = time . time ( ) 
    764              if y is  not  None : 
-> 765                  self . best_estimator_ . fit ( X , y ,  ** fit_params ) 
    766              else : 
    767                  self . best_estimator_ . fit (X ,  ** fit_params )

~ \ anaconda3 \ lib \ site-packages \ sklearn \ neighbors \ _base.py in fit (self, X, y) 
   1108                                         multi_output = True)
    1109          self . _y = y
 -> 1110 return self . _fit ( X )    1111    1112 def _more_tags ( self ) :         
 
     

~ \ anaconda3 \ lib \ site-packages \ sklearn \ neighbors \ _base.py in _fit (self, X) 
    437              # and KDTree is generally faster when available 
    438              if ((self.n_neighbors is None or
 -> 439                   self.n_neighbors <self._fit_X.shape [0] // 2) and
     440                      self.metric! = 'precomputed'):
     441                  if self . effective_metric_ in VALID_METRICS [ 'kd_tree' ] :

TypeError : '<' not supported between instances of 'str' and 'int'

来源：https://stackoverflow.com/questions/66070889/attempting-to-fit-a-grid-estimator-recieving-typeerror-not-supported-betw

标签

python

scikit-learn

grid

supervised-learning

gridsearchcv

Attempting to fit a grid estimator, recieving TypeError : '<' not supported between instances of 'str' and 'int'

问题