我曾尝试使用scikit-learn的RF回归,但由于我的标准模型(来自文档和教程)存在问题,所以有代码:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor
db = pd.read_excel('/home/artyom/myprojects//valuevo/field2019/report/segs_inventar_dataframe/excel_var/invcents.xlsx')
age = df[['AGE_1','AGE_2','AGE_3','AGE_4','AGE_5']]
hight = df [['HIGHT_','HIGHT_1','HIGHT_2','HIGHT_3','HIGHT_4','HIGHT_5']]
diam = df[['DIAM_','DIAM_1','DIAM_2','DIAM_3','DIAM_4','DIAM_5']]
za = df[['ZAPSYR_','ZAPSYR_1','ZAPSYR_2','ZAPSYR_3','ZAPSYR_4','ZAPSYR_5']]
tova = df[['TOVARN_','TOVARN_1','TOVARN_2','TOVARN_3','TOVARN_4','TOVARN_5']]
#df['average'] = df.mean(numeric_only=True,axis=1)
df['meanage'] = age.mean(numeric_only=True,axis=1)
df['meanhight'] = hight.mean(numeric_only=True,axis=1)
df['mediandiam'] = diam.mean(numeric_only=True,axis=1)
df['medianza'] = za.mean(numeric_only=True,axis=1)
df['mediantova'] = tova.mean(numeric_only=True,axis=1)
unite = df[['gapA_segA','gapP_segP','A_median','p_median','circ_media','fdi_median','pfd_median','p_a_median','gsci_media','meanhight']].dropna()
from sklearn.model_selection import train_test_split as ttsplit
df_copy = unite.copy()
trainXset = df_copy[['gapA_segA','gsci_media']]
trainYset = df_copy [['meanhight']]
trainXset_train,trainXset_test,trainYset_train,trainYset_test = ttsplit(trainXset,trainYset,test_size=0.3) # 70% training and 30% test
rf = RandomForestRegressor(n_estimators = 100,random_state = 40)
rf.fit(trainXset_train,trainYset_train)
predictions = rf.predict(trainXset_test)
errors = abs(predictions - trainYset_test)
mape = 100 * (errors / trainYset_test)
accuracy = 100 - np.mean(mape)
print('accuracy:',round(accuracy,2),'%.')
但是输出效果不好:
---> 24 errors = abs(predictions - trainYset_test)
25 # Calculate mean absolute percentage error (MAPE)
26 mape = 100 * (errors / trainYset_test)
..... somemore track
ValueError: Unable to coerce to Series,length must be 1: given 780
我该如何解决? 780是trainYset_test的形状。我不要求简单的Solve(为我做代码),但要寻求建议,为什么会引发错误。我在教程中都喜欢。