思考题:
"""
测试数据集 test.csv
测试数据的目标值 submission_example.csv
"""
def _rmse(predicted, actual):
    """Return the root-mean-square error between *predicted* and *actual*."""
    squared_error = (predicted - actual) ** 2
    return np.sqrt(np.mean(squared_error))


# Both CSV files are read from the same directory.
_data_dir = "/Users/qinggeouye/Desktop/GeekTime/MathematicProgrammer/29_featureTrans"
df_test = pd.read_csv(_data_dir + "/test.csv")
expected_test = pd.read_csv(_data_dir + "/submission_example.csv")['medv']

# Convert once; both scalers below consume the same float frame.
_test_as_float = df_test.astype(dtype=float)

# NOTE(review): each scaler below is fitted on the test set itself, so the
# test features are scaled with test-set statistics. If regression_normalized
# and regression_standardized were trained on data scaled by other scaler
# instances, the scaling here will not match the training scaling -- confirm
# and reuse the training scalers if so.
# NOTE(review): expected_test is taken from the CSV unscaled, while the
# predictions presumably come out in the models' scaled target space; that
# would explain the large RMSE values observed -- confirm against the
# training code.

# Min-max normalization: predict and score.
minMaxScaler_test = MinMaxScaler()
minMaxScaler_test.fit(_test_as_float)
df_test_normalized = minMaxScaler_test.transform(_test_as_float)
# All rows and all columns of the scaled array are passed as features.
df_test_features_normalized = df_test_normalized[:, :]
predicted_normalized = regression_normalized.predict(df_test_features_normalized)
print("归一化预测结果与实际值的均方根误差:%s" % _rmse(predicted_normalized, expected_test))

# Standardization: predict and score.
standardScaler_test = StandardScaler()
df_test_standardized = standardScaler_test.fit_transform(_test_as_float)
# All rows and all columns of the scaled array are passed as features.
df_test_features_standardized = df_test_standardized[:, :]
predicted_standardized = regression_standardized.predict(df_test_features_standardized)
print("标准化预测结果与实际值的均方根误差:%s" % _rmse(predicted_standardized, expected_test))

# Observed result: the two feature transformations give nearly identical
# errors, but both sets of predictions are far from the actual values.
归一化预测结果与实际值的均方根误差:22.40003520184502
标准化预测结果与实际值的均方根误差:22.785218713879576
展开