Skip to content
Snippets Groups Projects
Commit 86e70d89 authored by Sangamithra Panneer Selvam's avatar Sangamithra Panneer Selvam
Browse files

improved model

parent ee0dd24f
No related branches found
No related tags found
No related merge requests found
import pandas as pd
import numpy as np
from sklearn.tree import DecisionTreeRegressor
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import MinMaxScaler
class HPP():
def __init__(self, MSSubClass, LotArea, YearBuilt, TotRmsAbvGrd, GarageCars, GrLivArea, OverallQual) -> None:
......@@ -15,12 +14,14 @@ class HPP():
self.GrLivArea = GrLivArea
self.OverallQual = OverallQual
self.data_preparation()
self.normalization()
self.model()
self.prediction_metrics()
self.predict_from_UI()
self.prediction()
def data_preparation(self):
'''
This function reads the train.csv dataset. We further pick the most important and relevant features and further normalize
them to a standard scale.
'''
# Read the data and store in a dataframe called training_set
train_data_path = 'train.csv'
training_set = pd.read_csv(train_data_path)
......@@ -35,67 +36,39 @@ class HPP():
# Select the target variable and call it y
self.y = training_set.SalePrice
self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(self.X, self.y, test_size=0.2, random_state=42, shuffle=True)
def normalization(self):
    '''
    Min-max scale the train/test features and targets.

    Both scalers are fitted on the training split only and then applied to the
    training and the test split, so the test rows do not influence the scaling.

    Reads self.X_train, self.X_test, self.y_train and self.y_test.
    Writes self.scaler_x, self.scaler_y, self.X_train_scaled, self.X_test_scaled,
    self.y_train_scaled and self.y_test_scaled.
    '''
    # Normalization of features in the dataset
    self.scaler_x = MinMaxScaler()
    self.scaler_y = MinMaxScaler()

    # Fit on the training features only, then scale both splits with that fit.
    # (Re-fitting on the full self.X here would leak the test rows into the scaler.)
    self.scaler_x.fit(self.X_train)
    self.X_train_scaled = self.scaler_x.transform(self.X_train)
    self.X_test_scaled = self.scaler_x.transform(self.X_test)

    # The scaler works on 2-D input, so turn each target series into one column.
    y_train_array = self.y_train.values.reshape(-1, 1)
    y_test_array = self.y_test.values.reshape(-1, 1)

    # Same rule for the targets: fit on the training split, transform both.
    # y_train_scaled must keep one row per training sample so that it lines up
    # with X_train_scaled when the model is fitted.
    self.scaler_y.fit(y_train_array)
    self.y_train_scaled = self.scaler_y.transform(y_train_array)
    self.y_test_scaled = self.scaler_y.transform(y_test_array)
def model(self):
    '''
    Create a decision-tree regressor, train it on the scaled training data,
    keep it on the instance as self.tree_model and return it.
    '''
    regressor = DecisionTreeRegressor()
    self.tree_model = regressor

    # Train on the scaled training features and targets
    regressor.fit(self.X_train_scaled, self.y_train_scaled)
    return regressor
def prediction(self):
'''
Intended to predict the required output for the inputs entered by the user in the web-UI.

NOTE(review): no statements follow this docstring in the visible code, so calling this method returns None.
The model fitting and the web-UI prediction appear in model() and predict_from_UI() instead;
confirm whether this method should delegate to predict_from_UI() or be removed.
'''
def prediction_metrics(self):
    '''
    Print the RMSE and the adjusted R-squared of the fitted tree model on the test split.

    Both metrics compare self.y_test_scaled with the predictions for self.X_test_scaled,
    so they are expressed in the scaled target space, not in SalePrice units.
    '''
    y_pred = self.tree_model.predict(self.X_test_scaled)

    # Measure the root mean squared error
    mse = mean_squared_error(self.y_test_scaled, y_pred)
    rmse = np.sqrt(mse)
    print('RMSE for the model is: {}'.format(rmse))

    # Calculate the regular R-squared score
    r2 = r2_score(self.y_test_scaled, y_pred)

    # Calculate the adjusted R-squared score, which penalises R-squared for
    # the number of predictors relative to the number of test samples
    n = len(self.y_test_scaled)
    p = self.X.shape[1]  # Number of predictors/features
    adjusted_r2 = 1 - (1 - r2) * (n - 1) / (n - p - 1)
    print("Adjusted R-squared for the model is: {}".format(adjusted_r2))
def predict_from_UI(self):
    '''
    Predict the house price for the feature values entered in the web-UI.

    The seven stored inputs are scaled with the fitted feature scaler, passed to the
    tree model fitted in model(), and the scaled prediction is mapped back through
    the target scaler.

    Returns the rescaled prediction as a 2-D array with a single row and column.
    '''
    # One sample holding the seven web-UI inputs.
    # NOTE(review): assumes this order matches the feature columns used to fit
    # self.scaler_x - confirm against data_preparation().
    ui_features = [[self.MSSubClass, self.LotArea, self.YearBuilt, self.TotRmsAbvGrd,
                    self.GarageCars, self.GrLivArea, self.OverallQual]]

    # Predict with the already fitted self.tree_model; no second model is
    # created or fitted here.
    y_pred_UI = self.tree_model.predict(self.scaler_x.transform(ui_features))
    print(y_pred_UI)

    # Rescale the predicted SalePrice in accordance to the original scale
    y_pred_UI = self.scaler_y.inverse_transform(y_pred_UI.reshape(-1, 1))
    return y_pred_UI
def predict_sale_price(MSSubClass, LotArea, YearBuilt, TotRmsAbvGrd, GarageCars, GrLivArea, OverallQual):
    '''
    Return the predicted sale price for one house described by the seven features.

    Constructing HPP runs its whole pipeline (the constructor calls the preparation,
    normalization and model steps), after which the web-UI prediction is requested
    once more and returned.

    Returns whatever HPP.predict_from_UI() returns for these inputs.
    '''
    hpp = HPP(MSSubClass, LotArea, YearBuilt, TotRmsAbvGrd, GarageCars, GrLivArea, OverallQual)
    return hpp.predict_from_UI()
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment