improved model

86e70d89 · Sangamithra Panneer Selvam · ee0dd24f · 86e70d89
Commit 86e70d89 authored 2 years ago by Sangamithra Panneer Selvam
--- a/House_Price_Prediction/hpp_model/predict_sale_price.py
+++ b/House_Price_Prediction/hpp_model/predict_sale_price.py
 import pandas as pd
 import numpy as np
 from sklearn.tree import DecisionTreeRegressor
-from sklearn.preprocessing import StandardScaler, MinMaxScaler
-from sklearn.model_selection import train_test_split
-from sklearn.metrics import mean_squared_error, r2_score
+from sklearn.preprocessing import MinMaxScaler
+

 class HPP():
    def __init__(self, MSSubClass, LotArea, YearBuilt, TotRmsAbvGrd, GarageCars, GrLivArea, OverallQual) -> None:
@@ -15,12 +14,14 @@ class HPP():
        self.GrLivArea = GrLivArea
        self.OverallQual = OverallQual
        self.data_preparation()
-        self.normalization()
-        self.model()
-        self.prediction_metrics()
-        self.predict_from_UI()
+        self.prediction()

    def data_preparation(self):
+        '''
+        This function reads the train.csv dataset, picks the most important and relevant features together with the
+        SalePrice target, and normalizes both with min-max scaling.
+        '''
+
        # Read the data and store in a dataframe called training_set
        train_data_path = 'train.csv'
        training_set = pd.read_csv(train_data_path)
@@ -35,67 +36,39 @@ class HPP():
        # Select the target variable and call it y
        self.y = training_set.SalePrice

-        self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(self.X, self.y, test_size=0.2, random_state=42, shuffle=True)
-
-    def normalization(self):
        # Normalization of features in the dataset
-
        self.scaler_x = MinMaxScaler() 
        self.scaler_y = MinMaxScaler()

-        self.scaler_x.fit(self.X_train)
+        # Scale the training input variables
+        self.X_train_scaled = self.scaler_x.fit_transform(self.X)

-        # Scale the training and testing input variables
-        self.X_train_scaled = self.scaler_x.transform(self.X_train)
-        self.X_test_scaled = self.scaler_x.transform(self.X_test)
-
-        y_train_array = self.y_train.values.reshape(-1, 1)
-        y_test_array = self.y_test.values.reshape(-1, 1)
-
-        self.scaler_y.fit(y_train_array)
-
-        # Scale the training and testing target variables
-        self.y_train_scaled = self.scaler_y.transform(y_train_array)
-        self.y_test_scaled = self.scaler_y.transform(y_test_array)
+        # Scale the training target variable (SalePrice)
+        y_train_array = self.y.values.reshape(-1, 1)
+        self.y_train_scaled = self.scaler_y.fit_transform(y_train_array)
    
-    def model(self):
-        # Define the  model
-        self.tree_model = DecisionTreeRegressor()
-
-        # Fit model
-        self.tree_model.fit(self.X_train_scaled, self.y_train_scaled)
-        return self.tree_model
+    def prediction(self):
+        '''
+        This function defines and fits the decision-tree model, then predicts the SalePrice (rescaled back to the original price scale) for the inputs entered by the user in the web-UI.
+        '''

-    def prediction_metrics(self):
-        y_pred = self.tree_model.predict(self.X_test_scaled)
+        # Define the model
+        tree_model = DecisionTreeRegressor(splitter='random', random_state=2)

-        # Measure the root mean squared error
-        mse = mean_squared_error(self.y_test_scaled, y_pred)
-        rmse = np.sqrt(mse)
-        print('RMSE for the model is: {}'.format(rmse))
-
-        # Calculate the regular R-squared score
-        r2 = r2_score(self.y_test_scaled, y_pred)
-
-        # Calculate the adjusted R-squared score
-        n = len(self.y_test_scaled)
-        p = self.X.shape[1]  # Number of predictors/features
-        adjusted_r2 = 1 - (1 - r2) * (n - 1) / (n - p - 1)
-
-        print("Adjusted R-squared for the model is: {}".format(adjusted_r2))
-    
-    def predict_from_UI(self):
-        # Predict the house-price for the inputs features from the web-UI
-        y_pred_UI = self.tree_model.predict(self.scaler_x.transform([[self.MSSubClass, self.LotArea, self.YearBuilt, self.TotRmsAbvGrd,
+        # Fit model 
+        tree_model.fit(self.X_train_scaled, self.y_train_scaled)

+        # Predict the SalePrice output with the inputs from the web-UI as entered by the user
+        y_pred_UI = tree_model.predict(self.scaler_x.transform([[self.MSSubClass, self.LotArea, self.YearBuilt, self.TotRmsAbvGrd, 
                                          self.GarageCars, self.GrLivArea, self.OverallQual]]))
+        
        # Rescale the predicted SalePrice in accordance to the standard scale
-        print(y_pred_UI)
        y_pred_UI = self.scaler_y.inverse_transform(y_pred_UI.reshape(-1, 1))
-        return y_pred_UI

+        return y_pred_UI

+    
 def predict_sale_price(MSSubClass, LotArea, YearBuilt, TotRmsAbvGrd, GarageCars, GrLivArea, OverallQual):
    hpp = HPP(MSSubClass, LotArea, YearBuilt, TotRmsAbvGrd, GarageCars, GrLivArea, OverallQual)
-    SalePrice_UI = hpp.predict_from_UI()
-    return SalePrice_UI
+    SalePrice_prediction = hpp.prediction()
+    return SalePrice_prediction
\ No newline at end of file