Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
T
tutorials
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Requirements
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Locked files
Deploy
Releases
Model registry
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
Repository analytics
Code review analytics
Issue analytics
Insights
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Terms and privacy
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Sangamithra Panneer Selvam
tutorials
Commits
86e70d89
Commit
86e70d89
authored
1 year ago
by
Sangamithra Panneer Selvam
Browse files
Options
Downloads
Patches
Plain Diff
improved model
parent
ee0dd24f
No related branches found
No related tags found
No related merge requests found
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
House_Price_Prediction/hpp_model/predict_sale_price.py
+28
-55
28 additions, 55 deletions
House_Price_Prediction/hpp_model/predict_sale_price.py
with
28 additions
and
55 deletions
House_Price_Prediction/hpp_model/predict_sale_price.py
+
28
−
55
View file @
86e70d89
import
pandas
as
pd
import
numpy
as
np
from
sklearn.tree
import
DecisionTreeRegressor
from
sklearn.preprocessing
import
StandardScaler
,
MinMaxScaler
from
sklearn.model_selection
import
train_test_split
from
sklearn.metrics
import
mean_squared_error
,
r2_score
from
sklearn.preprocessing
import
MinMaxScaler
class
HPP
():
def
__init__
(
self
,
MSSubClass
,
LotArea
,
YearBuilt
,
TotRmsAbvGrd
,
GarageCars
,
GrLivArea
,
OverallQual
)
->
None
:
...
...
@@ -15,12 +14,14 @@ class HPP():
self
.
GrLivArea
=
GrLivArea
self
.
OverallQual
=
OverallQual
self
.
data_preparation
()
self
.
normalization
()
self
.
model
()
self
.
prediction_metrics
()
self
.
predict_from_UI
()
self
.
prediction
()
def
data_preparation
(
self
):
'''
This function reads the train.csv dataset, picks the most important and relevant features, and normalizes
them to a standard scale.
'''
# Read the data and store in a dataframe called training_set
train_data_path
=
'
train.csv
'
training_set
=
pd
.
read_csv
(
train_data_path
)
...
...
@@ -35,67 +36,39 @@ class HPP():
# Select the target variable and call it y
self
.
y
=
training_set
.
SalePrice
self
.
X_train
,
self
.
X_test
,
self
.
y_train
,
self
.
y_test
=
train_test_split
(
self
.
X
,
self
.
y
,
test_size
=
0.2
,
random_state
=
42
,
shuffle
=
True
)
def normalization(self):
    '''
    Scale the train/test features and targets with MinMaxScaler.

    Reads self.X_train, self.X_test, self.y_train and self.y_test and
    stores:
      - self.scaler_x / self.scaler_y: the fitted scalers (reused later to
        transform UI inputs and to inverse-transform predictions),
      - self.X_train_scaled / self.X_test_scaled,
      - self.y_train_scaled / self.y_test_scaled (column vectors).
    '''
    self.scaler_x = MinMaxScaler()
    self.scaler_y = MinMaxScaler()

    # Fit on the training split only, so no information from the held-out
    # rows leaks into the scaling; the test split is transformed with the
    # parameters learned from the training rows.
    self.X_train_scaled = self.scaler_x.fit_transform(self.X_train)
    self.X_test_scaled = self.scaler_x.transform(self.X_test)

    # MinMaxScaler expects 2-D input, hence the (-1, 1) reshape.
    y_train_array = self.y_train.values.reshape(-1, 1)
    y_test_array = self.y_test.values.reshape(-1, 1)

    # Same rule for the target: fit on train, reuse for test. This keeps
    # y_train_scaled row-aligned with X_train_scaled.
    self.y_train_scaled = self.scaler_y.fit_transform(y_train_array)
    self.y_test_scaled = self.scaler_y.transform(y_test_array)
def model(self):
    '''
    Build a DecisionTreeRegressor with default settings, fit it on the
    scaled training data, keep it on self.tree_model and return it.
    '''
    regressor = DecisionTreeRegressor()
    self.tree_model = regressor

    # Train on the scaled training features/targets stored on the instance.
    regressor.fit(self.X_train_scaled, self.y_train_scaled)
    return regressor
def prediction(self):
    '''
    This function defines the model and predicts the required output for the inputs entered by the user in the web-UI.

    NOTE(review): no statements follow this docstring in the visible span, so
    as written the method returns None. Confirm where the implementation
    described above is meant to live.
    '''
def prediction_metrics(self):
    '''
    Evaluate the fitted tree on the test split and print its RMSE and
    adjusted R-squared.

    Both metrics are computed against self.y_test_scaled, i.e. on the scaled
    targets, so the RMSE is expressed in scaled units rather than in
    sale-price units.
    '''
    y_pred = self.tree_model.predict(self.X_test_scaled)

    # Measure the root mean squared error
    mse = mean_squared_error(self.y_test_scaled, y_pred)
    rmse = np.sqrt(mse)
    print('RMSE for the model is: {}'.format(rmse))

    # Calculate the regular R-squared score
    r2 = r2_score(self.y_test_scaled, y_pred)

    # Calculate the adjusted R-squared score
    n = len(self.y_test_scaled)  # Number of test samples
    p = self.X.shape[1]  # Number of predictors/features
    degrees_of_freedom = n - p - 1
    if degrees_of_freedom > 0:
        adjusted_r2 = 1 - (1 - r2) * (n - 1) / degrees_of_freedom
    else:
        # Adjusted R-squared is undefined when there are not more samples
        # than predictors + 1; report NaN instead of dividing by zero.
        adjusted_r2 = float('nan')
    print("Adjusted R-squared for the model is: {}".format(adjusted_r2))
def predict_from_UI(self):
    '''
    Predict the sale price for the feature values received from the web-UI.

    Uses the already-fitted self.tree_model together with self.scaler_x and
    self.scaler_y; nothing is re-trained here.

    Returns the prediction mapped back through scaler_y.inverse_transform,
    as a 2-D array with a single row and a single column.
    '''
    # One sample, seven features.
    # NOTE(review): the order presumably matches the feature columns used
    # for training - confirm against data_preparation.
    ui_features = [[
        self.MSSubClass,
        self.LotArea,
        self.YearBuilt,
        self.TotRmsAbvGrd,
        self.GarageCars,
        self.GrLivArea,
        self.OverallQual,
    ]]

    # Predict the house-price for the input features from the web-UI
    y_pred_UI = self.tree_model.predict(self.scaler_x.transform(ui_features))
    print(y_pred_UI)

    # Rescale the predicted SalePrice back from the scaled range
    y_pred_UI = self.scaler_y.inverse_transform(y_pred_UI.reshape(-1, 1))
    return y_pred_UI
def predict_sale_price(MSSubClass, LotArea, YearBuilt, TotRmsAbvGrd, GarageCars, GrLivArea, OverallQual):
    '''
    Build an HPP instance from the seven feature values and return the
    sale price it predicts for them via HPP.predict_from_UI().
    '''
    hpp = HPP(MSSubClass, LotArea, YearBuilt, TotRmsAbvGrd, GarageCars, GrLivArea, OverallQual)
    return hpp.predict_from_UI()
\ No newline at end of file
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment