Appendix
INPUT
# Read the data set and fit the full model of Y on all ten predictors.
# NOTE(review): setwd() to a machine-specific path is kept so the relative
# file name below resolves exactly as before; a project-relative path would
# be more portable.
setwd("C:\\Users\\user\\Desktop")
model.dat <- read.table("haha.txt", header = TRUE)
model.dat  # echo the raw data for the appendix listing

# The fit must be stored with `<-`; a bare `<` is a comparison against a
# not-yet-defined `model.reg` and stops the script in a fresh session.
model.reg <- lm(Y ~ X1 + X2 + X3 + X4 + X5 + X6 + X7 + X8 + X9 + X10,
                data = model.dat)
summary(model.reg)
OUTPUT
Y X1 X2 X3 X4 X5 X6 X7 X8 X9 X10
1 650000 1.0 3150 2 5 4 2.50 0.64 1.40 0.22 0.5
2 1100000 1.0 2380 2 8 3 1.08 0.66 1.32 0.12 0.5
3 850000 2.0 2380 2 4 3 2.04 0.55 0.89 0.29 0.0
4 898000 2.0 1400 1 4 3 1.02 2.00 2.40 0.86 0.5
5 618000 1.0 1400 1 3 2 1.08 0.66 1.32 0.12 0.5
6 1260000 2.0 2380 2 6 4 1.05 0.33 2.15 0.72 1.0
7 560000 2.0 880 2 5 3 2.00 0.12 0.41 0.50 0.5
8 380000 2.0 1540 2 5 5 4.31 0.73 1.44 1.04 0.5
9 750000 2.0 1760 2 4 3 1.08 0.66 1.32 0.12 0.5
10 1250000 2.5 1650 2 6 4 1.08 0.66 1.32 0.12 0.5
11 800000 1.0 2700 2 4 4 0.59 0.94 0.70 0.30 0.5
12 650000 1.0 3150 2 5 4 1.62 0.58 0.85 0.30 0.5
13 830000 1.5 1870 2 4 3 1.62 0.05 0.43 0.35 0.5
14 560000 1.0 1200 1 3 1 3.51 0.26 0.40 0.31 0.5
15 1500000 2.5 3300 1 6 5 1.08 0.66 1.32 0.12 0.5
16 660000 1.0 1540 1 3 2 1.08 0.66 1.32 0.12 0.0
17 2400000 2.0 3000 2 19 4 2.55 0.48 1.32 0.03 0.5
18 1150000 2.5 1650 2 5 4 0.96 0.31 1.11 0.17 0.5
19 925000 2.0 2460 2 4 3 2.10 1.91 0.77 1.80 0.5
20 610000 1.0 3000 2 6 3 0.47 0.74 0.96 0.78 0.5
21 900000 2.0 1400 1 4 3 0.80 1.81 0.94 2.30 0.5
22 550000 1.0 3300 2 4 2 0.50 1.40 2.10 0.47 0.5
23 418000 2.5 700 1 3 3 0.05 0.63 0.46 0.56 0.5
24 1990000 3.0 2880 2 6 6 0.80 0.78 1.20 1.70 0.5
25 480000 1.0 1470 2 3 2 0.05 0.38 0.35 2.40 0.5
26 1300000 1.0 3200 1 3 2 0.05 0.38 0.35 0.40 0.5
27 450000 1.0 1430 2 3 2 0.05 0.38 0.35 0.70 0.5
28 835000 1.0 1760 2 3 2 1.00 0.20 1.12 0.70 0.0
29 1200000 1.0 3525 1 3 2 1.00 0.20 1.12 0.60 0.5
30 690000 1.0 1760 2 3 2 1.66 0.71 1.41 0.14 0.5
31 640000 1.0 1430 1 3 2 0.05 0.35 0.46 0.59 0.5
32 630000 1.0 1650 2 3 2 0.43 0.52 0.89 0.42 0.5
33 880000 2.0 1760 2 4 3 0.32 0.70 1.40 2.50 0.5
34 340000 2.0 658 1 2 2 1.40 1.80 0.20 2.60 0.5
35 1380000 0.0 2262 1 4 4 0.82 0.69 0.50 0.38 1.0
36 500000 1.0 1438 2 3 2 0.43 0.52 0.82 0.42 0.5
37 598000 1.0 1438 1 3 2 0.60 0.80 0.30 0.50 0.5
38 715000 1.0 1438 1 3 2 0.46 0.59 0.90 1.40 0.5
39 650000 1.0 1560 2 3 2 1.07 0.30 1.23 2.50 0.5
40 390000 1.0 1438 1 3 2 0.43 0.52 0.82 0.42 0.5
41 1750000 2.0 3150 1 5 4 0.43 0.52 0.82 0.42 0.5
42 418000 2.5 770 2 3 2 0.43 0.52 0.82 0.42 0.5
43 550000 2.0 1375 1 3 2 0.43 0.52 0.82 0.42 0.5
44 930000 1.5 1650 1 3 3 0.79 0.34 1.12 0.35 1.0
45 500000 1.0 1430 2 3 2 1.13 0.13 0.89 0.23 0.0
46 2180000 3.0 3300 1 6 5 1.03 0.79 1.23 0.20 1.0
47 1000000 2.5 1650 2 6 5 2.06 0.94 1.11 0.23 0.0
48 660000 1.0 1300 1 3 2 1.00 0.57 0.78 0.24 0.5
49 780000 1.0 1300 2 3 2 1.08 0.66 1.32 0.12 0.5
50 600000 1.0 1300 1 3 2 0.43 0.52 0.82 0.42 0.5
51 675000 1.0 1300 1 3 2 0.43 0.52 0.82 0.42 0.5
52 910000 2.0 1600 1 4 3 0.43 0.52 0.82 0.42 0.5
53 1390000 2.0 1600 1 4 3 0.43 0.52 0.82 0.42 0.5
54 435000 3.0 630 1 3 2 0.43 0.52 0.82 0.42 0.0
55 2280000 3.0 3220 1 7 6 0.30 0.67 0.98 1.60 0.5
56 465000 2.5 658 2 3 3 0.56 0.78 1.50 0.60 0.5
57 1080000 2.0 1400 1 3 3 0.43 0.52 0.82 0.42 0.0
58 599000 2.0 960 1 3 2 0.43 0.52 0.82 0.42 1.0
59 498000 1.0 1650 2 3 2 0.43 0.52 0.82 0.42 0.5
60 550000 2.0 880 2 3 2 0.05 0.43 0.35 0.78 1.0
61 658000 1.5 1430 1 4 2 0.05 0.43 0.35 0.45 0.5
62 1200000 2.0 1760 2 3 4 0.12 0.17 0.18 2.10 1.0
63 655000 1.0 1760 2 3 2 0.13 0.36 0.28 0.50 0.5
64 770000 1.0 1540 1 3 2 2.50 1.78 1.40 0.70 0.5
65 500000 3.0 840 2 3 3 1.55 0.53 0.48 2.30 0.5
Call:
lm(formula = Y ~ X1 + X2 + X3 + X4 + X5 + X6 + X7 + X8 + X9 +
X10, data = model.dat)
Residuals:
    Min      1Q  Median      3Q     Max
-532028 -101390    2596  135809  427012
Coefficients:
              Estimate Std. Error t value Pr(>|t|)
(Intercept)   79525.93  156095.35   0.509  0.61250
X1            99512.34   59918.18   1.661  0.10255
X2              220.92      53.87   4.101  0.00014 ***
X3          -262283.09   63980.86  -4.099  0.00014 ***
X4            83774.21   16884.73   4.962 7.34e-06 ***
X5           109262.42   45492.11   2.402  0.01979 *
X6           -67520.15   39347.29  -1.716  0.09189 .
X7          -130055.64   84225.01  -1.544  0.12839
X8            31032.78   75666.86   0.410  0.68334
X9            50432.54   49294.02   1.023  0.31082
X10          106441.45  130001.33   0.819  0.41652
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 230400 on 54 degrees of freedom
Multiple R-squared: 0.7909, Adjusted R-squared: 0.7522
F-statistic: 20.43 on 10 and 54 DF, p-value: 5.681e-15
The fitted regression equation is
$$\hat{Y} = 79525.93 + 99512.34X_1 + 220.92X_2 - 262283.09X_3 + 83774.21X_4 + 109262.42X_5 - 67520.15X_6 - 130055.64X_7 + 31032.78X_8 + 50432.54X_9 + 106441.45X_{10}$$
Non-linearity of Regression Model
Residual VS Built-Up Area
# Simple regression of Y on X2; the residuals are then plotted against X2
# itself, with a horizontal zero line as the visual reference.
model.reg <- lm(Y ~ X2, data = model.dat)
plot(
  x = model.dat[["X2"]],
  y = residuals(model.reg),
  main = "Residuals vs. Built-Up Area",
  xlab = "Built-Up Area",
  ylab = "Residuals",
  pch = 19,
  cex = 1.5,
  col = "red",
  # grid is drawn first so the points sit on top of it
  panel.first = grid(col = "gray", lty = "dotted")
)
abline(h = 0, col = "blue")
Residual VS Bedrooms
# Simple regression of Y on X4; the residuals are then plotted against X4
# itself, with a horizontal zero line as the visual reference.
model.reg <- lm(Y ~ X4, data = model.dat)
plot(
  x = model.dat[["X4"]],
  y = residuals(model.reg),
  main = "Residuals vs. Bedrooms",
  xlab = "Bedrooms",
  ylab = "Residuals",
  pch = 19,
  cex = 1.5,
  col = "red",
  # grid is drawn first so the points sit on top of it
  panel.first = grid(col = "gray", lty = "dotted")
)
abline(h = 0, col = "blue")
Nonconstancy of Error Variance
Residual VS Built-Up Area
# Residuals against fitted values for the Y ~ X2 model, used to judge whether
# the residual spread changes with the predicted value.
model.reg <- lm(Y ~ X2, data = model.dat)
plot(
  x = fitted(model.reg),
  y = residuals(model.reg),
  # The x-axis carries fitted values, not X2, so it is labelled accordingly;
  # the label is also kept on one line so no line break ends up in the plot.
  xlab = "Predicted Values (Built-Up Area model)",
  ylab = "Residuals",
  main = "Residuals vs. Predicted Values",
  col = "red",
  pch = 19,
  cex = 1.5,
  panel.first = grid(col = "gray", lty = "dotted")
)
abline(h = 0, col = "blue")
Residual VS Bedrooms
# Residuals against fitted values for the Y ~ X4 model, used to judge whether
# the residual spread changes with the predicted value.
model.reg <- lm(Y ~ X4, data = model.dat)
plot(
  x = fitted(model.reg),
  y = residuals(model.reg),
  # The x-axis carries fitted values, not X4, so it is labelled accordingly.
  xlab = "Predicted Values (Bedrooms model)",
  ylab = "Residuals",
  main = "Residuals vs. Predicted Values",
  col = "red",
  pch = 19,
  cex = 1.5,
  panel.first = grid(col = "gray", lty = "dotted")
)
abline(h = 0, col = "blue")
Normal Probability Plot
Price vs Built-Up Area
# Normal probability (Q-Q) plot of the residuals from the Y ~ X2 model.
model.reg <- lm(Y ~ X2, data = model.dat)
qqplot <- qqnorm(
  residuals(model.reg),
  main = "Normal Probability Plot",
  # qqnorm() puts theoretical normal quantiles on x and the sample values
  # (here, residuals) on y, so the axes are labelled to match.
  xlab = "Theoretical Quantiles",
  ylab = "Residuals (Built-Up Area model)",
  plot.it = TRUE,
  col = "blue",
  pch = 19,
  cex = 1.5,
  # Must be spelled `panel.first`; `panelfirst` is not an argument of
  # plot.default() and would not draw the grid behind the points.
  panel.first = grid(col = "gray", lty = "dotted")
)
# Least-squares reference line through the plotted (x, y) quantile pairs.
abline(lm(qqplot$y ~ qqplot$x))
Price vs Bedrooms
# Normal probability (Q-Q) plot of the residuals from the Y ~ X4 model.
model.reg <- lm(Y ~ X4, data = model.dat)
qqplot <- qqnorm(
  residuals(model.reg),
  main = "Normal Probability Plot",
  # qqnorm() puts theoretical normal quantiles on x and the sample values
  # (here, residuals) on y, so the axes are labelled to match.
  xlab = "Theoretical Quantiles",
  ylab = "Residuals (Bedrooms model)",
  plot.it = TRUE,
  col = "blue",
  pch = 19,
  cex = 1.5,
  # Must be spelled `panel.first`; `panelfirst` is not an argument of
  # plot.default() and would not draw the grid behind the points.
  panel.first = grid(col = "gray", lty = "dotted")
)
# Least-squares reference line through the plotted (x, y) quantile pairs.
abline(lm(qqplot$y ~ qqplot$x))
Coefficient of Determination
INPUT
# Compare R-squared and adjusted R-squared across nested models that add one
# predictor at a time (X1, then X1 + X2, ... up to all ten predictors).
# NOTE(review): setwd() to a machine-specific path is kept so the relative
# file name below resolves exactly as before.
setwd("C:\\Users\\user\\Desktop")
model.dat <- read.table("haha.txt", header = TRUE)

# Full model. The fit must be stored with `<-`; a bare `<` is a comparison
# and leaves `model.reg` undefined for the summary() call below.
model.reg <- lm(Y ~ X1 + X2 + X3 + X4 + X5 + X6 + X7 + X8 + X9 + X10,
                data = model.dat)
sum.reg <- summary(model.reg)

# Nested models, each adding the next predictor.
model1.reg <- lm(Y ~ X1, data = model.dat)
sum1.reg <- summary(model1.reg)
model2.reg <- lm(Y ~ X1 + X2, data = model.dat)
sum2.reg <- summary(model2.reg)
model3.reg <- lm(Y ~ X1 + X2 + X3, data = model.dat)
sum3.reg <- summary(model3.reg)
model4.reg <- lm(Y ~ X1 + X2 + X3 + X4, data = model.dat)
sum4.reg <- summary(model4.reg)
model5.reg <- lm(Y ~ X1 + X2 + X3 + X4 + X5, data = model.dat)
sum5.reg <- summary(model5.reg)
model6.reg <- lm(Y ~ X1 + X2 + X3 + X4 + X5 + X6, data = model.dat)
sum6.reg <- summary(model6.reg)
model7.reg <- lm(Y ~ X1 + X2 + X3 + X4 + X5 + X6 + X7, data = model.dat)
sum7.reg <- summary(model7.reg)
model8.reg <- lm(Y ~ X1 + X2 + X3 + X4 + X5 + X6 + X7 + X8, data = model.dat)
sum8.reg <- summary(model8.reg)
model9.reg <- lm(Y ~ X1 + X2 + X3 + X4 + X5 + X6 + X7 + X8 + X9,
                 data = model.dat)
sum9.reg <- summary(model9.reg)

# Summaries in the same order as the model labels below (full model last).
nested.sums <- list(sum1.reg, sum2.reg, sum3.reg, sum4.reg, sum5.reg,
                    sum6.reg, sum7.reg, sum8.reg, sum9.reg, sum.reg)

# Each label and identifier is kept on one line: the hard-wrapped form broke
# names such as `sum1.reg` in two and did not parse.
R.sq.value <- data.frame(
  model = c(
    "x1",
    "x1, x2",
    "x1, x2, x3",
    "x1, x2, x3, x4",
    "x1, x2, x3, x4,x5",
    "x1,x2,x3,x4,x5,x6",
    "x1,x2,x3,x4,x5,x6,x7",
    "x1,x2,x3,x4,x5,x6,x7,x8",
    "x1,x2,x3,x4,x5,x6,x7,x8,x9",
    "x1,x2,x3,x4,x5,x6,x7,x8,x9,x10"
  ),
  R.sq = vapply(nested.sums, function(s) s$r.squared, numeric(1)),
  adj.R.sq = vapply(nested.sums, function(s) s$adj.r.squared, numeric(1))
)
R.sq.value

# Detailed output for the seven-predictor model.
summary(model7.reg)
OUTPUT
                            model      R.sq  adj.R.sq
1                              x1 0.1331626 0.1194033
2                          x1, x2 0.5943963 0.5813123
3                      x1, x2, x3 0.6297972 0.6115905
4                  x1, x2, x3, x4 0.7343859 0.7166783
5               x1, x2, x3, x4,x5 0.7551079 0.7343543
6               x1,x2,x3,x4,x5,x6 0.7769258 0.7538492
7            x1,x2,x3,x4,x5,x6,x7 0.7831864 0.7565602
8         x1,x2,x3,x4,x5,x6,x7,x8 0.7832118 0.7522420
9      x1,x2,x3,x4,x5,x6,x7,x8,x9 0.7883409 0.7537057
10 x1,x2,x3,x4,x5,x6,x7,x8,x9,x10 0.7909363 0.7522208
Call:
lm(formula = Y ~ X1 + X2 + X3 + X4 + X5 + X6 + X7, data = model.dat)
Residuals:
    Min      1Q  Median      3Q     Max
-534969  -81367   13282  156080  410508
Coefficients:
              Estimate Std. Error t value Pr(>|t|)
(Intercept)  139279.00  138479.11   1.006 0.318774
X1           102055.71   57921.39   1.762 0.083437 .
X2              218.51      52.48   4.164 0.000107 ***
X3          -247709.72   60273.66  -4.110 0.000128 ***
X4            81540.08   16412.54   4.968  6.5e-06 ***
X5           119661.49   44113.46   2.713 0.008811 **
X6           -77243.87   37898.25  -2.038 0.046180 *
X7           -92439.26   72053.61  -1.283 0.204711
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 228400 on 57 degrees of freedom
Multiple R-squared: 0.7832, Adjusted R-squared: 0.7566
F-statistic: 29.41 on 7 and 57 DF, p-value: < 2.2e-16