The function that runs linear regressions is lm. To see how it works, let's use the mtcars dataset as an example.
library(datasets)
head(mtcars) # this prints the first 6 observations of the dataset
## mpg cyl disp hp drat wt qsec vs am gear carb
## Mazda RX4 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4
## Mazda RX4 Wag 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4
## Datsun 710 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1
## Hornet 4 Drive 21.4 6 258 110 3.08 3.215 19.44 1 0 3 1
## Hornet Sportabout 18.7 8 360 175 3.15 3.440 17.02 0 0 3 2
## Valiant 18.1 6 225 105 2.76 3.460 20.22 1 0 3 1
If we want to regress mpg on a constant and wt, we write:
regressionFit = lm(mpg ~ wt, data = mtcars)
We saved the return value of lm to regressionFit. It is a complicated object:
str(regressionFit)
## List of 12
## $ coefficients : Named num [1:2] 37.29 -5.34
## ..- attr(*, "names")= chr [1:2] "(Intercept)" "wt"
## $ residuals : Named num [1:32] -2.28 -0.92 -2.09 1.3 -0.2 ...
## ..- attr(*, "names")= chr [1:32] "Mazda RX4" "Mazda RX4 Wag" "Datsun 710" "Hornet 4 Drive" ...
## $ effects : Named num [1:32] -113.65 -29.116 -1.661 1.631 0.111 ...
## ..- attr(*, "names")= chr [1:32] "(Intercept)" "wt" "" "" ...
## $ rank : int 2
## $ fitted.values: Named num [1:32] 23.3 21.9 24.9 20.1 18.9 ...
## ..- attr(*, "names")= chr [1:32] "Mazda RX4" "Mazda RX4 Wag" "Datsun 710" "Hornet 4 Drive" ...
## $ assign : int [1:2] 0 1
## $ qr :List of 5
## ..$ qr : num [1:32, 1:2] -5.657 0.177 0.177 0.177 0.177 ...
## .. ..- attr(*, "dimnames")=List of 2
## .. .. ..$ : chr [1:32] "Mazda RX4" "Mazda RX4 Wag" "Datsun 710" "Hornet 4 Drive" ...
## .. .. ..$ : chr [1:2] "(Intercept)" "wt"
## .. ..- attr(*, "assign")= int [1:2] 0 1
## ..$ qraux: num [1:2] 1.18 1.05
## ..$ pivot: int [1:2] 1 2
## ..$ tol : num 1e-07
## ..$ rank : int 2
## ..- attr(*, "class")= chr "qr"
## $ df.residual : int 30
## $ xlevels : Named list()
## $ call : language lm(formula = mpg ~ wt, data = mtcars)
## $ terms :Classes 'terms', 'formula' language mpg ~ wt
## .. ..- attr(*, "variables")= language list(mpg, wt)
## .. ..- attr(*, "factors")= int [1:2, 1] 0 1
## .. .. ..- attr(*, "dimnames")=List of 2
## .. .. .. ..$ : chr [1:2] "mpg" "wt"
## .. .. .. ..$ : chr "wt"
## .. ..- attr(*, "term.labels")= chr "wt"
## .. ..- attr(*, "order")= int 1
## .. ..- attr(*, "intercept")= int 1
## .. ..- attr(*, "response")= int 1
## .. ..- attr(*, ".Environment")=<environment: R_GlobalEnv>
## .. ..- attr(*, "predvars")= language list(mpg, wt)
## .. ..- attr(*, "dataClasses")= Named chr [1:2] "numeric" "numeric"
## .. .. ..- attr(*, "names")= chr [1:2] "mpg" "wt"
## $ model :'data.frame': 32 obs. of 2 variables:
## ..$ mpg: num [1:32] 21 21 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 ...
## ..$ wt : num [1:32] 2.62 2.88 2.32 3.21 3.44 ...
## ..- attr(*, "terms")=Classes 'terms', 'formula' language mpg ~ wt
## .. .. ..- attr(*, "variables")= language list(mpg, wt)
## .. .. ..- attr(*, "factors")= int [1:2, 1] 0 1
## .. .. .. ..- attr(*, "dimnames")=List of 2
## .. .. .. .. ..$ : chr [1:2] "mpg" "wt"
## .. .. .. .. ..$ : chr "wt"
## .. .. ..- attr(*, "term.labels")= chr "wt"
## .. .. ..- attr(*, "order")= int 1
## .. .. ..- attr(*, "intercept")= int 1
## .. .. ..- attr(*, "response")= int 1
## .. .. ..- attr(*, ".Environment")=<environment: R_GlobalEnv>
## .. .. ..- attr(*, "predvars")= language list(mpg, wt)
## .. .. ..- attr(*, "dataClasses")= Named chr [1:2] "numeric" "numeric"
## .. .. .. ..- attr(*, "names")= chr [1:2] "mpg" "wt"
## - attr(*, "class")= chr "lm"
But we see familiar names such as coefficients, residuals, and fitted.values. We can access these the way we access the elements of a list:
regressionFit$coefficients
## (Intercept) wt
## 37.285126 -5.344472
regressionFit$residuals
## Mazda RX4 Mazda RX4 Wag Datsun 710
## -2.2826106 -0.9197704 -2.0859521
## Hornet 4 Drive Hornet Sportabout Valiant
## 1.2973499 -0.2001440 -0.6932545
## Duster 360 Merc 240D Merc 230
## -3.9053627 4.1637381 2.3499593
## Merc 280 Merc 280C Merc 450SE
## 0.2998560 -1.1001440 0.8668731
## Merc 450SL Merc 450SLC Cadillac Fleetwood
## -0.0502472 -1.8830236 1.1733496
## Lincoln Continental Chrysler Imperial Fiat 128
## 2.1032876 5.9810744 6.8727113
## Honda Civic Toyota Corolla Toyota Corona
## 1.7461954 6.4219792 -2.6110037
## Dodge Challenger AMC Javelin Camaro Z28
## -2.9725862 -3.7268663 -3.4623553
## Pontiac Firebird Fiat X1-9 Porsche 914-2
## 2.4643670 0.3564263 0.1520430
## Lotus Europa Ford Pantera L Ferrari Dino
## 1.2010593 -4.5431513 -2.7809399
## Maserati Bora Volvo 142E
## -3.2053627 -1.0274952
regressionFit$fitted.values
## Mazda RX4 Mazda RX4 Wag Datsun 710
## 23.282611 21.919770 24.885952
## Hornet 4 Drive Hornet Sportabout Valiant
## 20.102650 18.900144 18.793255
## Duster 360 Merc 240D Merc 230
## 18.205363 20.236262 20.450041
## Merc 280 Merc 280C Merc 450SE
## 18.900144 18.900144 15.533127
## Merc 450SL Merc 450SLC Cadillac Fleetwood
## 17.350247 17.083024 9.226650
## Lincoln Continental Chrysler Imperial Fiat 128
## 8.296712 8.718926 25.527289
## Honda Civic Toyota Corolla Toyota Corona
## 28.653805 27.478021 24.111004
## Dodge Challenger AMC Javelin Camaro Z28
## 18.472586 18.926866 16.762355
## Pontiac Firebird Fiat X1-9 Porsche 914-2
## 16.735633 26.943574 25.847957
## Lotus Europa Ford Pantera L Ferrari Dino
## 29.198941 20.343151 22.480940
## Maserati Bora Volvo 142E
## 18.205363 22.427495
To see the usual regression output that other statistical packages display, type any of these:
regressionFit
##
## Call:
## lm(formula = mpg ~ wt, data = mtcars)
##
## Coefficients:
## (Intercept) wt
## 37.285 -5.344
print(regressionFit)
##
## Call:
## lm(formula = mpg ~ wt, data = mtcars)
##
## Coefficients:
## (Intercept) wt
## 37.285 -5.344
summary(regressionFit)
##
## Call:
## lm(formula = mpg ~ wt, data = mtcars)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.5432 -2.3647 -0.1252 1.4096 6.8727
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 37.2851 1.8776 19.858 < 2e-16 ***
## wt -5.3445 0.5591 -9.559 1.29e-10 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.046 on 30 degrees of freedom
## Multiple R-squared: 0.7528, Adjusted R-squared: 0.7446
## F-statistic: 91.38 on 1 and 30 DF, p-value: 1.294e-10
To run a regression without a constant, do the following.
regFitWithoutConst = lm(mpg ~ -1 + wt, data=mtcars)
summary(regFitWithoutConst)
##
## Call:
## lm(formula = mpg ~ -1 + wt, data = mtcars)
##
## Residuals:
## Min 1Q Median 3Q Max
## -18.3018 -3.3177 0.7468 7.7538 24.1899
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## wt 5.2916 0.5932 8.921 4.55e-10 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 11.27 on 31 degrees of freedom
## Multiple R-squared: 0.7197, Adjusted R-squared: 0.7106
## F-statistic: 79.58 on 1 and 31 DF, p-value: 4.553e-10
We can also add other regressors.
regressionFit = lm(mpg ~ wt + cyl + disp, data=mtcars)
summary(regressionFit)
##
## Call:
## lm(formula = mpg ~ wt + cyl + disp, data = mtcars)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.4035 -1.4028 -0.4955 1.3387 6.0722
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 41.107678 2.842426 14.462 1.62e-14 ***
## wt -3.635677 1.040138 -3.495 0.00160 **
## cyl -1.784944 0.607110 -2.940 0.00651 **
## disp 0.007473 0.011845 0.631 0.53322
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.595 on 28 degrees of freedom
## Multiple R-squared: 0.8326, Adjusted R-squared: 0.8147
## F-statistic: 46.42 on 3 and 28 DF, p-value: 5.399e-11
As we saw in the ggplot2 tutorial, the variable cyl takes only three values: 4, 6, and 8. We may want to treat cyl as a categorical variable rather than a continuous one. To regress mpg on indicators of cyl, we use the factor function. R absorbs the lowest level (cyl == 4) into the intercept as the baseline and creates indicators for the remaining levels, which is why the output below shows factor(cyl)6 and factor(cyl)8.
regressionFit = lm(mpg ~ wt + factor(cyl) + disp, data=mtcars)
summary(regressionFit)
##
## Call:
## lm(formula = mpg ~ wt + factor(cyl) + disp, data = mtcars)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.5965 -1.2361 -0.4855 1.4740 5.8043
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 34.041673 1.963039 17.341 3.66e-16 ***
## wt -3.306751 1.105083 -2.992 0.00586 **
## factor(cyl)6 -4.305559 1.464760 -2.939 0.00666 **
## factor(cyl)8 -6.322786 2.598416 -2.433 0.02186 *
## disp 0.001715 0.013481 0.127 0.89972
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.603 on 27 degrees of freedom
## Multiple R-squared: 0.8375, Adjusted R-squared: 0.8135
## F-statistic: 34.8 on 4 and 27 DF, p-value: 2.726e-10
If you want to use wt^2 as a regressor, there are two ways to do it. One way is to create another column in the data.frame:
mtcars$wt2 = mtcars$wt^2 # create the column wt2 in the data frame and fill it with the squared values
head(mtcars)
## mpg cyl disp hp drat wt qsec vs am gear carb
## Mazda RX4 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4
## Mazda RX4 Wag 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4
## Datsun 710 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1
## Hornet 4 Drive 21.4 6 258 110 3.08 3.215 19.44 1 0 3 1
## Hornet Sportabout 18.7 8 360 175 3.15 3.440 17.02 0 0 3 2
## Valiant 18.1 6 225 105 2.76 3.460 20.22 1 0 3 1
## wt2
## Mazda RX4 6.864400
## Mazda RX4 Wag 8.265625
## Datsun 710 5.382400
## Hornet 4 Drive 10.336225
## Hornet Sportabout 11.833600
## Valiant 11.971600
summary(lm(mpg ~ wt + wt2, data=mtcars))
##
## Call:
## lm(formula = mpg ~ wt + wt2, data = mtcars)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.483 -1.998 -0.773 1.462 6.238
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 49.9308 4.2113 11.856 1.21e-12 ***
## wt -13.3803 2.5140 -5.322 1.04e-05 ***
## wt2 1.1711 0.3594 3.258 0.00286 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.651 on 29 degrees of freedom
## Multiple R-squared: 0.8191, Adjusted R-squared: 0.8066
## F-statistic: 65.64 on 2 and 29 DF, p-value: 1.715e-11
Another way that does not involve creating a column is the following.
summary(lm(mpg ~ wt + I(wt^2), data=mtcars))
##
## Call:
## lm(formula = mpg ~ wt + I(wt^2), data = mtcars)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.483 -1.998 -0.773 1.462 6.238
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 49.9308 4.2113 11.856 1.21e-12 ***
## wt -13.3803 2.5140 -5.322 1.04e-05 ***
## I(wt^2) 1.1711 0.3594 3.258 0.00286 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.651 on 29 degrees of freedom
## Multiple R-squared: 0.8191, Adjusted R-squared: 0.8066
## F-statistic: 65.64 on 2 and 29 DF, p-value: 1.715e-11
Sums inside I() work similarly: e.g. I(cyl+disp).
What does the function I() do? The answer has to do with the nature of the first argument of lm.
We have been omitting the label of the first argument. The first argument of the lm function is formula:
lm(formula = mpg ~ wt + disp, data=mtcars)
##
## Call:
## lm(formula = mpg ~ wt + disp, data = mtcars)
##
## Coefficients:
## (Intercept) wt disp
## 34.96055 -3.35083 -0.01772
formula is a special type of object that holds an unevaluated expression. Note that we do not specify mpg ~ wt + disp as a string, in which case we would have to write "mpg ~ wt + disp". Inside this expression, operators like ~ and + work differently from the usual way. For example, + in a formula is not an arithmetic operator, but an operator that says we have multiple regressors.
The function I tells R to read operators like + and ^ inside it as arithmetic operators. So the operator ^ in I(wt^2) is interpreted as a power operator. Similarly, I(cyl+disp) interprets + as arithmetic addition, not as the formula operator that says we have the two regressors cyl and disp.
The lm function computes standard errors under the constant-variance (homoskedasticity) assumption. To account for heteroskedasticity, we use the sandwich and lmtest packages. Let's first load them.
library(sandwich)
library(lmtest)
We use the following regression as an example.
lfit = lm(formula = mpg ~ wt + disp, data=mtcars)
summary(lfit)
##
## Call:
## lm(formula = mpg ~ wt + disp, data = mtcars)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.4087 -2.3243 -0.7683 1.7721 6.3484
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 34.96055 2.16454 16.151 4.91e-16 ***
## wt -3.35082 1.16413 -2.878 0.00743 **
## disp -0.01773 0.00919 -1.929 0.06362 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.917 on 29 degrees of freedom
## Multiple R-squared: 0.7809, Adjusted R-squared: 0.7658
## F-statistic: 51.69 on 2 and 29 DF, p-value: 2.744e-10
As mentioned earlier, the standard errors in the above regression are based on the constant-variance assumption. Now we compute heteroskedasticity-robust standard errors using the vcovHC function in the sandwich package:
vcHC = vcovHC(lfit, type = "HC0")
Note that we use the result of the lm function as the first argument of vcovHC.
The above command computes the variance-covariance matrix by the estimator
\[ vcHC = (X'X)^{-1}X'\Omega X(X'X)^{-1} \]
where \(X\) is the matrix of regressors and \[ \Omega = \text{diag}(u_1^2, \ldots, u_N^2) \] where \(u_i\) is the residual for individual \(i\).
For the argument type, we can instead write "HC1", "HC2", or "HC3". These are all finite-sample corrections to the above estimator.
Now, to generate the analog of summary(lfit), we use the coeftest function in the lmtest package:
coeftest(lfit, vcov. = vcHC)
##
## t test of coefficients:
##
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 34.9605540 2.2538319 15.5116 1.41e-15 ***
## wt -3.3508253 1.0491411 -3.1939 0.003371 **
## disp -0.0177247 0.0080406 -2.2044 0.035585 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Compare the above result with the output of the lm function below; the standard errors (and hence the t values and p-values) have been corrected.
summary(lfit)
##
## Call:
## lm(formula = mpg ~ wt + disp, data = mtcars)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.4087 -2.3243 -0.7683 1.7721 6.3484
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 34.96055 2.16454 16.151 4.91e-16 ***
## wt -3.35082 1.16413 -2.878 0.00743 **
## disp -0.01773 0.00919 -1.929 0.06362 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.917 on 29 degrees of freedom
## Multiple R-squared: 0.7809, Adjusted R-squared: 0.7658
## F-statistic: 51.69 on 2 and 29 DF, p-value: 2.744e-10
We can also compute clustered standard errors using the sandwich package. However, to do so, we need to tell vcovHC which variables represent the group and the time indices. We need the plm package for this.
library(plm)
The plm function in the plm package can run several kinds of panel data regressions. For now, however, we use plm to do exactly what lm does (i.e. OLS), except that the fitted object also stores which variables are the group and time indices.
To see how to use plm, we use the following dataset as an example.
data("Grunfeld", package = "plm")
head(Grunfeld)
## firm year inv value capital
## 1 1 1935 317.6 3078.5 2.8
## 2 1 1936 391.8 4661.7 52.6
## 3 1 1937 410.6 5387.1 156.9
## 4 1 1938 257.7 2792.2 209.2
## 5 1 1939 330.8 4313.2 203.4
## 6 1 1940 461.2 4643.9 207.2
Consider the following OLS.
pfitl = lm(inv ~ value + capital, data=Grunfeld)
summary(pfitl)
##
## Call:
## lm(formula = inv ~ value + capital, data = Grunfeld)
##
## Residuals:
## Min 1Q Median 3Q Max
## -291.68 -30.01 5.30 34.83 369.45
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -42.714369 9.511676 -4.491 1.21e-05 ***
## value 0.115562 0.005836 19.803 < 2e-16 ***
## capital 0.230678 0.025476 9.055 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 94.41 on 197 degrees of freedom
## Multiple R-squared: 0.8124, Adjusted R-squared: 0.8105
## F-statistic: 426.6 on 2 and 197 DF, p-value: < 2.2e-16
The following code produces exactly the same estimates:
pfit = plm(inv ~ value + capital, model="pooling", index="firm", data=Grunfeld)
summary(pfit)
## Pooling Model
##
## Call:
## plm(formula = inv ~ value + capital, data = Grunfeld, model = "pooling",
## index = "firm")
##
## Balanced Panel: n=10, T=20, N=200
##
## Residuals :
## Min. 1st Qu. Median 3rd Qu. Max.
## -292.0 -30.0 5.3 34.8 369.0
##
## Coefficients :
## Estimate Std. Error t-value Pr(>|t|)
## (Intercept) -42.7143694 9.5116760 -4.4907 1.207e-05 ***
## value 0.1155622 0.0058357 19.8026 < 2.2e-16 ***
## capital 0.2306785 0.0254758 9.0548 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 9359900
## Residual Sum of Squares: 1755900
## R-Squared: 0.81241
## Adj. R-Squared: 0.8105
## F-statistic: 426.576 on 2 and 197 DF, p-value: < 2.22e-16
The option model="pooling" makes plm run OLS. The option index="firm" tells plm that the observations are grouped according to the firm variable.
Now we compute clustered standard errors with the following code.
vcHCcluster = vcovHC(pfit, type = "HC0", cluster = "group")
Note that the value of the argument cluster, which is "group", is not the name of a variable in the dataset Grunfeld. The option cluster = "group" tells vcovHC to use the group index saved in pfit (here, firm) as the cluster.
The above command computes the estimator
\[ vcHCcluster = \left(\sum_{k=1}^K X_k'X_k\right)^{-1} \sum_{k=1}^K X_k'U_kU_k'X_k \left(\sum_{k=1}^K X_k'X_k\right)^{-1} \]
where \(X_k\) is the matrix of regressors for the \(k\)th cluster and \(U_k\) is the residual vector for the \(k\)th cluster.
To see the coefficient table with the corrected standard errors, we again use the coeftest function.
coeftest(pfit, vcov. = vcHCcluster)
##
## t test of coefficients:
##
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -42.714369 19.279431 -2.2155 0.027868 *
## value 0.115562 0.015003 7.7027 6.35e-13 ***
## capital 0.230678 0.080201 2.8763 0.004467 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
For probit and logit, we use glm. Using it is very similar to using lm.
# recall:
head(mtcars)
## mpg cyl disp hp drat wt qsec vs am gear carb
## Mazda RX4 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4
## Mazda RX4 Wag 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4
## Datsun 710 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1
## Hornet 4 Drive 21.4 6 258 110 3.08 3.215 19.44 1 0 3 1
## Hornet Sportabout 18.7 8 360 175 3.15 3.440 17.02 0 0 3 2
## Valiant 18.1 6 225 105 2.76 3.460 20.22 1 0 3 1
## wt2
## Mazda RX4 6.864400
## Mazda RX4 Wag 8.265625
## Datsun 710 5.382400
## Hornet 4 Drive 10.336225
## Hornet Sportabout 11.833600
## Valiant 11.971600
# let's run a probit with an arbitrary formula: regress am on mpg and disp.
probitFit = glm(am ~ mpg + disp, family = binomial(link="probit"), data = mtcars)
probitFit
##
## Call: glm(formula = am ~ mpg + disp, family = binomial(link = "probit"),
## data = mtcars)
##
## Coefficients:
## (Intercept) mpg disp
## -1.439509 0.104365 -0.004225
##
## Degrees of Freedom: 31 Total (i.e. Null); 29 Residual
## Null Deviance: 43.23
## Residual Deviance: 28.6 AIC: 34.6
summary(probitFit)
##
## Call:
## glm(formula = am ~ mpg + disp, family = binomial(link = "probit"),
## data = mtcars)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.5240 -0.6536 -0.3211 0.5906 2.1394
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.439509 2.659683 -0.541 0.588
## mpg 0.104365 0.092554 1.128 0.259
## disp -0.004225 0.004314 -0.979 0.327
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 43.230 on 31 degrees of freedom
## Residual deviance: 28.601 on 29 degrees of freedom
## AIC: 34.601
##
## Number of Fisher Scoring iterations: 6
# now let's run a logit with the same formula.
logitFit = glm(am ~ mpg + disp, family = binomial(link="logit"), data = mtcars)
logitFit
##
## Call: glm(formula = am ~ mpg + disp, family = binomial(link = "logit"),
## data = mtcars)
##
## Coefficients:
## (Intercept) mpg disp
## -2.256714 0.169978 -0.007615
##
## Degrees of Freedom: 31 Total (i.e. Null); 29 Residual
## Null Deviance: 43.23
## Residual Deviance: 28.61 AIC: 34.61
summary(logitFit)
##
## Call:
## glm(formula = am ~ mpg + disp, family = binomial(link = "logit"),
## data = mtcars)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.5186 -0.6261 -0.3316 0.5960 2.1655
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.256714 4.760079 -0.474 0.635
## mpg 0.169978 0.168373 1.010 0.313
## disp -0.007615 0.007811 -0.975 0.330
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 43.230 on 31 degrees of freedom
## Residual deviance: 28.606 on 29 degrees of freedom
## AIC: 34.606
##
## Number of Fisher Scoring iterations: 5