The function that runs linear regression is lm. To see how it works, let's use the mtcars dataset as an example.
library(datasets)
head(mtcars) # this prints the first 6 observations of the dataset
## mpg cyl disp hp drat wt qsec vs am gear carb
## Mazda RX4 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4
## Mazda RX4 Wag 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4
## Datsun 710 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1
## Hornet 4 Drive 21.4 6 258 110 3.08 3.215 19.44 1 0 3 1
## Hornet Sportabout 18.7 8 360 175 3.15 3.440 17.02 0 0 3 2
## Valiant 18.1 6 225 105 2.76 3.460 20.22 1 0 3 1
If we want to regress mpg on a constant and wt, we write this.
regressionFit = lm(mpg ~ wt, data = mtcars)
We saved the return value of lm to regressionFit. It is a complicated object:
str(regressionFit)
## List of 12
## $ coefficients : Named num [1:2] 37.29 -5.34
## ..- attr(*, "names")= chr [1:2] "(Intercept)" "wt"
## $ residuals : Named num [1:32] -2.28 -0.92 -2.09 1.3 -0.2 ...
## ..- attr(*, "names")= chr [1:32] "Mazda RX4" "Mazda RX4 Wag" "Datsun 710" "Hornet 4 Drive" ...
## $ effects : Named num [1:32] -113.65 -29.116 -1.661 1.631 0.111 ...
## ..- attr(*, "names")= chr [1:32] "(Intercept)" "wt" "" "" ...
## $ rank : int 2
## $ fitted.values: Named num [1:32] 23.3 21.9 24.9 20.1 18.9 ...
## ..- attr(*, "names")= chr [1:32] "Mazda RX4" "Mazda RX4 Wag" "Datsun 710" "Hornet 4 Drive" ...
## $ assign : int [1:2] 0 1
## $ qr :List of 5
## ..$ qr : num [1:32, 1:2] -5.657 0.177 0.177 0.177 0.177 ...
## .. ..- attr(*, "dimnames")=List of 2
## .. .. ..$ : chr [1:32] "Mazda RX4" "Mazda RX4 Wag" "Datsun 710" "Hornet 4 Drive" ...
## .. .. ..$ : chr [1:2] "(Intercept)" "wt"
## .. ..- attr(*, "assign")= int [1:2] 0 1
## ..$ qraux: num [1:2] 1.18 1.05
## ..$ pivot: int [1:2] 1 2
## ..$ tol : num 1e-07
## ..$ rank : int 2
## ..- attr(*, "class")= chr "qr"
## $ df.residual : int 30
## $ xlevels : Named list()
## $ call : language lm(formula = mpg ~ wt, data = mtcars)
## $ terms :Classes 'terms', 'formula' language mpg ~ wt
## .. ..- attr(*, "variables")= language list(mpg, wt)
## .. ..- attr(*, "factors")= int [1:2, 1] 0 1
## .. .. ..- attr(*, "dimnames")=List of 2
## .. .. .. ..$ : chr [1:2] "mpg" "wt"
## .. .. .. ..$ : chr "wt"
## .. ..- attr(*, "term.labels")= chr "wt"
## .. ..- attr(*, "order")= int 1
## .. ..- attr(*, "intercept")= int 1
## .. ..- attr(*, "response")= int 1
## .. ..- attr(*, ".Environment")=<environment: R_GlobalEnv>
## .. ..- attr(*, "predvars")= language list(mpg, wt)
## .. ..- attr(*, "dataClasses")= Named chr [1:2] "numeric" "numeric"
## .. .. ..- attr(*, "names")= chr [1:2] "mpg" "wt"
## $ model :'data.frame': 32 obs. of 2 variables:
## ..$ mpg: num [1:32] 21 21 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 ...
## ..$ wt : num [1:32] 2.62 2.88 2.32 3.21 3.44 ...
## ..- attr(*, "terms")=Classes 'terms', 'formula' language mpg ~ wt
## .. .. ..- attr(*, "variables")= language list(mpg, wt)
## .. .. ..- attr(*, "factors")= int [1:2, 1] 0 1
## .. .. .. ..- attr(*, "dimnames")=List of 2
## .. .. .. .. ..$ : chr [1:2] "mpg" "wt"
## .. .. .. .. ..$ : chr "wt"
## .. .. ..- attr(*, "term.labels")= chr "wt"
## .. .. ..- attr(*, "order")= int 1
## .. .. ..- attr(*, "intercept")= int 1
## .. .. ..- attr(*, "response")= int 1
## .. .. ..- attr(*, ".Environment")=<environment: R_GlobalEnv>
## .. .. ..- attr(*, "predvars")= language list(mpg, wt)
## .. .. ..- attr(*, "dataClasses")= Named chr [1:2] "numeric" "numeric"
## .. .. .. ..- attr(*, "names")= chr [1:2] "mpg" "wt"
## - attr(*, "class")= chr "lm"
But we see familiar names such as coefficients, residuals, and fitted.values. We can access these the same way we access elements of a list, with the $ operator.
regressionFit$coefficients
## (Intercept) wt
## 37.285126 -5.344472
regressionFit$residuals
## Mazda RX4 Mazda RX4 Wag Datsun 710
## -2.2826106 -0.9197704 -2.0859521
## Hornet 4 Drive Hornet Sportabout Valiant
## 1.2973499 -0.2001440 -0.6932545
## Duster 360 Merc 240D Merc 230
## -3.9053627 4.1637381 2.3499593
## Merc 280 Merc 280C Merc 450SE
## 0.2998560 -1.1001440 0.8668731
## Merc 450SL Merc 450SLC Cadillac Fleetwood
## -0.0502472 -1.8830236 1.1733496
## Lincoln Continental Chrysler Imperial Fiat 128
## 2.1032876 5.9810744 6.8727113
## Honda Civic Toyota Corolla Toyota Corona
## 1.7461954 6.4219792 -2.6110037
## Dodge Challenger AMC Javelin Camaro Z28
## -2.9725862 -3.7268663 -3.4623553
## Pontiac Firebird Fiat X1-9 Porsche 914-2
## 2.4643670 0.3564263 0.1520430
## Lotus Europa Ford Pantera L Ferrari Dino
## 1.2010593 -4.5431513 -2.7809399
## Maserati Bora Volvo 142E
## -3.2053627 -1.0274952
regressionFit$fitted.values
## Mazda RX4 Mazda RX4 Wag Datsun 710
## 23.282611 21.919770 24.885952
## Hornet 4 Drive Hornet Sportabout Valiant
## 20.102650 18.900144 18.793255
## Duster 360 Merc 240D Merc 230
## 18.205363 20.236262 20.450041
## Merc 280 Merc 280C Merc 450SE
## 18.900144 18.900144 15.533127
## Merc 450SL Merc 450SLC Cadillac Fleetwood
## 17.350247 17.083024 9.226650
## Lincoln Continental Chrysler Imperial Fiat 128
## 8.296712 8.718926 25.527289
## Honda Civic Toyota Corolla Toyota Corona
## 28.653805 27.478021 24.111004
## Dodge Challenger AMC Javelin Camaro Z28
## 18.472586 18.926866 16.762355
## Pontiac Firebird Fiat X1-9 Porsche 914-2
## 16.735633 26.943574 25.847957
## Lotus Europa Ford Pantera L Ferrari Dino
## 29.198941 20.343151 22.480940
## Maserati Bora Volvo 142E
## 18.205363 22.427495
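As an aside, base R also provides extractor functions that return the same components; the calls below should reproduce the output above.
coef(regressionFit)      # same as regressionFit$coefficients
residuals(regressionFit) # same as regressionFit$residuals
fitted(regressionFit)    # same as regressionFit$fitted.values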
To see the usual regression tables that other statistical packages print, type the following:
regressionFit
##
## Call:
## lm(formula = mpg ~ wt, data = mtcars)
##
## Coefficients:
## (Intercept) wt
## 37.285 -5.344
print(regressionFit)
##
## Call:
## lm(formula = mpg ~ wt, data = mtcars)
##
## Coefficients:
## (Intercept) wt
## 37.285 -5.344
summary(regressionFit)
##
## Call:
## lm(formula = mpg ~ wt, data = mtcars)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.5432 -2.3647 -0.1252 1.4096 6.8727
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 37.2851 1.8776 19.858 < 2e-16 ***
## wt -5.3445 0.5591 -9.559 1.29e-10 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.046 on 30 degrees of freedom
## Multiple R-squared: 0.7528, Adjusted R-squared: 0.7446
## F-statistic: 91.38 on 1 and 30 DF, p-value: 1.294e-10
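The object returned by summary is itself a list, so we can pull out individual pieces. A quick sketch; the values should match the output above:
s = summary(regressionFit)
s$r.squared    # multiple R-squared (0.7528 above)
s$coefficients # matrix of estimates, standard errors, t values, and p-values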
To run a regression without a constant, do the following.
regFitWithoutConst = lm(mpg ~ -1 + wt, data=mtcars)
summary(regFitWithoutConst)
##
## Call:
## lm(formula = mpg ~ -1 + wt, data = mtcars)
##
## Residuals:
## Min 1Q Median 3Q Max
## -18.3018 -3.3177 0.7468 7.7538 24.1899
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## wt 5.2916 0.5932 8.921 4.55e-10 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 11.27 on 31 degrees of freedom
## Multiple R-squared: 0.7197, Adjusted R-squared: 0.7106
## F-statistic: 79.58 on 1 and 31 DF, p-value: 4.553e-10
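Equivalently, writing 0 in place of -1 also drops the constant; the following should produce the same fit.
regFitWithoutConst2 = lm(mpg ~ 0 + wt, data=mtcars) # same as mpg ~ -1 + wt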
We can also add other regressors.
regressionFit = lm(mpg ~ wt + cyl + disp, data=mtcars)
summary(regressionFit)
##
## Call:
## lm(formula = mpg ~ wt + cyl + disp, data = mtcars)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.4035 -1.4028 -0.4955 1.3387 6.0722
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 41.107678 2.842426 14.462 1.62e-14 ***
## wt -3.635677 1.040138 -3.495 0.00160 **
## cyl -1.784944 0.607110 -2.940 0.00651 **
## disp 0.007473 0.011845 0.631 0.53322
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.595 on 28 degrees of freedom
## Multiple R-squared: 0.8326, Adjusted R-squared: 0.8147
## F-statistic: 46.42 on 3 and 28 DF, p-value: 5.399e-11
As we saw in the ggplot2 tutorial, the variable cyl takes only three values: 4, 6, 8. We may want to treat cyl as a categorical variable rather than a continuous one. To regress mpg on indicator variables for each value of cyl, we wrap cyl in the factor function.
regressionFit = lm(mpg ~ wt + factor(cyl) + disp, data=mtcars)
summary(regressionFit)
##
## Call:
## lm(formula = mpg ~ wt + factor(cyl) + disp, data = mtcars)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.5965 -1.2361 -0.4855 1.4740 5.8043
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 34.041673 1.963039 17.341 3.66e-16 ***
## wt -3.306751 1.105083 -2.992 0.00586 **
## factor(cyl)6 -4.305559 1.464760 -2.939 0.00666 **
## factor(cyl)8 -6.322786 2.598416 -2.433 0.02186 *
## disp 0.001715 0.013481 0.127 0.89972
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.603 on 27 degrees of freedom
## Multiple R-squared: 0.8375, Adjusted R-squared: 0.8135
## F-statistic: 34.8 on 4 and 27 DF, p-value: 2.726e-10
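In the output above, cyl = 4 is the omitted baseline category. If we prefer a different baseline, we can relevel the factor; a sketch (output omitted):
# use cyl = 8 as the baseline category instead of cyl = 4
summary(lm(mpg ~ wt + relevel(factor(cyl), ref = "8") + disp, data=mtcars))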
If you want to use wt^2 as a regressor, there are two ways to do it. One way is to create another column in the data.frame.
mtcars$wt2 = mtcars$wt^2 # create a new column wt2 in the data.frame and fill it with the squared values
head(mtcars)
## mpg cyl disp hp drat wt qsec vs am gear carb
## Mazda RX4 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4
## Mazda RX4 Wag 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4
## Datsun 710 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1
## Hornet 4 Drive 21.4 6 258 110 3.08 3.215 19.44 1 0 3 1
## Hornet Sportabout 18.7 8 360 175 3.15 3.440 17.02 0 0 3 2
## Valiant 18.1 6 225 105 2.76 3.460 20.22 1 0 3 1
## wt2
## Mazda RX4 6.864400
## Mazda RX4 Wag 8.265625
## Datsun 710 5.382400
## Hornet 4 Drive 10.336225
## Hornet Sportabout 11.833600
## Valiant 11.971600
summary(lm(mpg ~ wt + wt2, data=mtcars))
##
## Call:
## lm(formula = mpg ~ wt + wt2, data = mtcars)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.483 -1.998 -0.773 1.462 6.238
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 49.9308 4.2113 11.856 1.21e-12 ***
## wt -13.3803 2.5140 -5.322 1.04e-05 ***
## wt2 1.1711 0.3594 3.258 0.00286 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.651 on 29 degrees of freedom
## Multiple R-squared: 0.8191, Adjusted R-squared: 0.8066
## F-statistic: 65.64 on 2 and 29 DF, p-value: 1.715e-11
Another way that does not involve creating a column is the following.
summary(lm(mpg ~ wt + I(wt^2), data=mtcars))
##
## Call:
## lm(formula = mpg ~ wt + I(wt^2), data = mtcars)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.483 -1.998 -0.773 1.462 6.238
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 49.9308 4.2113 11.856 1.21e-12 ***
## wt -13.3803 2.5140 -5.322 1.04e-05 ***
## I(wt^2) 1.1711 0.3594 3.258 0.00286 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.651 on 29 degrees of freedom
## Multiple R-squared: 0.8191, Adjusted R-squared: 0.8066
## F-statistic: 65.64 on 2 and 29 DF, p-value: 1.715e-11
Sums work similarly, e.g. I(cyl + disp), as sketched below.
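For instance, the following call regresses mpg on a constant and the single constructed regressor cyl + disp (output omitted):
lm(mpg ~ I(cyl + disp), data=mtcars) # one regressor: the sum of cyl and disp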
What is the function I()? The answer has to do with the nature of the first argument of lm.
We have been omitting the name of the first argument. The first argument of the lm function is formula:
lm(formula = mpg ~ wt + disp, data=mtcars)
##
## Call:
## lm(formula = mpg ~ wt + disp, data = mtcars)
##
## Coefficients:
## (Intercept) wt disp
## 34.96055 -3.35083 -0.01772
A formula is a special object that holds an unevaluated "expression". Note that we do not need to pass mpg ~ wt + disp as a string; if we did, we would have to write "mpg ~ wt + disp". Inside this "expression", operators like ~ and + work differently from the usual way. For example, + in a formula is not an arithmetic operator but an operator that says we have multiple regressors.
The function I tells R to read operators like + and ^ inside it as arithmetic operators. So the operator ^ in I(wt^2) is interpreted as exponentiation. Similarly, I(cyl+disp) interprets + as addition and not as the operator that says we have two regressors cyl and disp.
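To see why I() is needed here, note that inside a formula ^ denotes crossing of terms, so mpg ~ wt^2 expands to just mpg ~ wt. A quick check, which should show identical coefficients:
coef(lm(mpg ~ wt^2, data=mtcars)) # without I(), wt^2 collapses to wt
coef(lm(mpg ~ wt, data=mtcars))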
The lm function computes standard errors under the constant-variance (homoskedasticity) assumption. To account for heteroskedasticity, we use the sandwich and lmtest packages.
Let's first load the packages.
library(sandwich)
library(lmtest)
We use the following regression as an example.
lfit = lm(formula = mpg ~ wt + disp, data=mtcars)
summary(lfit)
##
## Call:
## lm(formula = mpg ~ wt + disp, data = mtcars)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.4087 -2.3243 -0.7683 1.7721 6.3484
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 34.96055 2.16454 16.151 4.91e-16 ***
## wt -3.35082 1.16413 -2.878 0.00743 **
## disp -0.01773 0.00919 -1.929 0.06362 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.917 on 29 degrees of freedom
## Multiple R-squared: 0.7809, Adjusted R-squared: 0.7658
## F-statistic: 51.69 on 2 and 29 DF, p-value: 2.744e-10
As mentioned earlier, the standard errors in the above regression are based on the constant-variance assumption. Now we compute heteroskedasticity-robust standard errors with the vcovHC function in the sandwich package:
vcHC = vcovHC(lfit, type = "HC0")
Note that we pass the result of the lm function as the first argument of vcovHC.
The above command computes the variance-covariance matrix with the estimator
\[ vcHC = (X'X)^{-1}X'\Omega X(X'X)^{-1} \]
where \(X\) is the matrix of regressors and \[ \Omega = \text{diag}(u_1^2, \ldots, u_N^2) \] where \(u_i\) is the residual for individual \(i\).
For the value of the argument type, we can instead write "HC1", "HC2", or "HC3"; these apply finite-sample corrections to the estimator above.
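The robust standard errors themselves are the square roots of the diagonal of this matrix; a quick check against the coeftest output below:
sqrt(diag(vcHC)) # heteroskedasticity-robust standard errors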
Now, to generate the analog of summary(lfit), we use the coeftest function in the lmtest package:
coeftest(lfit, vcov. = vcHC)
##
## t test of coefficients:
##
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 34.9605540 2.2538319 15.5116 1.41e-15 ***
## wt -3.3508253 1.0491411 -3.1939 0.003371 **
## disp -0.0177247 0.0080406 -2.2044 0.035585 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Compare the above result with the output of the lm function below: the standard errors, and hence the t values and p-values, change.
summary(lfit)
##
## Call:
## lm(formula = mpg ~ wt + disp, data = mtcars)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.4087 -2.3243 -0.7683 1.7721 6.3484
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 34.96055 2.16454 16.151 4.91e-16 ***
## wt -3.35082 1.16413 -2.878 0.00743 **
## disp -0.01773 0.00919 -1.929 0.06362 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.917 on 29 degrees of freedom
## Multiple R-squared: 0.7809, Adjusted R-squared: 0.7658
## F-statistic: 51.69 on 2 and 29 DF, p-value: 2.744e-10
We can also compute clustered standard errors using the sandwich package. To do so, however, we need to tell vcovHC which variables represent the group and time indices. We need the plm package for this.
library(plm)
The plm function in the plm package runs several kinds of panel data regressions. For now, however, we use plm to do exactly what lm does (i.e. OLS), except that it also stores which variables are the group and time indices.
To discuss how to use plm, we use the following dataset as an example.
data("Grunfeld", package = "plm")
head(Grunfeld)
## firm year inv value capital
## 1 1 1935 317.6 3078.5 2.8
## 2 1 1936 391.8 4661.7 52.6
## 3 1 1937 410.6 5387.1 156.9
## 4 1 1938 257.7 2792.2 209.2
## 5 1 1939 330.8 4313.2 203.4
## 6 1 1940 461.2 4643.9 207.2
Consider the following OLS.
pfitl = lm(inv ~ value + capital, data=Grunfeld)
summary(pfitl)
##
## Call:
## lm(formula = inv ~ value + capital, data = Grunfeld)
##
## Residuals:
## Min 1Q Median 3Q Max
## -291.68 -30.01 5.30 34.83 369.45
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -42.714369 9.511676 -4.491 1.21e-05 ***
## value 0.115562 0.005836 19.803 < 2e-16 ***
## capital 0.230678 0.025476 9.055 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 94.41 on 197 degrees of freedom
## Multiple R-squared: 0.8124, Adjusted R-squared: 0.8105
## F-statistic: 426.6 on 2 and 197 DF, p-value: < 2.2e-16
The following code produces exactly the same result:
pfit = plm(inv ~ value + capital, model="pooling", index="firm", data=Grunfeld)
summary(pfit)
## Pooling Model
##
## Call:
## plm(formula = inv ~ value + capital, data = Grunfeld, model = "pooling",
## index = "firm")
##
## Balanced Panel: n=10, T=20, N=200
##
## Residuals :
## Min. 1st Qu. Median 3rd Qu. Max.
## -292.0 -30.0 5.3 34.8 369.0
##
## Coefficients :
## Estimate Std. Error t-value Pr(>|t|)
## (Intercept) -42.7143694 9.5116760 -4.4907 1.207e-05 ***
## value 0.1155622 0.0058357 19.8026 < 2.2e-16 ***
## capital 0.2306785 0.0254758 9.0548 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 9359900
## Residual Sum of Squares: 1755900
## R-Squared: 0.81241
## Adj. R-Squared: 0.8105
## F-statistic: 426.576 on 2 and 197 DF, p-value: < 2.22e-16
The option model="pooling" makes plm run OLS. The option index="firm" tells plm that the observations are grouped according to the firm variable.
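If we also want to record the time dimension, we can pass both indices; a sketch using the Grunfeld columns (the name pfitGT is ours):
pfitGT = plm(inv ~ value + capital, model="pooling", index=c("firm", "year"), data=Grunfeld) # same fit, with the time index declared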
Now we compute clustered standard errors with the following code.
vcHCcluster = vcovHC(pfit, type = "HC0", cluster = "group")
Note that the value of the argument cluster, "group", is not the name of a variable in the Grunfeld dataset. The option cluster = "group" tells vcovHC to use the group index saved in pfit as the cluster variable.
The above command computes the estimator
\[ vcHCcluster = \left(\sum_{k=1}^K X_k'X_k\right)^{-1} \sum_{k=1}^K X_k'U_kU_k'X_k \left(\sum_{k=1}^K X_k'X_k\right)^{-1} \]
where \(X_k\) is the matrix of regressors for the \(k\)th cluster and \(U_k\) is the residual vector for the \(k\)th cluster.
To report results with the corrected standard errors, we again use the coeftest function.
coeftest(pfit, vcov. = vcHCcluster)
##
## t test of coefficients:
##
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -42.714369 19.279431 -2.2155 0.027868 *
## value 0.115562 0.015003 7.7027 6.35e-13 ***
## capital 0.230678 0.080201 2.8763 0.004467 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
For probit and logit, we use glm. Using it is very similar to using lm.
# recall:
head(mtcars)
## mpg cyl disp hp drat wt qsec vs am gear carb
## Mazda RX4 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4
## Mazda RX4 Wag 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4
## Datsun 710 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1
## Hornet 4 Drive 21.4 6 258 110 3.08 3.215 19.44 1 0 3 1
## Hornet Sportabout 18.7 8 360 175 3.15 3.440 17.02 0 0 3 2
## Valiant 18.1 6 225 105 2.76 3.460 20.22 1 0 3 1
## wt2
## Mazda RX4 6.864400
## Mazda RX4 Wag 8.265625
## Datsun 710 5.382400
## Hornet 4 Drive 10.336225
## Hornet Sportabout 11.833600
## Valiant 11.971600
# let's run probit with an arbitrary formula.
probitFit = glm(am ~ mpg + disp, family = binomial(link="probit"), data = mtcars)
probitFit
##
## Call: glm(formula = am ~ mpg + disp, family = binomial(link = "probit"),
## data = mtcars)
##
## Coefficients:
## (Intercept) mpg disp
## -1.439509 0.104365 -0.004225
##
## Degrees of Freedom: 31 Total (i.e. Null); 29 Residual
## Null Deviance: 43.23
## Residual Deviance: 28.6 AIC: 34.6
summary(probitFit)
##
## Call:
## glm(formula = am ~ mpg + disp, family = binomial(link = "probit"),
## data = mtcars)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.5240 -0.6536 -0.3211 0.5906 2.1394
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.439509 2.659683 -0.541 0.588
## mpg 0.104365 0.092554 1.128 0.259
## disp -0.004225 0.004314 -0.979 0.327
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 43.230 on 31 degrees of freedom
## Residual deviance: 28.601 on 29 degrees of freedom
## AIC: 34.601
##
## Number of Fisher Scoring iterations: 6
# let's run logit.
logitFit = glm(am ~ mpg + disp, family = binomial(link="logit"), data = mtcars)
logitFit
##
## Call: glm(formula = am ~ mpg + disp, family = binomial(link = "logit"),
## data = mtcars)
##
## Coefficients:
## (Intercept) mpg disp
## -2.256714 0.169978 -0.007615
##
## Degrees of Freedom: 31 Total (i.e. Null); 29 Residual
## Null Deviance: 43.23
## Residual Deviance: 28.61 AIC: 34.61
summary(logitFit)
##
## Call:
## glm(formula = am ~ mpg + disp, family = binomial(link = "logit"),
## data = mtcars)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.5186 -0.6261 -0.3316 0.5960 2.1655
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.256714 4.760079 -0.474 0.635
## mpg 0.169978 0.168373 1.010 0.313
## disp -0.007615 0.007811 -0.975 0.330
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 43.230 on 31 degrees of freedom
## Residual deviance: 28.606 on 29 degrees of freedom
## AIC: 34.606
##
## Number of Fisher Scoring iterations: 5
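As with lm, the fitted objects work with predict; for binary-response models, type = "response" returns fitted probabilities. A sketch (output omitted):
predict(probitFit, type = "response") # fitted probabilities that am = 1
predict(logitFit, type = "response")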