## ----echo = TRUE, eval = FALSE, warning=FALSE--------------------------------- # install.packages("wooldridge") ## ----echo = TRUE, eval = TRUE, warning=FALSE, message=FALSE------------------- library(wooldridge) ## ----echo=FALSE, eval=TRUE, warning=FALSE, message=FALSE---------------------- library(stargazer) library(knitr) ## ----message=FALSE, eval=FALSE------------------------------------------------ # data("wage1") # # ?wage1 ## ----echo=FALSE--------------------------------------------------------------- plot(y = wage1$wage, x = wage1$educ, col = "darkgreen", pch = 21, bg = "lightgrey", cex=1.25, xaxt="n", frame = FALSE, main = "Wages vs. Education, 1976", xlab = "years of education", ylab = "Hourly wages") axis(side = 1, at = c(0,6,12,18)) rug(wage1$wage, side=2, col="darkgreen") ## ----------------------------------------------------------------------------- log_wage_model <- lm(lwage ~ educ, data = wage1) ## ----echo = TRUE, eval = FALSE, warning=FALSE--------------------------------- # summary(log_wage_model) ## ----results='asis', echo=FALSE, warning=FALSE, message=FALSE----------------- stargazer(type = "html", log_wage_model, single.row = TRUE, header = FALSE, digits = 5) ## ----echo=FALSE--------------------------------------------------------------- plot(y = wage1$lwage, x = wage1$educ, main = "A Log Wage Equation", col = "darkgreen", pch = 21, bg = "lightgrey", cex=1.25, xlab = "years of education", ylab = "log of average hourly wages", xaxt="n", frame = FALSE) axis(side = 1, at = c(0,6,12,18)) abline(log_wage_model, col = "blue", lwd=2) rug(wage1$lwage, side=2, col="darkgreen") ## ----eval=FALSE--------------------------------------------------------------- # ?wage1 ## ----fig.height=3, echo=FALSE------------------------------------------------- par(mfrow=c(1,3)) plot(y = wage1$lwage, x = wage1$educ, col="darkgreen", xaxt="n", frame = FALSE, main = "years of education", xlab = "", ylab = "") mtext(side=2, line=2.5, "Hourly wages", cex=1.25) axis(side = 1, at = c(0,6,12,18)) abline(lm(lwage ~ educ, data=wage1), col = "darkblue", lwd=2) plot(y = wage1$lwage, x = wage1$exper, col="darkgreen", xaxt="n", frame = FALSE, main = "years of experience", xlab = "", ylab = "") axis(side = 1, at = c(0,12.5,25,37.5,50)) abline(lm(lwage ~ exper, data=wage1), col = "darkblue", lwd=2) plot(y = wage1$lwage, x = wage1$tenure, col="darkgreen", xaxt="n", frame = FALSE, main = "years with employer", xlab = "", ylab = "") axis(side = 1, at = c(0,11,22,33,44)) abline(lm(lwage ~ tenure, data=wage1), col = "darkblue", lwd=2) ## ----------------------------------------------------------------------------- hourly_wage_model <- lm(lwage ~ educ + exper + tenure, data = wage1) ## ----eval=FALSE--------------------------------------------------------------- # coefficients(hourly_wage_model) ## ----echo=FALSE--------------------------------------------------------------- kable(coefficients(hourly_wage_model), digits=4, col.names = "Coefficients", align = 'l') ## ----echo=FALSE--------------------------------------------------------------- barplot(sort(100*hourly_wage_model$coefficients[-1]), horiz=TRUE, las=1, ylab = " ", main = "Coefficients of Hourly Wage Equation") ## ----eval=FALSE--------------------------------------------------------------- # summary(hourly_wage_model) ## ----results='asis', echo=FALSE, warning=FALSE, message=FALSE----------------- stargazer(type = "html", hourly_wage_model, single.row = TRUE, header = FALSE, digits=5) ## ----eval=FALSE--------------------------------------------------------------- # summary(hourly_wage_model)$coefficients ## ----echo=FALSE--------------------------------------------------------------- kable(summary(hourly_wage_model)$coefficients, align="l", digits=5) ## ----fig.height=8, eval=FALSE, echo=FALSE------------------------------------- # par(mfrow=c(2,2)) # # plot(y = hourly_wage_model$residuals, x = hourly_wage_model$fitted.values , col="darkgreen", xaxt="n", # frame = FALSE, main = "Fitted Values", xlab = "", ylab = "") # mtext(side=2, line=2.5, "Model Residuals", cex=1.25) # abline(0, 0, col = "darkblue", lty=2, lwd=2) # # plot(y = hourly_wage_model$residuals, x = wage1$educ, col="darkgreen", xaxt="n", # frame = FALSE, main = "years of education", xlab = "", ylab = "") # axis(side = 1, at = c(0,6,12,18)) # abline(0, 0, col = "darkblue", lty=2, lwd=2) # # plot(y = hourly_wage_model$residuals, x = wage1$exper, col="darkgreen", xaxt="n", # frame = FALSE, main = "years of experience", xlab = "", ylab = "") # mtext(side=2, line=2.5, "Model Residuals", cex=1.25) # axis(side = 1, at = c(0,12.5,25,37.5,50)) # abline(0, 0, col = "darkblue", lty=2, lwd=2) # # plot(y = hourly_wage_model$residuals, x = wage1$tenure, col="darkgreen", xaxt="n", # frame = FALSE, main = "years with employer", xlab = "", ylab = "") # axis(side = 1, at = c(0,11,22,33,44)) # abline(0, 0, col = "darkblue", lty=2, lwd=2) ## ----echo=FALSE--------------------------------------------------------------- barplot(sort(summary(hourly_wage_model)$coefficients[-1, "t value"]), horiz=TRUE, las=1, ylab = " ", main = "t statistics of Hourly Wage Equation") ## ----echo = TRUE, eval = TRUE, warning=FALSE, message=FALSE------------------- data("jtrain") ## ----echo = TRUE, eval = FALSE, warning=FALSE--------------------------------- # ?jtrain ## ----------------------------------------------------------------------------- jtrain_subset <- subset(jtrain, subset = (year == 1987 & union == 0), select = c(year, union, lscrap, hrsemp, lsales, lemploy)) ## ----------------------------------------------------------------------------- sum(is.na(jtrain_subset)) ## ----------------------------------------------------------------------------- jtrain_clean <- na.omit(jtrain_subset) ## ----echo=FALSE, fig.height=3------------------------------------------------- par(mfrow=c(1,3)) point_size <- 1.75 plot(y = jtrain_clean$lscrap, x = jtrain_clean$hrsemp, frame = FALSE, main = "Total (hours/employees) trained", ylab = "", xlab="", pch = 21, bg = "lightgrey", cex=point_size) mtext(side=2, line=2, "Log(scrap rate)", cex=1.25) abline(lm(lscrap ~ hrsemp, data=jtrain_clean), col = "blue", lwd=2) plot(y = jtrain_clean$lscrap, x = jtrain_clean$lsales, frame = FALSE, main = "Log(annual sales $)", ylab = " ", xlab="", pch = 21, bg = "lightgrey", cex=point_size) abline(lm(lscrap ~ lsales, data=jtrain_clean), col = "blue", lwd=2) plot(y = jtrain_clean$lscrap, x = jtrain_clean$lemploy, frame = FALSE, main = "Log(# employees at plant)", ylab = " ", xlab="", pch = 21, bg = "lightgrey", cex=point_size) abline(lm(lscrap ~ lemploy, data=jtrain_clean), col = "blue", lwd=2) ## ----------------------------------------------------------------------------- linear_model <- lm(lscrap ~ hrsemp + lsales + lemploy, data = jtrain_clean) ## ----eval=FALSE, warning=FALSE, message=FALSE--------------------------------- # summary(linear_model) ## ----results='asis', echo=FALSE, warning=FALSE, message=FALSE----------------- stargazer(type = "html", linear_model, single.row = TRUE, header = FALSE, digits=5) ## ----echo=FALSE, eval=FALSE--------------------------------------------------- # #Plot the coefficients, representing the impact of each variable on $log($`scrap`$)$ for a quick comparison. As you can observe, for some variables, the confidence intervals are wider than others. # coefficient <- coef(linear_model)[-1] # confidence <- confint(linear_model, level = 0.95)[-1,] # # graph <- drop(barplot(coefficient, ylim = range(c(confidence)), # main = "Coefficients & 95% C.I. of variables on Firm Scrap Rates")) # # arrows(graph, coefficient, graph, confidence[,1], angle=90, length=0.55, col="blue", lwd=2) # arrows(graph, coefficient, graph, confidence[,2], angle=90, length=0.55, col="blue", lwd=2) # ## ----------------------------------------------------------------------------- data("hprice3") ## ----echo=FALSE, fig.align='center'------------------------------------------- par(mfrow=c(1,2)) plot(y = hprice3$price, x = hprice3$dist, main = " ", xlab = "Distance to Incinerator in feet", ylab = "Selling Price", frame = FALSE, pch = 21, bg = "lightgrey") abline(lm(price ~ dist, data=hprice3), col = "blue", lwd=2) ## ----------------------------------------------------------------------------- price_dist_model <- lm(lprice ~ ldist, data = hprice3) ## ----------------------------------------------------------------------------- price_area_model <- lm(lprice ~ ldist + larea, data = hprice3) ## ----eval=FALSE--------------------------------------------------------------- # summary(price_dist_model) # summary(price_area_model) ## ----results='asis', echo=FALSE, warning=FALSE, message=FALSE----------------- stargazer(type = "html",price_dist_model, price_area_model, single.row = TRUE, header = FALSE, digits=5) ## ----echo=FALSE--------------------------------------------------------------- par(mfrow=c(1,2)) point_size <- 0.80 plot(y = hprice3$lprice, x = hprice3$ldist, frame = FALSE, main = "Log(distance from incinerator)", ylab = "", xlab="", pch = 21, bg = "lightgrey", cex=point_size) mtext(side=2, line=2, "Log( selling price )", cex=1.25) abline(lm(lprice ~ ldist, data=hprice3), col = "blue", lwd=2) plot(y = hprice3$lprice, x = hprice3$larea, frame = FALSE, main = "Log(square footage of house)", ylab = " ", xlab="", pch = 21, bg = "lightgrey", cex=point_size) abline(lm(lprice ~ larea, data=hprice3), col = "blue", lwd=2) ## ----message=FALSE, eval=FALSE------------------------------------------------ # data("hprice2") # ?hprice2 ## ----------------------------------------------------------------------------- housing_level <- lm(price ~ nox + crime + rooms + dist + stratio, data = hprice2) ## ----------------------------------------------------------------------------- housing_standardized <- lm(scale(price) ~ 0 + scale(nox) + scale(crime) + scale(rooms) + scale(dist) + scale(stratio), data = hprice2) ## ----eval=FALSE--------------------------------------------------------------- # summary(housing_level) # summary(housing_standardized) ## ----results='asis', echo=FALSE, warning=FALSE, message=FALSE----------------- stargazer(type = "html",housing_level, housing_standardized, single.row = TRUE, header = FALSE, digits=5) ## ----------------------------------------------------------------------------- housing_model_4.5 <- lm(lprice ~ lnox + log(dist) + rooms + stratio, data = hprice2) housing_model_6.2 <- lm(lprice ~ lnox + log(dist) + rooms + I(rooms^2) + stratio, data = hprice2) ## ----eval=FALSE--------------------------------------------------------------- # summary(housing_model_4.5) # summary(housing_model_6.2) ## ----results='asis', echo=FALSE, warning=FALSE, message=FALSE----------------- stargazer(type = "html", housing_model_4.5 , housing_model_6.2, single.row = TRUE, header = FALSE, digits=5) ## ----------------------------------------------------------------------------- beta_1 <- summary(housing_model_6.2)$coefficients["rooms",1] beta_2 <- summary(housing_model_6.2)$coefficients["I(rooms^2)",1] turning_point <- abs(beta_1 / (2*beta_2)) print(turning_point) ## ----------------------------------------------------------------------------- Rooms <- c(min(hprice2$rooms), 4, turning_point, 5, 5.5, 6.45, 7.5, max(hprice2$rooms)) Percent.Change <- 100*(beta_1 + 2*beta_2*Rooms) kable(data.frame(Rooms, Percent.Change)) ## ----echo=FALSE--------------------------------------------------------------- from <- min(hprice2$rooms) to <- max(hprice2$rooms) rooms <- seq(from=from, to =to, by = ((to - from)/(NROW(hprice2)-1))) quadratic <- abs(100*summary(housing_model_6.2)$coefficients["rooms",1] + 200*summary(housing_model_6.2)$coefficients["I(rooms^2)",1]*rooms) housing_model_frame <- model.frame(housing_model_6.2) housing_sq <- abs(beta_1*housing_model_frame[,"rooms"]) + beta_2*housing_model_frame[,"I(rooms^2)"] ## ----echo=FALSE--------------------------------------------------------------- rooms_interaction <- lm(lprice ~ rooms + I(rooms^2), data = hprice2) par(mfrow=c(1,2)) plot(y = hprice2$lprice, x = hprice2$rooms, xaxt="n", pch = 21, bg = "lightgrey", frame = FALSE, main = "lprice ~ rooms", xlab = "Rooms", ylab = "") mtext(side=2, line=2, "Log( selling price )", cex=1.25) axis(side = 1, at = c(min(hprice2$rooms), 4, 5, 6, 7, 8, max(hprice2$rooms))) abline(lm(lprice ~ rooms, data = hprice2), col="red", lwd=2.5) plot(y = hprice2$lprice, x = hprice2$rooms, xaxt="n", pch = 21, bg = "lightgrey", frame = FALSE, main = "lprice ~ rooms + I(rooms^2)", xlab = "Rooms", ylab = " ") axis(side = 1, at = c(min(hprice2$rooms), 4, 5, 6, 7, 8, max(hprice2$rooms))) lines(sort(hprice2$rooms), sort(fitted(rooms_interaction)), col = "red", lwd=2.5) ## ----------------------------------------------------------------------------- data("hprice1") ## ----eval=FALSE--------------------------------------------------------------- # ?hprice1 ## ----fig.height=8, eval=FALSE, echo=FALSE------------------------------------- # par(mfrow=c(2,2)) # # palette(rainbow(6, alpha = 0.8)) # plot(y = hprice1$lprice, x = hprice1$llotsize, col=hprice1$bdrms, pch = 19, # frame = FALSE, main = "Log(lot size)", xlab = "", ylab = "") # mtext(side=2, line=2, "Log( selling price )", cex=1.25) # # # plot(y = hprice1$lprice, x = hprice1$lsqrft, col=hprice1$bdrms, pch=19, # frame = FALSE, main = "Log(home size)", xlab = "Rooms", ylab = " ") # legend(8, 5.8, sort(unique(hprice1$bdrms)), col = 1:length(hprice1$bdrms), # pch=19, title = "bdrms") # # # hprice1$colonial <- as.factor(hprice1$colonial) # # palette(rainbow(2, alpha = 0.8)) # plot(y = hprice1$lprice, x = hprice1$llotsize, col=hprice1$colonial, pch = 19, bg = "lightgrey", # frame = FALSE, main = "Log(lot size)", xlab = "", ylab = "") # mtext(side=2, line=2, "Log( selling price )", cex=1.25) # # # plot(y = hprice1$lprice, x = hprice1$lsqrft, col=hprice1$colonial, pch=19, # frame = FALSE, main = "Log(home size)", xlab = "Rooms", ylab = " ") # legend(8, 5.25, unique(hprice1$colonial), col=1:length(hprice1$colonial), pch=19, title = "colonial") ## ----------------------------------------------------------------------------- housing_qualitative <- lm(lprice ~ llotsize + lsqrft + bdrms + colonial, data = hprice1) ## ----eval=FALSE--------------------------------------------------------------- # summary(housing_qualitative) ## ----results='asis', echo=FALSE, warning=FALSE, message=FALSE----------------- stargazer(type = "html",housing_qualitative, single.row = TRUE, header = FALSE, digits=5) ## ----message=FALSE------------------------------------------------------------ data("gpa1") gpa1$parcoll <- as.integer(gpa1$fathcoll==1 | gpa1$mothcoll) GPA_OLS <- lm(PC ~ hsGPA + ACT + parcoll, data = gpa1) ## ----------------------------------------------------------------------------- weights <- GPA_OLS$fitted.values * (1-GPA_OLS$fitted.values) GPA_WLS <- lm(PC ~ hsGPA + ACT + parcoll, data = gpa1, weights = 1/weights) ## ----results='asis', echo=FALSE, warning=FALSE, message=FALSE----------------- stargazer(type = "html",GPA_OLS, GPA_WLS, single.row = TRUE, header = FALSE, digits=5) ## ----message=FALSE------------------------------------------------------------ data("rdchem") all_rdchem <- lm(rdintens ~ sales + profmarg, data = rdchem) ## ----echo=FALSE--------------------------------------------------------------- plot_title <- "FIGURE 9.1: Scatterplot of R&D intensity against firm sales" x_axis <- "firm sales (in millions of dollars)" y_axis <- "R&D as a percentage of sales" plot(rdintens ~ sales, pch = 21, bg = "lightgrey", data = rdchem, main = plot_title, xlab = x_axis, ylab = y_axis) ## ----------------------------------------------------------------------------- smallest_rdchem <- lm(rdintens ~ sales + profmarg, data = rdchem, subset = (sales < max(sales))) ## ----results='asis', echo=FALSE, warning=FALSE, message=FALSE----------------- stargazer(type = "html",all_rdchem, smallest_rdchem, single.row = TRUE, header = FALSE, digits=5) ## ----message=FALSE------------------------------------------------------------ data("intdef") # load data # load eXtensible Time Series package. # xts is excellent for time series plots and # properly indexing time series. library(xts) # create xts object from data.frame # First, index year as yearmon class of monthly data. # Note: I add 11/12 to set the month to December, end of year. index <- zoo::as.yearmon(intdef$year + 11/12) # Next, create the xts object, ordering by the index above. intdef.xts <- xts(intdef[ ,-1], order.by = index) # extract 3-month Tbill, inflation, and deficit data intdef.xts <- intdef.xts[ ,c("i3", "inf", "def")] # rename with clearer names colnames(intdef.xts) <- c("Tbill3mo", "cpi", "deficit") # plot the object, add a title, and place legend at top left. plot(x = intdef.xts, main = "Inflation, Deficits, and Interest Rates", legend.loc = "topleft") # Run a Linear regression model tbill_model <- lm(Tbill3mo ~ cpi + deficit, data = intdef.xts) ## ----results='asis', echo=FALSE, warning=FALSE, message=FALSE----------------- stargazer(type = "html",tbill_model, single.row = TRUE, header = FALSE, digits=5) ## ----eval=FALSE, FALSE, message=FALSE, warning=FALSE-------------------------- # # DO NOT RUN # library(quantmod) # # # Tbill, 3 month # getSymbols("TB3MS", src = "FRED") # # convert to annual observations and convert index to type `yearmon`. # TB3MS <- to.yearly(TB3MS, OHLC=FALSE, drop.time = TRUE) # index(TB3MS) <- zoo::as.yearmon(index(TB3MS)) # # # Inflation # getSymbols("FPCPITOTLZGUSA", src = "FRED") # # Convert the index to yearmon and shift FRED's Jan 1st to Dec # index(FPCPITOTLZGUSA) <- zoo::as.yearmon(index(FPCPITOTLZGUSA)) + 11/12 # # Rename and update column names # inflation <- FPCPITOTLZGUSA # colnames(inflation) <- "inflation" # # ## Deficit, percent of GDP: Federal outlays - federal receipts # # Download outlays # getSymbols("FYFRGDA188S", src = "FRED") # # Lets move the index from Jan 1st to Dec 30th/31st # index(FYFRGDA188S) <- zoo::as.yearmon(index(FYFRGDA188S)) + 11/12 # # Rename and update column names # outlays <- FYFRGDA188S # colnames(outlays) <- "outlays" # # # Download receipts # getSymbols("FYONGDA188S", src = "FRED") # # Lets move the index from Jan 1st to Dec 30th/31st # index(FYONGDA188S) <- zoo::as.yearmon(index(FYONGDA188S)) + 11/12 # # Rename and update column names # receipts <- FYONGDA188S # colnames(receipts) <- "receipts" ## ----eval=FALSE, message=FALSE, warning=FALSE--------------------------------- # # DO NOT RUN # # create deficits from outlays - receipts # # xts objects respect their indexing and outline the future # deficit <- outlays - receipts # colnames(deficit) <- "deficit" # # # Merge and remove leading and trailing NAs for a balanced data matrix # intdef_updated <- merge(TB3MS, inflation, deficit) # intdef_updated <- zoo::na.trim(intdef_updated) # # #Plot all # plot(intdef_updated, # main = "T-bill (3mo rate), inflation, and deficit (% of GDP)", # legend.loc = "topright",) ## ----eval=FALSE--------------------------------------------------------------- # # DO NOT RUN # updated_model <- lm(TB3MS ~ inflation + deficit, data = intdef_updated) ## ----eval=FALSE, results='asis', echo=FALSE, warning=FALSE, message=FALSE----- # #DO NOT RUN # updated_model <- lm(TB3MS ~ inflation + deficit, data = intdef_updated) # # stargazer(type = "html", updated_model, single.row = TRUE, header = FALSE, digits=5) ## ----message=FALSE------------------------------------------------------------ data("earns") wage_time <- lm(lhrwage ~ loutphr + t, data = earns) ## ----------------------------------------------------------------------------- wage_diff <- lm(diff(lhrwage) ~ diff(loutphr), data = earns) ## ----results='asis', echo=FALSE, warning=FALSE, message=FALSE----------------- stargazer(type = "html",wage_time, wage_diff, single.row = TRUE, header = FALSE, digits=5) ## ----message=FALSE, eval=FALSE------------------------------------------------ # data("nyse") # ?nyse ## ----------------------------------------------------------------------------- return_AR1 <-lm(return ~ return_1, data = nyse) ## ----------------------------------------------------------------------------- return_mu <- residuals(return_AR1) mu2_hat_model <- lm(return_mu^2 ~ return_1, data = return_AR1$model) ## ----results='asis', echo=FALSE, warning=FALSE, message=FALSE----------------- stargazer(type = "html",return_AR1, mu2_hat_model, single.row = TRUE, header = FALSE, digits=5) ## ----------------------------------------------------------------------------- mu2_hat <- return_mu[-1]^2 mu2_hat_1 <- return_mu[-NROW(return_mu)]^2 arch_model <- lm(mu2_hat ~ mu2_hat_1) ## ----results='asis', echo=FALSE, warning=FALSE, message=FALSE----------------- stargazer(type = "html",arch_model, single.row = TRUE, header = FALSE, digits=5) ## ----message=FALSE, eval=FALSE------------------------------------------------ # data("traffic1") # ?traffic1 ## ----------------------------------------------------------------------------- DD_model <- lm(cdthrte ~ copen + cadmn, data = traffic1) ## ----results='asis', echo=FALSE, warning=FALSE, message=FALSE----------------- stargazer(type = "html",DD_model, single.row = TRUE, header = FALSE, digits=5) ## ----message=FALSE, eval=FALSE------------------------------------------------ # data("phillips") # ?phillips ## ----------------------------------------------------------------------------- phillips_train <- subset(phillips, year <= 1996) unem_AR1 <- lm(unem ~ unem_1, data = phillips_train) ## ----------------------------------------------------------------------------- unem_inf_VAR1 <- lm(unem ~ unem_1 + inf_1, data = phillips_train) ## ----results='asis', echo=FALSE, warning=FALSE, message=FALSE----------------- stargazer(type = "html",unem_AR1, unem_inf_VAR1, single.row = TRUE, header = FALSE, digits=5) ## ----warning=FALSE, message=FALSE, echo=TRUE---------------------------------- phillips_test <- subset(phillips, year >= 1997) AR1_forecast <- predict.lm(unem_AR1, newdata = phillips_test) VAR1_forecast <- predict.lm(unem_inf_VAR1, newdata = phillips_test) kable(cbind(phillips_test[ ,c("year", "unem")], AR1_forecast, VAR1_forecast))