library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.1 ✔ stringr 1.6.0
## ✔ ggplot2 4.0.0 ✔ tibble 3.3.0
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.2.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(readxl)
library(ggplot2)
library(dplyr)
library(readr)
# Load the emissions / temperature-change table from the Downloads folder
# and preview its first six rows.
g_warming_data <- read.csv(file = "~/Downloads/global_warming_data.csv")
head(g_warming_data, n = 6L)
## Year Total.Emissions Temp.Change
## 1 1880 236 -0.19
## 2 1881 243 -0.10
## 3 1882 256 -0.11
## 4 1883 272 -0.19
## 5 1884 275 -0.29
## 6 1885 277 -0.32
# Per-column summary statistics (min, quartiles, mean, max).
summary(object = g_warming_data)
## Year Total.Emissions Temp.Change
## Min. :1880 Min. : 236.0 Min. :-0.49000
## 1st Qu.:1912 1st Qu.: 827.5 1st Qu.:-0.22000
## Median :1945 Median :1383.0 Median :-0.09000
## Mean :1945 Mean :2746.8 Mean :-0.01389
## 3rd Qu.:1978 3rd Qu.:5056.5 3rd Qu.: 0.16000
## Max. :2010 Max. :9167.0 Max. : 0.70000
# Simple linear regression of temperature change on total emissions,
# followed by the coefficient table and fit statistics.
model <- lm(
  formula = Temp.Change ~ Total.Emissions,
  data = g_warming_data
)
summary(object = model)
##
## Call:
## lm(formula = Temp.Change ~ Total.Emissions, data = g_warming_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.31368 -0.08822 0.00097 0.08986 0.36404
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.962e-01 1.679e-02 -17.64 <2e-16 ***
## Total.Emissions 1.028e-04 4.497e-06 22.85 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1301 on 129 degrees of freedom
## Multiple R-squared: 0.8019, Adjusted R-squared: 0.8004
## F-statistic: 522.3 on 1 and 129 DF, p-value: < 2.2e-16
# Scatter plot of temperature change against total emissions with the
# least-squares line overlaid (no confidence band, since se = FALSE).
ggplot(
  data = g_warming_data,
  mapping = aes(x = Total.Emissions, y = Temp.Change)
) +
  geom_point(color = "blue") +
  geom_smooth(method = "lm", se = FALSE, color = "purple") +
  labs(
    title = "Relationship Between Total CO2 Emissions and Global Temperature",
    x = "CO2 Emissions",
    y = "Temperature Change (°C)"
  ) +
  theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'
# Residual diagnostics for `model`.
# Note: these two variables share their names with the stats functions
# residuals() and fitted(); calls to those functions still resolve to the
# functions, and the names are kept so any later code using them is unaffected.
fitted <- fitted(model)
residuals <- residuals(model)

# Residuals against fitted values, with a dashed reference line at zero.
plot(
  x = fitted,
  y = residuals,
  main = "Residuals vs. Fitted Values",
  xlab = "Fitted Values",
  ylab = "Residuals",
  pch = 19,
  col = "blue"
)
abline(h = 0, col = "purple", lty = 2)

# Normal Q-Q plot of the residuals with its reference line.
qqnorm(residuals, main = "Normal Q-Q Plot of Residuals")
qqline(residuals, col = "purple", lwd = 2)
# Statistical inference on the model ----
# Analysis-of-variance table for the fitted regression.
anova(object = model)
## Analysis of Variance Table
##
## Response: Temp.Change
## Df Sum Sq Mean Sq F value Pr(>F)
## Total.Emissions 1 8.8443 8.8443 522.31 < 2.2e-16 ***
## Residuals 129 2.1844 0.0169
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# 95% confidence intervals for the intercept and slope.
confint(object = model, level = 0.95)
## 2.5 % 97.5 %
## (Intercept) -3.293814e-01 -0.2629533781
## Total.Emissions 9.386763e-05 0.0001116606