Loading packages

library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.1     ✔ stringr   1.6.0
## ✔ ggplot2   4.0.0     ✔ tibble    3.3.0
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.2.0     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(readxl)
library(ggplot2)
library(dplyr)
library(readr)

Importing the CSV file

# Read the emissions / temperature-change records from the CSV file into a
# data frame.
g_warming_data <- read.csv(
  file = "~/Downloads/global_warming_data.csv"
)

Viewing the data

# Print the first rows of the data frame to check the columns and values.
head(g_warming_data)
##   Year Total.Emissions Temp.Change
## 1 1880             236       -0.19
## 2 1881             243       -0.10
## 3 1882             256       -0.11
## 4 1883             272       -0.19
## 5 1884             275       -0.29
## 6 1885             277       -0.32

Summary of the data

# Print per-column summary statistics (min, quartiles, mean, max).
summary(g_warming_data)
##       Year      Total.Emissions   Temp.Change      
##  Min.   :1880   Min.   : 236.0   Min.   :-0.49000  
##  1st Qu.:1912   1st Qu.: 827.5   1st Qu.:-0.22000  
##  Median :1945   Median :1383.0   Median :-0.09000  
##  Mean   :1945   Mean   :2746.8   Mean   :-0.01389  
##  3rd Qu.:1978   3rd Qu.:5056.5   3rd Qu.: 0.16000  
##  Max.   :2010   Max.   :9167.0   Max.   : 0.70000

Fitting linear regression model

# Simple linear regression of temperature change on total emissions.
# Both column names are syntactic, so the formula needs no backticks.
model <- lm(
  formula = Temp.Change ~ Total.Emissions,
  data = g_warming_data
)

Viewing the model summary

# Print the coefficient table, residual quantiles, R-squared and F-statistic
# for the fitted regression.
summary(model)
## 
## Call:
## lm(formula = Temp.Change ~ Total.Emissions, data = g_warming_data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.31368 -0.08822  0.00097  0.08986  0.36404 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     -2.962e-01  1.679e-02  -17.64   <2e-16 ***
## Total.Emissions  1.028e-04  4.497e-06   22.85   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1301 on 129 degrees of freedom
## Multiple R-squared:  0.8019, Adjusted R-squared:  0.8004 
## F-statistic: 522.3 on 1 and 129 DF,  p-value: < 2.2e-16

Scatter plot with trend line

# Scatter plot of temperature change against emissions, with the fitted
# least-squares line overlaid and no confidence band (se = FALSE).
ggplot(
  data = g_warming_data,
  mapping = aes(x = Total.Emissions, y = Temp.Change)
) +
  geom_point(colour = "blue") +
  geom_smooth(method = "lm", colour = "purple", se = FALSE) +
  labs(
    title = "Relationship Between Total CO2 Emissions and Global Temperature",
    x = "CO2 Emissions",
    y = "Temperature Change (°C)"
  ) +
  theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'

Extracting residuals and fitted values

# Pull the fitted values and the residuals out of the regression model.
# NOTE(review): both variable names shadow the stats functions of the same
# name. Calls such as fitted(model) keep working because R skips non-function
# objects when resolving a call, but distinct names would read more clearly.
fitted <- fitted(object = model)
residuals <- resid(object = model)

Residuals vs. Fitted Values

# Residuals against fitted values, with a dashed reference line at zero.
plot(
  x = fitted,
  y = residuals,
  main = "Residuals vs. Fitted Values",
  xlab = "Fitted Values",
  ylab = "Residuals",
  col = "blue",
  pch = 19
)
abline(h = 0, lty = 2, col = "purple")

Normal Q-Q Plot

# Normal Q-Q plot of the residuals, with the reference line drawn on top.
qqnorm(y = residuals, main = "Normal Q-Q Plot of Residuals")
qqline(y = residuals, lwd = 2, col = "purple")

Statistical inference on the model

# Print the analysis-of-variance table for the fitted regression.
anova(model)
## Analysis of Variance Table
## 
## Response: Temp.Change
##                  Df Sum Sq Mean Sq F value    Pr(>F)    
## Total.Emissions   1 8.8443  8.8443  522.31 < 2.2e-16 ***
## Residuals       129 2.1844  0.0169                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Print 95% confidence intervals for the intercept and the slope.
confint(model, level = 0.95)
##                         2.5 %        97.5 %
## (Intercept)     -3.293814e-01 -0.2629533781
## Total.Emissions  9.386763e-05  0.0001116606