## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

## ----setup--------------------------------------------------------------------
rm(list=ls(all=TRUE))
library(rdhte)
library(rdrobust)
library(sandwich)
library(multcomp)


# Generate data
set.seed(123)
n <- 5000
X <- runif(n, -1, 1) * 2  # Running variable in [-2,2]
# Cluster variable (C) with 10 clusters
C <- sample(1:10, n, replace = TRUE)  # categorical variable (e.g., region, firm)

# Heterogeneity variables
W1 <- rbinom(n, 1, 0.5)  # Binary variable (e.g., gender, policy group)
W2 <- sample(1:3, n, replace = TRUE)  # W takes values 1, 2, or 3
W3 <- runif(n, 0, 10)  # Continuous variable (e.g., income, age)

# Define heterogeneous treatment effects
tau_W1 <- 3 * W1  # The treatment effect depends on W
tau_W2 <- ifelse(W2 == 1, 2, ifelse(W2 == 2, 4, 6))  # Treatment effect varies by W
tau_W3 <- 2 + 0.5 * W3  # Treatment effect increases with W
# Define heterogeneous treatment effects
tau_W4 <- ifelse(W1 == 0 & W2 == 1, 2,  # Low education, male
                ifelse(W1 == 0 & W2 == 2, 4,  # Medium education, male
                       ifelse(W1 == 0 & W2 == 3, 6,  # High education, male
                              ifelse(W1 == 1 & W2 == 1, 3,  # Low education, female
                                     ifelse(W1 == 1 & W2 == 2, 5,  # Medium education, female
                                            7)))))  # High education, female

error_term <- rep(rnorm(10, 0, 1), length.out = n)  # Cluster-level errors
epsilon <- error_term + rnorm(n)  # Add individual-level noise to the errors

# Generate outcome variable with heterogeneous treatment effect at X = 0
Y1 <- 3 + 2 * X + 1.5 * X^2 + 0.5 * X^3 + sin(2 * X) + tau_W1 * (X >= 0) + rnorm(n)  # Treatment effect = 3*W at cutoff
Y2 <- 3 + 2 * X + 1.5 * X^2 + 0.5 * X^3 + sin(2 * X) + tau_W2 * (X >= 0) + rnorm(n)  # Treatment effect = tau_W at cutoff
Y3 <- 3 + 2 * X + 1.5 * X^2 + 0.5 * X^3 + sin(2 * X) + tau_W3 * (X >= 0) + rnorm(n)
Y4 <- 3 + 2 * X + 1.5 * X^2 + 0.5 * X^3 + sin(2 * X) + tau_W4 * (X >= 0) + rnorm(n)
Y5 <- 3 + 2 * X + 1.5 * X^2 + 0.5 * X^3 + sin(2 * X) + tau_W2 * (X >= 0) + epsilon  

### Case 1: one subgroup variable W2.
m1 <- rdhte(y = Y2, x = X, covs.hte = factor(W2))
summary(m1)
linfct <- c("`factor(W2)2` - `factor(W2)3` = 0", "`factor(W2)2` = 0")
rdhte_lincom(model = m1, linfct = linfct)

# Case 3: one continuous variable W3.
m2 <- rdhte(y = Y3, x = X, covs.hte = W3)
summary(m2)
linfct <- c("T - T:W3 = 0")
rdhte_lincom(model = m2, linfct = linfct)

# Case 3: two (or more) subgroup variables:
m3 <- rdhte(y = Y2, x = X, covs.hte = factor(W2):factor(W1))
summary(m3)
linfct <- c("`factor(W2):factor(W1)1:0` - `factor(W2):factor(W1)1:1`= 0")
rdhte_lincom(model = m3, linfct = linfct)

# Case 4: two (or more) continuous variables (this includes polynomials). 
m4 <- rdhte(y = Y3, x = X, covs.hte = "W2 + W3")
summary(m4)
linfct <- c("T:W2 - T:W3 = 0")
rdhte_lincom(model = m4, linfct = linfct)

# Case 5: interaction between continous and subgroup varibles.
m5 <- rdhte(y = Y3, x = X, covs.hte = "W3*factor(W1)")
summary(m5)
linfct <- c("T:W3.factor.W1.1 = 0")
rdhte_lincom(model = m5, linfct = linfct)

# Format: cases 1-3 can be either a formula or a expression. Cases 4-5 only work as a formula, thus they need to be included in quotes. 
# Note: some expressions work either way, but with different interpretations. For example, W1 + W2 vs "W1 + W2".
# Exact expressions for setting the linear restrictions in `linfct` can be obtained from the names in the `coef` object returned by `rdhte`.