# ---- Effect plots by covariate group ----
# Every panel in this section is the same flipped bar chart; only the data
# subset, the title and (for two panels) the bar order differ. The shared
# ggplot recipe therefore lives in one helper instead of being pasted per
# panel.

# Build one effect panel.
#   data   : data frame with the columns name, Effect, lower, upper, signif.
#   title  : plot title.
#   levels : optional character vector fixing the order of the bars; names
#            that are not listed become NA, as with ordered(name, levels = ).
# Reads cols, alphas, gmin and gmax from the calling workspace; they are not
# defined in this section (presumably set up earlier in the script).
plot_effects <- function(data, title, levels = NULL) {
  if (!is.null(levels)) {
    data$name <- ordered(data$name, levels = levels)
  }
  ggplot(data, aes(y = Effect, x = name)) +
    geom_col(aes(fill = factor(signif), alpha = factor(signif)), width = 0.75) +
    scale_fill_manual(values = cols) +
    scale_alpha_manual(values = alphas) +
    ylim(c(gmin, gmax)) +
    geom_errorbar(mapping = aes(ymin = lower, ymax = upper), width = 0.25, col = "grey46") +
    theme_minimal() +
    theme(axis.text.x = element_text(),
          axis.title.y = element_blank(),
          plot.title = element_text(hjust = 0.5)) +
    labs(title = title) +
    geom_hline(yintercept = 0, col = "grey46") +
    theme(legend.position = "none", axis.text = element_text(size = 11)) +
    coord_flip()
}

# Split a data frame row-wise into a first and a second half.
# seq_len() keeps the halves disjoint for 0 or 1 rows, where the previous
# 1:round(n / 2) indexing counted backwards and selected the same row twice.
split_halves <- function(data) {
  n_first <- round(nrow(data) / 2)
  list(
    first = data[seq_len(n_first), ],
    second = data[n_first + seq_len(nrow(data) - n_first), ]
  )
}

# Race / Ethnicity (df.race is not created in this section; presumably it is
# prepared just above)
grace <- plot_effects(df.race, "Race / Ethnicity")
grace
rm(df.race)

# Industry
df.ind1990c <- df[grep("ind1990c", df$name), ]
df.ind1990c$name <- str_replace(df.ind1990c$name, "ind1990c", "")
df.ind1990c$name <- str_replace(df.ind1990c$name, "PROFE", "Profess./Rel. services")
df.ind1990c$name <- str_replace(df.ind1990c$name, "FINANCE", "Finance/Insur./Real est.")
df.ind1990c$name <- str_replace(df.ind1990c$name, "TRANS", "Transportation")
df.ind1990c$name <- str_replace(df.ind1990c$name, "RETAIL", "Retail Trade")
df.ind1990c$name <- str_replace(df.ind1990c$name, "ADMIN", "Public Admin")
gind1990c <- plot_effects(df.ind1990c, "Industry")
gind1990c
rm(df.ind1990c)

# Occupation (split over two panels)
df.occ2010c <- df[grep("occ2010c", df$name), ]
df.occ2010c$name <- str_replace(df.occ2010c$name, "occ2010c", "")
occ_halves <- split_halves(df.occ2010c)
gocc2010c.1 <- plot_effects(occ_halves$first, "Occupation (1/2)")
gocc2010c.1
gocc2010c.2 <- plot_effects(occ_halves$second, "Occupation (2/2)")
gocc2010c.2
# Also drop the full subset, matching the clean-up of the degree-field data.
rm(occ_halves, df.occ2010c)

# Degree field (split over two panels)
df.degfield <- df[grep("degfield", df$name), ]
df.degfield$name <- str_replace(df.degfield$name, "degfield", "")
deg_halves <- split_halves(df.degfield)
gdegfield.1 <- plot_effects(deg_halves$first, "Field of Degree (1/2)")
gdegfield.1
gdegfield.2 <- plot_effects(deg_halves$second, "Field of Degree (2/2)")
gdegfield.2
rm(deg_halves, df.degfield)

# Experience and Veteran Status
df.expvetst <- df[c(grep("vetstat", df$name), grep("exp", df$name)), ]
df.expvetst$name <- str_replace(df.expvetst$name, "exp", "Experience")
df.expvetst$name <- str_replace(df.expvetst$name, "vetstatVeteran", "Veteran")
gexpvetst <- plot_effects(df.expvetst, "Experience, Veteran Status",
                          levels = c("Veteran", "Experience"))
gexpvetst
rm(df.expvetst)

# Hours Worked
df.hw <- df[grep("hw", df$name), ]
df.hw$name <- str_replace(df.hw$name, "hw", "")
df.hw$name <- str_replace(df.hw$name, "70plus", "> 70 ")
df.hw$name <- str_replace(df.hw$name, "to", " < ")
# The level labels must match the strings produced by the replacements above
# exactly, including the trailing blank in "> 70 ".
ghw <- plot_effects(df.hw, "Hours Worked",
                    levels = c("40 < 49",
                               "50 < 59",
                               "60 < 69",
                               "> 70 "))
ghw
rm(df.hw)
# Earlier attempt with align_plots(), kept for reference:
# p=align_plots(gmarst,
#               gdchlt,
#               grace,
#               gind1990c,
#               gocc2010c.1,
#               gocc2010c.2,
#               gdegfield.1,
#               gdegfield.2, align="hv")
#ggsave('descriptives_marst.png', path = paste0(mydir, "descriptives/graphs/"), p[[1]], width=12, height=7, dpi=120)
# https://cran.r-project.org/web/packages/cowplot/vignettes/plot_grid.html

# Panels shared by both figures, in display order (filled row by row into
# three columns).
core_panels <- list(gmarst, gdchlt, grace, gexpvetst,
                    gind1990c, gocc2010c.1, gocc2010c.2, ghw)
figure_dir <- paste0(mydir, "GenderWageGap/Paper_V4_short/figures/effectplots/")

# Figure 1: all panels, including the two degree-field panels.
plots <- plot_grid(plotlist = c(core_panels, list(gdegfield.1, gdegfield.2)),
                   ncol = 3, align = "v")
ggsave(filename = 'effects_bachelor_allsignificant.png',
       plot = plots,
       path = figure_dir,
       width = 20, height = 11.5, dpi = 120)

# Figure 2: the same grid without the degree-field panels.
plots <- plot_grid(plotlist = core_panels, ncol = 3, align = "v")
ggsave(filename = 'effects_bachelor_allsignificant_nodegfield.png',
       plot = plots,
       path = figure_dir,
       width = 20, height = 11.5, dpi = 120)
# Clear the workspace before the unrelated experiments that follow.
# The former `rm(list=ls)` (no call parentheses) handed rm() the function
# `ls` itself rather than a character vector of names and stopped with an
# error; a single working call is all that is needed.
rm(list = ls())
# ---- grf: causal forest on simulated data ----
# Install grf only when it is missing, instead of re-installing on every run.
if (!requireNamespace("grf", quietly = TRUE)) {
  install.packages("grf")
}
library(grf)

# Generate data.
n <- 2000
p <- 10
X <- matrix(rnorm(n * p), n, p)
X.test <- matrix(0, 101, p)
X.test[, 1] <- seq(-2, 2, length.out = 101)

# Train a causal forest.
W <- rbinom(n, 1, 0.4 + 0.2 * (X[, 1] > 0))
Y <- pmax(X[, 1], 0) * W + X[, 2] + pmin(X[, 3], 0) + rnorm(n)
tau.forest <- causal_forest(X, Y, W)
tau.forest

# Inspect the outcome and overlay the forest's predictions.
table(Y)
plot(Y)
# X is an n x p matrix, so the outcome is plotted against its first column
# only (the former `plot(Y~X)` against the whole matrix was dropped).
plot(Y ~ X[, 1])
# predict() on a grf forest returns a data frame; the estimates themselves
# are in its `predictions` column.
points(predict(tau.forest)$predictions ~ X[, 1], col = "red")
rm(list = ls())
# ---- hdm::rlassoEffects: one joint call vs. one call per target ----
# See ?rlassoEffects, ?rlassoEffect and ?lapply for the functions used here.
library(hdm)
library(ggplot2)
library(parallel)

set.seed(1)
n <- 100 # sample size
p <- 100 # number of variables
s <- 3 # number of non-zero coefficients
X <- matrix(rnorm(n * p), ncol = p)
colnames(X) <- paste0("X", seq_len(p))
beta <- c(rep(3, s), rep(0, p - s))
y <- 1 + X %*% beta + rnorm(n)
data <- data.frame(cbind(y, X))
colnames(data)[1] <- "y"
fm <- as.formula(paste("y ~", paste(colnames(X), collapse = "+")))

# Joint estimation: matrix interface first, then the formula interface.
lasso.effect <- rlassoEffects(X, y, index = c(1, 2, 3, 50))
lasso.effect <- rlassoEffects(fm, I = ~ X1 + X2 + X3 + X50, data = data)
print(lasso.effect)
summary(lasso.effect)
confint(lasso.effect)
plot(lasso.effect)
dim(X)

interactions.index <- c(1, 2, 3, 50)
indices <- interactions.index

# One double-selection fit for a single target index.
fit_target <- function(indexA) {
  rlassoEffects(x = X, y = y, index = indexA,
                method = "double selection", post = TRUE)
}

# mcMap() parallelises by forking; the parallel documentation states that
# mc.cores > 1 is not supported on Windows, so fall back to one core there.
n_cores <- if (.Platform$OS.type == "windows") 1L else 12L
rlassoEffects4b <- parallel::mcMap(fit_target, indices, mc.cores = n_cores)

# Sequential version of the same per-target fits.
listeffects <- Map(fit_target, indices)
unlist(listeffects, recursive = FALSE)
lapply(listeffects, summary)

# Joint fit again, for comparison with the per-target summaries above.
lasso.effect <- rlassoEffects(X, y, index = c(1, 2, 3, 50))
summary(lasso.effect)

# Per-target fits for the first two targets only.
indices <- interactions.index[1:2]
listeffects12 <- Map(fit_target, indices)
listeffects12
# ---- hdm::rlassoEffects: joint fit vs. Map() over the targets ----
# See ?Map for the iteration helper used below.
library(hdm)
library(ggplot2)

set.seed(1)
n <- 100 # sample size
p <- 100 # number of variables
s <- 3 # number of non-zero coefficients
X <- matrix(rnorm(n * p), ncol = p)
colnames(X) <- paste0("X", seq_len(p))
beta <- c(rep(3, s), rep(0, p - s))
y <- 1 + X %*% beta + rnorm(n)
data <- data.frame(cbind(y, X))
colnames(data)[1] <- "y"
fm <- as.formula(paste("y ~", paste(colnames(X), collapse = "+")))

# All targets in one index, first with the function's default settings.
index1 <- c(1, 2, 3, 50)
lasso.effect <- rlassoEffects(X, y, index = index1)
lasso.effect
str(lasso.effect)

# All targets in one index, double selection.
lasso.effect <- rlassoEffects(X, y, index = index1,
                              method = "double selection", post = TRUE)
lasso.effect

# Map, one target per execution. (A first attempt at this call carried an
# extra closing parenthesis, which made the whole file unparseable.)
lasso.effect1 <- Map(
  function(indexA) {
    rlassoEffects(X, y, index = indexA,
                  method = "double selection", post = TRUE)
  },
  index1
)
lasso.effect1
# ---- hdm::rlassoEffects: compare summaries of joint and per-target fits ----
library(hdm)
library(ggplot2)

set.seed(1)
n <- 100 # sample size
p <- 100 # number of variables
s <- 3 # number of non-zero coefficients
X <- matrix(rnorm(n * p), ncol = p)
colnames(X) <- paste0("X", seq_len(p))
beta <- c(rep(3, s), rep(0, p - s))
y <- 1 + X %*% beta + rnorm(n)
data <- data.frame(cbind(y, X))
colnames(data)[1] <- "y"
fm <- as.formula(paste("y ~", paste(colnames(X), collapse = "+")))

# All targets in one index
index1 <- c(1, 2, 3, 50)
lasso.effect <- rlassoEffects(X, y, index = index1,
                              method = "double selection", post = TRUE)
s1 <- summary(lasso.effect)

# Map, one target per execution
lasso.effect1 <- Map(
  function(indexA) {
    rlassoEffects(X, y, index = indexA,
                  method = "double selection", post = TRUE)
  },
  index1
)
# lapply() takes the list first and the function second.
s2 <- lapply(lasso.effect1, summary)
s1
s2

# Compare the first target's estimate from both approaches. The earlier
# check used `s[1]`, i.e. the sparsity constant `s`, rather than a fit.
# NOTE(review): assumes coef() on the single-target fit returns just that
# target's estimate - confirm.
identical(coef(lasso.effect)[1], coef(lasso.effect1[[1]]))
all.equal(coef(lasso.effect)[1], coef(lasso.effect1[[1]]))
s2[[1]]

# The interactive debugonce() calls were dropped: they leave a debug flag on
# the hdm functions and the fit that followed them ran after the workspace
# had been cleared, so X and y no longer existed.
rm(list = ls())
# ---- hdm::rlassoEffects: nine targets, joint vs. per-target ----
# The first run of this set-up (four targets) was immediately superseded by
# the nine-target run, so only the latter is kept.
library(hdm)
library(ggplot2)

set.seed(1)
n <- 100 # sample size
p <- 100 # number of variables
s <- 3 # number of non-zero coefficients
X <- matrix(rnorm(n * p), ncol = p)
colnames(X) <- paste0("X", seq_len(p))
beta <- c(rep(3, s), rep(0, p - s))
y <- 1 + X %*% beta + rnorm(n)
data <- data.frame(cbind(y, X))
colnames(data)[1] <- "y"
fm <- as.formula(paste("y ~", paste(colnames(X), collapse = "+")))

# All targets in one index
index1 <- c(1, 2, 3, 4, 5, 6, 8, 10, 50)
lasso.effect <- rlassoEffects(X, y, index = index1,
                              method = "double selection", post = TRUE)
s1 <- summary(lasso.effect)

# Map, one target per execution
lasso.effect1 <- Map(
  function(indexA) {
    rlassoEffects(X, y, index = indexA,
                  method = "double selection", post = TRUE)
  },
  index1
)
s2 <- lapply(lasso.effect1, summary)
s1
s2

# Collect the per-target estimates into one vector. Reduce() needs a binary
# combining function, so the former Reduce(s2) / Reduce(summary, ...) calls
# could not do this.
unlist(lapply(lasso.effect1, coef))
index1
# ---- hdm::rlassoEffects: inspect and compare the estimates ----
library(hdm)
library(ggplot2)

set.seed(1)
n <- 100 # sample size
p <- 100 # number of variables
s <- 3 # number of non-zero coefficients
X <- matrix(rnorm(n * p), ncol = p)
colnames(X) <- paste0("X", seq_len(p))
beta <- c(rep(3, s), rep(0, p - s))
y <- 1 + X %*% beta + rnorm(n)
data <- data.frame(cbind(y, X))
colnames(data)[1] <- "y"
fm <- as.formula(paste("y ~", paste(colnames(X), collapse = "+")))

# One double-selection fit per element of `targets`.
fit_each <- function(targets) {
  Map(
    function(indexA) {
      rlassoEffects(X, y, index = indexA,
                    method = "double selection", post = TRUE)
    },
    targets
  )
}

# All targets in one index
index1 <- c(1, 2, 3, 4, 5, 6, 8, 10, 50)
lasso.effect <- rlassoEffects(X, y, index = index1,
                              method = "double selection", post = TRUE)
s1 <- summary(lasso.effect)

# Map, one target per execution
lasso.effect1 <- fit_each(index1)
s2 <- lapply(lasso.effect1, summary)

# Map over the first two and the next two targets.
# NOTE(review): Map() iterates over the elements of index2 / index3, so each
# rlassoEffects() call still receives a single target. If two targets per
# call were intended, iterate over list(index2) / list(index3) instead.
index2 <- index1[1:2]
index3 <- index1[3:4]
lasso.effect2 <- fit_each(index2)
lasso.effect3 <- fit_each(index3)
s3 <- lapply(lasso.effect2, summary)
s4 <- lapply(lasso.effect3, summary)

# Inspect the results.
s1
s2
s3
s4
class(s1)
s1[1]
str(s2)
coef(s1)
# s2 is a plain list of summaries, so coef() has to be applied per element.
lapply(s2, coef)
lapply(lasso.effect1, coef)
unlist(lapply(lasso.effect1, coef))
coef(lasso.effect)

# Check if estimates are identical
identical(unlist(lapply(lasso.effect1, coef)), coef(lasso.effect))
identical(unlist(lapply(lasso.effect1, coef))[1:2],
          unlist(lapply(lasso.effect2, coef)))
# ---- hdm::rlassoEffects: joint fit vs. separate fits, estimate check ----
library(hdm)
library(ggplot2)

# Simulated design: the first s of the p regressors have coefficient 3, the
# remaining ones 0.
set.seed(1)
n <- 100 # sample size
p <- 100 # number of variables
s <- 3 # number of non-zero variables
X <- matrix(rnorm(n * p), ncol = p)
colnames(X) <- paste0("X", seq_len(p))
beta <- c(rep(3, s), rep(0, p - s))
y <- 1 + X %*% beta + rnorm(n)
data <- data.frame(cbind(y, X))
colnames(data)[1] <- "y"
fm <- as.formula(paste("y ~", paste(colnames(X), collapse = "+")))

# Joint fit: every target index handed over in a single call.
index1 <- c(1, 2, 3, 4, 5, 6, 8, 10, 50)
lasso.effect <- rlassoEffects(
  X, y,
  index = index1, method = "double selection", post = TRUE
)
s1 <- summary(lasso.effect)

# Separate fits: one call per element of index1.
lasso.effect1 <- lapply(
  index1,
  function(indexA) rlassoEffects(X,y, index = indexA, method = "double selection", post=TRUE)
)
s2 <- lapply(lasso.effect1, summary)

# Do the separate fits reproduce the joint estimates?
identical(unlist(lapply(lasso.effect1, coef)), coef(lasso.effect))

# Separate fits restricted to targets 1-2 and to targets 3-4 of index1
# (still one call per target).
index2 <- index1[1:2]
index3 <- index1[3:4]
lasso.effect2 <- lapply(
  index2,
  function(indexA) rlassoEffects(X,y, index = indexA, method = "double selection", post=TRUE)
)
lasso.effect3 <- lapply(
  index3,
  function(indexA) rlassoEffects(X,y, index = indexA, method = "double selection", post=TRUE)
)
s3 <- lapply(lasso.effect2, summary)
s4 <- lapply(lasso.effect3, summary)

# Do the restricted runs agree with the matching entries of the full run?
identical(
  unlist(lapply(lasso.effect1, coef))[1:2],
  unlist(lapply(lasso.effect2, coef))
)
identical(
  unlist(lapply(lasso.effect1, coef))[3:4],
  unlist(lapply(lasso.effect3, coef))
)
# ---- Merge the per-file simulation results ----
# Two earlier attempts - setwd() without a path and a path literal with a
# line break before the closing quote - both fail, so only the working call
# is kept.
setwd("C:/Users/PuD/Documents/Pippo/Promotion/DissundPapers/R Journal_ Simultaneous Inference with HDM/replication/siminf-code-and-supplement/code-supplement-simulation")
source("helpers_MERGE.R")

# Copy the rows j1 ... j1 + R1 - 1 of each result file into the merged
# containers.
# NOTE(review): dir, j1, R1, betasMERGE and psMERGE are not defined in this
# section; presumably they come from helpers_MERGE.R or from the loaded
# .RData files - confirm.
for (i in 2:8) {
  load(paste0(dir, "/results/simulation_server_60_200_R_5000_file_", i, ".RData"))
  # Computed after load(), so values of j1 / R1 stored in the file are used.
  merge_rows <- j1:(j1 + R1 - 1)
  betasMERGE[merge_rows, ] <- betas[merge_rows, ]
  for (l in seq_along(psMERGE)) {
    psMERGE[[l]][merge_rows, ] <- ps[[l]][merge_rows, ]
  }
}
correj

# Helper functions for calculation of results (FWER, Number of false Rej etc.)
source("helpers.R")