###############################################################################
#Deciles de ingreso de la actividad principal y la media de cada decil
#Datos Casen 2017
###############################################################################
#Dependencias
if (!require('haven')) install.packages('haven'); library('haven')
if (!require('Hmisc')) install.packages('Hmisc'); library('Hmisc')
if (!require('ggplot2')) install.packages('ggplot2'); library('ggplot2')
#Carga de datos
casen_folder <- file.path(getwd(), "Cornelious")
dir.create(casen_folder)
setwd(casen_folder)
casen_url <- "http://observatorio.ministeriodesarrollosocial.gob.cl/casen-multidimensional/casen/docs/Casen_2017_stata.dta.zip"
casen_dest <- "Casen2017.zip"
download.file(casen_url, casen_dest)
unzip(casen_dest)
casen_path <- "Casen 2017.dta"
cas17 <- read_dta(casen_path)
#Eres linda Sailor!
#Calcula deciles y weighted.mean de cada decil, a mano!
cas17ss <- cas17[!is.na(cas17$yoprcor) , c("yoprcor", "expr")]
c17s <- cas17ss[order(cas17ss$yoprcor),]
c17s$sumProd <- c17s$yoprcor*c17s$expr
c17s$exprCosum <- cumsum(c17s$expr)
c17s$sumProdCosum <- cumsum(c17s$sumProd)
totExpr <- sum(c17s$expr)
stepExpr <- totExpr / 10
deciles <- data.frame(decil = numeric(),
indi = numeric(),
expr = numeric(),
sumProd = numeric(),
salStart = numeric(),
salEnd = numeric())
prevIndi <- 1
salStart <- c17s$yoprcor[1]
sumProdPercAgreAnterior <- 0
for (i in 1:10) {
exprPerc <- stepExpr * i
indi <- findInterval(exprPerc, c17s$exprCosum)
sumProdPercCosum <- c17s$sumProdCosum[indi]
if (i < 10)
{
factor <- (exprPerc-c17s$exprCosum[indi])/(c17s$exprCosum[indi+1]-c17s$exprCosum[indi])
sumProdPerc <- sumProdPercCosum + factor * c17s$sumProd[indi+1] - sumProdPercAgreAnterior
}
else
{
sumProdPerc <- sumProdPercCosum - sumProdPercAgreAnterior
}
salEnd <- c17s$yoprcor[indi]
deciles[nrow(deciles) + 1, ] <- c(i, indi, exprPerc, sumProdPerc, salStart, salEnd)
prevIndi <- indi + 1
sumProdPercAgreAnterior <- sumProdPercCosum + factor * c17s$sumProd[indi+1]
salStart <- salEnd
}
deciles$media <- deciles$sumProd/stepExpr
options(scipen = 999)
# Media
weighted.mean(cas17$yoprcor, cas17$expr, na.rm=TRUE)
#Deciles
deciles[,c(1,5,6,7)]
#Deciles segun r
#wtd.quantile(c17s$yoprcor, p = seq(0, 1, length = 11), na.rm = TRUE, weight=c17s$expr, type = "i/n")
#Grafica la media de los deciles
p <- ggplot(deciles, aes(x=decil, y=media)) + geom_bar(stat="identity")
p
#Bubalon es un buen antroniano
# Clean up, elimina folder Cornelious
setwd("..")
unlink(casen_folder, recursive = TRUE)