# Analyses for JESP paper
# Author: Asli Cansunar
# Last edited: Feb 12, 2020
############################
rm(list=ls())
# load packages
library(MASS)
#install.packages('repmis')
library(repmis)
library(tidyverse)
library(readr)
#install.packages('zoo')
library(zoo)
#install.packages('simpleSetup')
library(simpleSetup)
library(viridis)
library(scales)
#install.packages('plm')
library(plm)
library(stargazer)
library(readxl)
#install.packages('panelView')
library(panelView)
#install.packages('countrycode')
library(countrycode)
#install.packages('reshape')
library(reshape2) 
library(readstata13)
#install.packages("rmarkdown")
library(rmarkdown)
library(plyr)
install.packages('interactions')
library(interactions)
library(mgcv)
setwd("~/Dropbox/WEALTHPOL_Research/Papers/JESP/Data")
data <- read_excel("data_JESP.xlsx")
data<-data.frame(data)


data.oecd <- subset(data, Country=="Austria" | Country=="Belgium" |Country=="Czechia"|Country=="Denmark"|Country=="Estonia"|Country=="Finland"| Country=="France"|Country=="Germany"|Country=="Hungary"|Country=="Iceland"|Country=="Italy"|Country=="Latvia"|Country=="Norway"|Country=="Poland"|Country=="Portugal"|Country=="Slovakia"|Country=="Slovenia"|Country=="Spain"|Country=="Sweden"|Country=="Switzerland"|Country=="United Kingdom")

hco_bottom<-ggplot(data = data.oecd, mapping = aes(x = Year, y = firstq_hco, color = Country)) +labs(title = "Housing Cost Overburden Rate - Households in the Bottom Income Quintile",x = "Year",y="Housing cost overburden rate")+
ylim(0, 100)+geom_line()

hco_top<- ggplot(data = data.oecd, mapping = aes(x = Year, y = fifthq_hco, color = Country)) +labs(title = "Housing Cost Overburden Rate - Households in the Top Income Quintile",x = "Year",y="Housing cost overburden rate")+
  ylim(0, 100)+geom_line()

ownership_bottom<-ggplot(data = data.oecd, mapping = aes(x = Year, y = firstq_owner, color = Country)) +labs(title = "House Ownership Rate - Households in the Bottom Income Quintile",x = "Year",y="House ownership rate")+
  ylim(0, 100)+xlim(2010, 2018)+geom_line()

ownership_top<-ggplot(data = data.oecd, mapping = aes(x = Year, y = fifthq_owner, color = Country)) +labs(title = "House Ownership Rate - Households in the Top Income Quintile",x = "Year",y="House ownership rate")+
  ylim(0, 100)+xlim(2010, 2018)+geom_line()

#ggplot(data = data.oecd, aes(x = Year, y = fifthq_ownerMort)) + geom_point() +
 #stat_smooth(method = "lm", se = FALSE) + facet_wrap(~Country)

firstowner.long <- melt(data.oecd, id.vars = c("Year","Country"), measure = c("firstq_owner", "firstq_ownerMort"))
multi<-ggplot(data = firstowner.long, aes(x = Year, y = value, color = variable)) + geom_point() +geom_line() +
  scale_x_continuous(breaks=seq(2012, 2016, 4), limits=c(2010, 2018))+
labs(title = "House Ownership Rate - Households in the Bottom Income Quintile",x = "Year",y="Percentage")+
scale_colour_discrete(name="",labels=c("Owner, no outstanding mortgage", "Owner with mortgage")) +
facet_wrap(~Country)

topowner.long <- melt(data.oecd, id.vars = c("Year","Country"), measure = c("fifthq_owner", "fifthq_ownerMort"))
multi2<-ggplot(data = topowner.long, aes(x = Year, y = value, color = variable)) + geom_point() +geom_line() +
  scale_x_continuous(breaks=seq(2012, 2016, 4), limits=c(2010, 2018))+
  labs(title = "House Ownership Rate - Households in the Top Income Quintile",x = "Year",y="Percentage")+
  scale_colour_discrete(name="",labels=c("Owner, no outstanding mortgage", "Owner with mortgage")) +
  facet_wrap(~Country)


###Housing cost overburden rate, by income quintile
hco.long <- melt(data.oecd, id.vars = c("Year","Country"), measure = c("firstq_hco", "secondq_hco", "thirdq_hco", "fourthq_hco","fifthq_hco"))
hco_quintile<-ggplot(data = hco.long, aes(x = Year, y = value, color = variable)) + geom_point() +geom_line() +
  scale_x_continuous(breaks=seq(2012, 2016, 4), limits=c(2010, 2018))+
  labs(title = "Housing Cost Overburden Ratio",x = "Year",y="Percentage")+
  scale_colour_discrete(name="",labels=c("Households in the bottom income quintile","Households in the second income quintile", "Households in the third income quintile","Households in the fourth income quintile","Households in the top income quintile")) +
  facet_wrap(~Country)


###Ownership rates, by income quintile
owner.long <- melt(data.oecd, id.vars = c("Year","Country"), measure = c("firstq_owner", "secondq_owner", "thirdq_owner", "fourthq_owner","fifthq_owner"))
ownership_quintile<-ggplot(data = owner.long, aes(x = Year, y = value, color = variable)) + geom_point() +geom_line() +
  scale_x_continuous(breaks=seq(2012, 2016, 4), limits=c(2010, 2018))+
  labs(title = "House Ownership Rates by Income Quintiles",x = "Year",y="Percentage")+
  scale_colour_discrete(name="",labels=c("Households in the bottom income quintile","Households in the second income quintile", "Households in the third income quintile","Households in the fourth income quintile","Households in the top income quintile")) +
  facet_wrap(~Country)

###Ownership rates with and without outstanding mortgage, by income quintile
data.oecd$firstq_ownerNM<- data.oecd$firstq_owner - data.oecd$firstq_ownerMort
data.oecd$secondq_ownerNM<- data.oecd$secondq_owner - data.oecd$secondq_ownerMort
data.oecd$thirdq_ownerNM<- data.oecd$thirdq_owner - data.oecd$thirdq_ownerMort
data.oecd$fourthq_ownerNM<- data.oecd$fourthq_owner - data.oecd$fourthq_ownerMort
data.oecd$fifthq_ownerNM<- data.oecd$fifthq_owner - data.oecd$fifthq_ownerMort

ownerNM.long <- melt(data.oecd, id.vars = c("Year","Country"), measure = c("firstq_ownerNM", "secondq_ownerNM", "thirdq_ownerNM", "fourthq_ownerNM","fifthq_ownerNM"))
ggplot(data = ownerNM.long, aes(x = Year, y = value, color = variable)) + geom_point() +geom_line() +
  scale_x_continuous(breaks=seq(2012, 2016, 4), limits=c(2010, 2018))+
  labs(title = "Rates of House Ownership with No Outstanding Mortgages by Income Quintiles",x = "Year",y="Percentage")+
  scale_colour_discrete(name="",labels=c("Households in the bottom income quintile","Households in the second income quintile", "Households in the third income quintile","Households in the fourth income quintile","Households in the top income quintile")) +
  facet_wrap(~Country)


###ISSP Analysis
issp<- read.dta13("issp2009.dta", generate.factors=T,convert.factors=T)
issp.data<- subset(issp, v5=="AT-Austria" | v5=="BE-Belgium" |v5=="CZ-Czechia"|v5=="DK-Denmark"|v5=="EE-Estonia"|v5=="FI-Finland"| v5=="FR-France"|v5=="DE-Germany"|v5=="HU-Hungary"|v5=="IS-Iceland"|v5=="IT-Italy"|v5=="LV-Latvia"|v5=="NO-Norway"|v5=="PL-Poland"|v5=="PT-Portugal"|v5=="SK-Slovakia"|v5=="SI-Slovenia"|v5=="ES-Spain"|v5=="SE-Sweden"|v5=="CH-Switzerland"|v5=="GB-GBN-Great Britain and/or United Kingdom")
data.oecd$c_alphan<-countrycode(data.oecd$Country,origin='country.name', destination='iso2c')
data.oecd<-subset(data.oecd,Year==2010)
data.oecd$ineq.ownershipNM<-data.oecd$fifthq_ownerNM/data.oecd$firstq_ownerNM
data.oecd$ineq.hco<-data.oecd$fifthq_hco/data.oecd$firstq_hco
library("readr")
write_csv(data.oecd, path = "oecd.csv")

ggplot(data = data.oecd, aes(x = Country, y = ineq.ownershipNM)) + geom_point() 
ggplot(data = data.oecd, aes(x = Country, y = ineq.hco)) + geom_point() 

issp.data$C_ALPHAN<-issp.data$c_alphan
total <- merge(issp.data,data.oecd,by="C_ALPHAN")
total<-as.data.frame(total)

total$redistribution<-as.numeric(total$v33)
total$redistribution<- mapvalues(total$redistribution, from = c("1", "2","3","4","5"), to = c("5", "4","3","2","1"))

total$age<-as.numeric(total$age)
total$sex<-as.numeric(total$sex)
total$educyrs<-as.numeric(total$educyrs)

m1<-lm(redistribution ~ income*ineq.ownershipNM+educyrs+sex+age+v5-1, data=total)
summary(m1)
interact_plot(m1, pred = income, modx = ineq.ownershipNM)

mreg1 <- gam(redistribution ~ income+ s(ln_dist_ceo, k=5) + s(ln_dist_worker)+ female+ church+ college+ tunion +unempl+ empl_part+ retired+ married+ age_res+ right+S1r +s(ccode,bs="re"), 
             data = mydata, method="GCV.Cp")
summary(mreg1)