# Create Third YouGov UK Survey Dataset Nov 11 2022 BA #

library(tidyverse)
library(haven)
library(tidylog)
library(janitor)



# Import main datasets ----------------------------------------------------


# using SPSS as main input dataset to keep labels
#  BA 28/7/21 changing file paths to work for all users - Adding constituency level data and constituency data for panel

yg_dat <-read_sav("~/Dropbox/WEALTHPOL_Research/Papers/uk_survey_experiments/UK_third_survey_experiment/yg_data/UniversityofOxford_InheritenceTax_October2022_Client.sav")

yg_dat <- zap_formats(yg_dat)

yg_dat <- zap_widths(yg_dat)

extra_dat <-read_sav("~/Dropbox/WEALTHPOL_Research/Papers/uk_survey_experiments/UK_third_survey_experiment/yg_data/UniversityofOxford_InheritenceTax_October2022_ExtraDemogs.sav")

extra_dat <- zap_formats(extra_dat)

extra_dat <- zap_widths(extra_dat)

yg_dat <- yg_dat %>% 
  left_join(extra_dat, by = "ID")

eth_dat <-read_sav("~/Dropbox/WEALTHPOL_Research/Papers/uk_survey_experiments/UK_third_survey_experiment/yg_data/UniversityofOxford_InheritenceTax_October2022_Ethnicity.sav")

eth_dat <- zap_formats(eth_dat)

eth_dat <- zap_widths(eth_dat)

yg_dat <- yg_dat %>% 
  left_join(eth_dat, by = "ID")

cons_data <- read_csv("~/Dropbox/WEALTHPOL_Research/Papers/uk_survey_experiments/yg_data/constituency_data_yg.csv")

yg_dat <- yg_dat %>% 
  left_join(cons_data, by = "profile_pcon_2010_constituency_Cal")

#write_csv(yg_dat_cons, "~/Dropbox/WEALTHPOL_Research/Papers/uk_survey_experiments/yg_data/yg_data_par_cons.csv")

# Using csvs instead

#yg_dat <-read_csv("~/Dropbox/WEALTHPOL_Research/Papers/uk_survey_experiments/yg_data/OxfordUni_WealthInequality_June2021_Client.csv")
#yg_dat_cons <- read_csv("~/Dropbox/WEALTHPOL_Research/Papers/uk_survey_experiments/yg_data/yg_data_par_cons.csv")


# Recoding 

# Don't Knows coded as NAs 
# All number answers coded as integers
# Nominal answers coded as characters

# Adding label variables for demographics ---------------------------------


# Adding Codes for Sector, Region, Ethnicity, Occupation, HH Income, Tenure


ed_codes <- data.frame(
  stringsAsFactors = FALSE,
  check.names = FALSE,
  profile_education_level = as.numeric(c("1",
           "2","3","4","5","6","7","8","9","10",
           "11","12","13","14","15","16","17","18",
           "19","20","98","99")),
  education_code = c("No formal qualifications",
                "Youth training certificate/skillseekers",
                "Recognised trade apprenticeship completed","Clerical and commercial",
                "City & Guilds certificate",
                "City & Guilds certificate - advanced","ONC",
                "CSE grades 2-5",
                "CSE grade 1, GCE O level, GCSE, School Certificate",
                "Scottish Ordinary/ Lower Certificate","GCE A level or Higher Certificate",
                "Scottish Higher Certificate",
                "Nursing qualification (e.g. SEN, SRN, SCM, RGN)",
                "Teaching qualification (not degree)",
                "University diploma",
                "University or CNAA first degree (e.g. BA, B.Sc, B.Ed)",
                "University or CNAA higher degree (e.g. M.Sc, Ph.D)",
                "Other technical, professional or higher qualification",
                "Don't know","Prefer not to say","Skipped",
                "Not Asked")
)

sector_codes <- data.frame(
  stringsAsFactors = FALSE,
  employer_type_private_non_self = c("1","2","3","4","5","6",
           "7","8","9","10","11","12","13","14","15","16",
           "17","18","19","20","21","22","23","24","25","26",
           "27","28","29","30","31","32","33","34","97",
           "99","998","999"),
  sector_private = c("Agriculture","Natural resources",
             "Construction",
             "Manufacturing: Automobiles ","Manufacturing: Heavy industry",
             "Manufacturing: Technology",
             "Manufacturing: Other","Wholesaling",
             "Media and communications","Financial Services: Banking",
             "Financial Services: Insurance",
             "Financial Services: Brokerage & Investment",
             "Business services",
             "Real estate","Scientific research","Technology services",
             "Engineering, design, or architecture",
             "Legal services","Personal care","Education",
             "Health","Arts, entertainment, and recreation",
             "Restaurant services","Hotels or lodging",
             "Airlines or air travel","Other travel","Retail",
             "Transportation","Maintenance and repair","Accounting",
             "Financial Services: Other",
             "Leisure",
             "Tourism ",
             "Utilities and energy","Other","Not applicable","Skipped",
             "Not Asked")
)

sector_codes <- sector_codes %>%
  mutate(employer_type_private_non_self=as.numeric(employer_type_private_non_self))

region_codes<- data.frame(
  stringsAsFactors = FALSE,
       check.names = FALSE,
            profile_GOR = c("1","2","3","4","5","6",
                       "7","8","9","10","11","12","13","98","99"),
         region_name = c("North East","North West",
                       "Yorkshire and the Humber","East Midlands","West Midlands",
                       "East of England","London","South East","South West",
                       "Wales","Scotland","Northern Ireland",
                       "Non UK & Invalid","Skipped","Not Asked")
)

region_codes <- region_codes %>%
  mutate(profile_GOR = as.numeric(profile_GOR))

ethnicity_codes <- data.frame(
  stringsAsFactors = FALSE,
       check.names = FALSE,
       Oethnicity_new = as.numeric(c("1","2","3","4","5","6",
                                          "7","8","9","10","11","12","13",
                                          "14","15","16","17","18","19",
                                          "98","99")),
       ethnicity_name = c("White British","Irish",
                          "Gypsy or Irish Traveller",
                           "Other White",
                           "White and Black Caribbean","White and Black African",
                           "White and Asian",
                           "Other Mixed","Indian",
                           "Pakistani","Bangladeshi","Chinese",
                           "Other Asian ","African",
                           "Caribbean",
                           "Other Black","Arab",
                           "Any other ethnic group",
                           "Prefer not to say","Skipped","Not Asked")
                   )

work_stat_codes <- data.frame(
  stringsAsFactors = FALSE,
       check.names = FALSE,
        profile_work_stat = as.numeric(c("1","2","3","4","5","6",
                                          "7","8","98","99")),
       work_status = c("Full time",
                       "Part time (8-29 hrs)",
                       "Part time (<8 hrs)",
                        "Student","Retired","Unemployed",
                        "Not working","Other","Skipped",
                         "Not Asked")
                   )

occ_codes <- data.frame(
  stringsAsFactors = FALSE,
       check.names = FALSE,
  profile_work_type = as.numeric(c("1",
            "2","3","4","5","6","7","8","9",
            "98","99")),
  occ_type = c("Professional",
                "Manager",
                "Junior managerial",
                "Sales or Services ",
                "Supervisor",
                "Skilled Manual Work",
                "Low-skilled Manual Work", "Other", "Never Worked",
                "Skipped","Not Asked")
             )

industry_codes <- data.frame(
  stringsAsFactors = FALSE,
  check.names = FALSE,
  profile_work_industry = as.numeric(c("1","2","3","4","5","6","7",
            "8","9","10","11","12","13","14",
            "15","16","17","18","19","20",
            "21","98","99")),
  Not.Asked = c("Advertising/Marketing/PR",
                "Aerospace",
                "Agriculture/Chemicals/Forest Products","Automotive",
                "Computers/Electronics","Construction",
                "Consumer Goods","Education","Energy/Mining",
                "Finance/Insurance/Real Estate",
                "Government/Military/Public Service",
                "Hospitality/Recreation",
                "Media/Publishing/Entertainment","Medical/Health Services",
                "Pharmaceuticals","Retail","Service",
                "Telecommunications/Networking",
                "Travel/Transportation","Other",
                "Have never worked","Skipped","Not Asked")
)

marital_codes <- data.frame(
  stringsAsFactors = FALSE,
  check.names = FALSE,
  profile_marital_stat = as.numeric(c("1","2","3","4","5","6","7",
            "8","9")),
  marital_status= c("Married","Living as married",
                "Separated (after being married)","Divorced",
                "Widowed","Never married",
                "Civil Partnership","Skipped","Not Asked")
)

housing_codes <- data.frame(
  stringsAsFactors = FALSE,
  check.names = FALSE,
  profile_house_tenure = as.numeric(c("1","2","3","4","5","6","7",
           "8","9","98","99")),
  house_tenure = c("Own outright",
                "Own mortgage",
                "Shared Ownership",
                "Rent: private","Rent:LA",
                "Rent:HA",
                "Live with others: pay",
                "Live with others: free","Other",
                "Skipped","Not Asked")
)
  
hh_income_codes <- data.frame(
  stringsAsFactors = FALSE,
  check.names = FALSE,
  profile_gross_household = as.numeric(c("1","2","3","4","5","6",
            "7","8","9","10","11","12","13",
            "14","15","16","17","98","99")),
  hh_income_code = c("under £5,000 per year",
                "£5,000 to £9,999 per year",
                "£10,000 to £14,999 per year",
                "£15,000 to £19,999 per year","£20,000 to £24,999 per year",
                "£25,000 to £29,999 per year",
                "£30,000 to £34,999 per year",
                "£35,000 to £39,999 per year",
                "£40,000 to £44,999 per year",
                "£45,000 to £49,999 per year","£50,000 to £59,999 per year",
                "£60,000 to £69,999 per year",
                "£70,000 to £99,999 per year",
                "£100,000 to £149,999 per year","£150,000 and over",
                "Don't know","Prefer not to answer",
                "Skipped","Not Asked")
)

pers_inc_codes <- data.frame(
  stringsAsFactors = FALSE,
  check.names = FALSE,
  profile_gross_personal = as.numeric(c("1","2","3","4","5","6","7",
            "8","9","10","11","12","13","14",
            "15","16","98","99")),
  pers_income_code = c("under £5,000 per year",
                "£5,000 to £9,999 per year",
                "£10,000 to £14,999 per year",
                "£15,000 to £19,999 per year","£20,000 to £24,999 per year",
                "£25,000 to £29,999 per year",
                "£30,000 to £34,999 per year",
                "£35,000 to £39,999 per year",
                "£40,000 to £44,999 per year","£45,000 to £49,999 per year",
                "£50,000 to £59,999 per year",
                "£60,000 to £69,999 per year",
                "£70,000 to £99,999 per year","£100,000 and over",
                "Don't know","Prefer not to answer","Skipped",
                "Not Asked")
)

# Currently missing PDLs commented out BA 14/11/22

yg_dat <- yg_dat %>% 
  left_join(ed_codes, by = "profile_education_level")
# Need to get from YG
yg_dat <- yg_dat %>%
  left_join(sector_codes, by = "employer_type_private_non_self")
# 
yg_dat <- yg_dat %>%
  left_join(region_codes, by = "profile_GOR")
# 
# yg_dat <- yg_dat %>% 
#   left_join(ethnicity_codes, by = "Oethnicity_new")

yg_dat <- yg_dat %>% 
  left_join(work_stat_codes, by = "profile_work_stat")

yg_dat <- yg_dat %>% 
  left_join(occ_codes, by = "profile_work_type")

yg_dat <- yg_dat %>%
  left_join(marital_codes, by = "profile_marital_stat")

yg_dat <- yg_dat %>% 
  left_join(housing_codes, by = "profile_house_tenure")

yg_dat <- yg_dat %>% 
  left_join(hh_income_codes, by = "profile_gross_household")

yg_dat <- yg_dat %>% 
  left_join(pers_inc_codes, by = "profile_gross_personal")

rm(ed_codes, ethnicity_codes, hh_income_codes, housing_codes, industry_codes, marital_codes, occ_codes,
   pers_inc_codes, region_codes, sector_codes, work_stat_codes)





# Adding New Variables for Demographics -----------------------------------



# Create New Variables with DK's removed etc  for DEMOGRAPHICS

yg_dat <- yg_dat %>% 
  mutate(turnout_ge_2019 = case_when(voted_ge_2019 == 1L ~ 1L,
                                   voted_ge_2019 == 2L ~ 0L,
                                   TRUE ~ NA_integer_),
         ge_2019 = case_when(pastvote_ge_2019 == 1L ~ "Conservative",
                                      pastvote_ge_2019 == 2L ~ "Labour",
                                      pastvote_ge_2019 == 3L ~ "Liberal Democrat",
                                      pastvote_ge_2019 == 4L ~ "SNP",
                                      pastvote_ge_2019 == 5L ~ "Plaid Cymru",
                                      pastvote_ge_2019 == 6L ~ "Brexit Party",
                                      pastvote_ge_2019 == 7L ~ "Green",
                                      pastvote_ge_2019 == 98 ~ "Other",
                                      TRUE ~ NA_character_),
         EURef = case_when(pastvote_EURef == 1L ~ "Remain",
                                    pastvote_EURef == 2L ~ "Leave",
                                    pastvote_EURef == 3L ~ "Did Not Vote",
                                    pastvote_EURef == 4L ~ "Can't Remember",
                                    TRUE ~ NA_character_),
         voted_leave = case_when(EURef == "Remain" ~ 0L, 
                                 EURef == "Leave" ~ 1L,
                                 EURef == "Did Not Vote" ~0L,
                                 TRUE ~ NA_integer_),
         voted_remain = case_when(EURef == "Remain" ~ 1L, 
                                 EURef == "Leave" ~ 0L,
                                 EURef == "Did Not Vote" ~0L,
                                 TRUE ~ NA_integer_),
         ref_didnt_vote = case_when(EURef == "Remain" ~ 0L, 
                                  EURef == "Leave" ~ 0L,
                                  EURef == "Did Not Vote" ~1L,
                                  TRUE ~ NA_integer_),
         gender = case_when(profile_gender == 1L ~ "Male",
                                    profile_gender == 2L ~ "Female",
                                    TRUE ~ NA_character_),
         social_grade_no = case_when (socgrade4 < 7 ~ 5 - socgrade4,
                                       TRUE ~ NA_real_),
         socialgrade_cie = case_when(socgrade4 == 1L ~ "AB",
                                     socgrade4 == 2L ~ "C1",
                                     socgrade4 == 3L ~ "C2",
                                     socgrade4 == 4L ~ "DE",
                                     TRUE ~ NA_character_),
         UK_citizen = case_when(citizenship < 3 ~ 1,
                                citizenship == 3 ~ 0,
                                TRUE ~NA_real_),
         citizenship_code = case_when(citizenship == 1 ~ "UK Citizen",
                                 citizenship == 2 ~ "Dual National",
                                 citizenship == 3 ~ "Non-Citizen",
                                 TRUE ~ NA_character_),
         education_no = case_when(profile_education_level <19 ~ profile_education_level,
                                  profile_education_level> 18 ~ NA_real_),
         education_short = case_when(profile_education_level == 1 ~ 1,
                                     profile_education_level %in% 2:10 ~ 2,
                                     profile_education_level %in% 11:12 ~ 3,
                                     profile_education_level %in% 13:14 ~ 3,
                                     profile_education_level %in% 15:16 ~ 4,
                                     profile_education_level ==17 ~ 5,
                                     profile_education_level ==18 ~ 4,
                                     profile_education_level > 18 ~NA_real_),
         education_short_lab = case_when(profile_education_level == 1 ~ "1. No Qualifications",
                                     profile_education_level %in% 2:10 ~ "2. GCSEs / O-Levels",
                                     profile_education_level %in% 11:12 ~ "3. A-Levels or equivalent",
                                     profile_education_level %in% 13:14 ~ "3. A-Levels or equivalent",
                                     profile_education_level %in% 15:16 ~ "4. Undergraduate",
                                     profile_education_level ==17 ~ "5. Postgraduate",
                                     profile_education_level ==18 ~ "4. Undergraduate",
                                     profile_education_level > 18 ~NA_character_),
         degree = case_when(profile_education_level %in% 15:18 ~ 1,
                            profile_education_level <15 ~ 0,
                            TRUE ~NA_real_),
         homeowner = case_when(profile_house_tenure < 4 ~ 1,
                               profile_house_tenure %in% 4:9 ~ 0,
                               TRUE ~NA_real_),
         renter = case_when(profile_house_tenure < 4 ~ 0,
                            profile_house_tenure %in% 4:6 ~ 1,
                            profile_house_tenure %in% 7:9 ~0,
                            TRUE ~NA_real_), 
         parent_homeowner = case_when(Q26 < 4 ~ 1,
                                      Q26 %in% 4:9 ~ 0,
                               TRUE ~NA_real_),
         parent_renter = case_when(Q26 < 4 ~ 0,
                                   Q26 %in% 4:6 ~ 1,
                                   Q26 %in% 7:9 ~0,
                            TRUE ~NA_real_), 
         household_size = case_when(profile_household_size < 9 ~ profile_household_size,
                                    TRUE ~ NA_real_),
         hh_income = case_when(profile_gross_household<16 ~ profile_gross_household,
                               TRUE ~ NA_real_),
         pers_income = case_when(profile_gross_personal<15 ~ profile_gross_personal,
                               TRUE ~ NA_real_)
         
         ) 

# yg_dat %>% 
#   group_by(age) %>% 
#   count() %>% View()


#write_csv(yg_dat, "/Users/madselkjaer/Dropbox/WEALTHPOL_Research/Papers/uk_survey_experiments/yg_data/yg_data_for_analysis.csv")


# ADDING LOCAL AUTHORITY INFORMATION - Fully matched - median prices for August 2022 - also adds LA Names and codes

yg_la_codes <- read_csv("~/Dropbox/WEALTHPOL_Research/Papers/uk_survey_experiments/UK_third_survey_experiment/Wealth and income data/yg_la_codes.csv")

la_house_price_data <- read_csv("~/Dropbox/WEALTHPOL_Research/Papers/uk_survey_experiments/UK_third_survey_experiment/Wealth and income data/la_median_prices_8_22")

la_house_price_data <- la_house_price_data %>% 
  rename(la_name = RegionName) %>% 
  mutate(la_name = case_when(la_name=="City of Westminster" ~ "Westminster",
                   la_name=="St Helens" ~ "St. Helens",
                   la_name=="City of Kingston upon Hull" ~ "Kingston upon Hull, City of",
                   la_name=="City of Derby" ~ "Derby",
                   la_name=="City of Nottingham" ~ "Nottingham",
                   la_name=="Herefordshire" ~ "Herefordshire, County of",
                   la_name=="City of Bristol" ~ "Bristol, City of",
                   la_name=="City of Plymouth" ~ "Plymouth",
                   la_name=="City of Peterborough" ~ "Peterborough",
                   la_name=="Vale of Glamorgan" ~ "The Vale of Glamorgan",
                   la_name=="Rhondda Cynon Taf" ~ "Rhondda, Cynon, Taff",
                   la_name=="City of Aberdeen" ~ "Aberdeen City",
                   la_name=="Argyll and Bute" ~ "Argyll & Bute",
                   la_name=="Dumfries and Galloway" ~ "Dumfries & Galloway",
                   la_name=="City of Dundee" ~ "Dundee City",
                   la_name=="City of Edinburgh" ~ "Edinburgh, City of",
                   la_name=="Na h-Eileanan Siar" ~ "Eilean Siar",
                   la_name=="Folkestone and Hythe" ~ "Folkestone & Hythe",
                   la_name=="Armagh City Banbridge and Craigavon" ~ "Armagh, Banbridge and Craigavon",
                   la_name=="Derry City and Strabane" ~ "Derry and Strabane",
                   la_name=="Newry Mourne and Down" ~ "Newry, Mourne and Down",
                   la_name=="Ards and North Down" ~ "North Down and Ards",
                   la_name=="City of Glasgow" ~ "Glasgow City",
                   la_name=="Bournemouth Christchurch and Poole" ~ "Bournemouth, Christchurch and Poole",
                   TRUE~ la_name
                   )
  )

yg_la_codes <- yg_la_codes %>% 
  left_join(la_house_price_data, by = "la_name") %>% 
  select(-Date) %>% 
  mutate(pdl_profile_oslaua = as.numeric(pdl_profile_oslaua))



yg_dat <- yg_dat %>% 
  left_join(yg_la_codes, by = "pdl_profile_oslaua")

rm(yg_la_codes, la_house_price_data)

## Adding in Local Authority Pay - Same here - No NI and a couple of years old so could be updated

la_pay <- read_csv("~/Dropbox/WEALTHPOL_Research/Papers/uk_survey_experiments/labor_mkt_data/la_pay_long.csv")

la_pay <- la_pay %>% 
  dplyr::select(-(la_name)) %>% 
  filter(quantile == 50) %>% 
  select(-quantile)



yg_dat <- yg_dat %>% 
  left_join(la_pay, by= "la_code")


rm(la_pay)
# Recoding Survey Questions -----------------------------------------------

# Remove all DKs and Skipped  - code as NA. 
# Coding is set-up so agreement is higher number except where noted

yg_dat <-  yg_dat %>% 
  mutate(
    # Where using Likert scales these are coded so agreement is higher number
    # unless noted otherwise
    vote_choice = case_when(Q9 == 1 ~ "Conservative",
                            Q9 == 2 ~ "Labour",
                            Q9 == 3 ~ "Liberal Democrats",
                            Q9 == 4 ~ "SNP",
                            Q9 == 5 ~ "Plaid Cymru",
                            Q9 == 6 ~ "Reform UK",
                            Q9 == 7 ~ "Green",
                            Q9 == 8 ~ "Other",
                            TRUE ~ NA_character_),
    # For authoritarianism, higher numbers mean more authoritarian
    soc_att_death = case_when(Q10_1 < 8 ~ Q10_1,
                              TRUE ~ NA_real_),
    soc_att_rich = case_when(Q10_2 < 8 ~  Q10_2,
                             TRUE ~ NA_real_),
    soc_att_fairshare = case_when(Q10_3 < 8 ~ 6 - Q10_3,
                                  TRUE ~ NA_real_),
    soc_att_respect = case_when(Q10_4 < 8 ~  Q10_4,
                                TRUE ~ NA_real_),
    soc_att_children = case_when(Q10_5 < 8 ~ 6 - Q10_5,
                                 TRUE ~ NA_real_),
    soc_att_people = case_when(Q10_6 < 8 ~  Q10_6,
                               TRUE ~ NA_real_),
    
    authoritarian = soc_att_children+soc_att_death+soc_att_respect,
    
    populism = soc_att_fairshare+soc_att_rich+soc_att_people,
    
    populism_econ = soc_att_fairshare+soc_att_rich,
    
    # Need to hand-code years lived

    
    years_lived  = case_when(!is.na(Q13aa_other) ~ parse_number(Q13aa_other),
                             TRUE ~ NA_real_),
    
    years_lived  = if_else(str_detect (Q13aa_other, "month") == TRUE, years_lived/12, years_lived),
    
    years_lived = case_when(Q13aa_other == "Year and a half" ~ 1.5,
                            Q13aa_other == "Two years" ~ 2,
                            Q13aa_other == "Two" ~ 2,
                            Q13aa_other == "Three years" ~ 3,
                            Q13aa_other == "Thirty plus" ~ 30,
                            Q13aa_other == "Ten" ~ 10,
                            Q13aa_other == "Six" ~ 6,
                            Q13aa_other == "One" ~ 1,
                            Q13aa_other == "Nine years" ~ 9,
                            Q13aa_other == "Nine" ~ 9,
                            Q13aa_other == "Less than a year" ~ 0.5,
                            Q13aa_other == "Kidlington Oxford - 68 years" ~ 68,
                             Q13aa_other == "From birth" ~ as.double(age),
                            Q13aa_other == "Four" ~ 4,
                            Q13aa_other == "Five years" ~ 5,
                            Q13aa_other == "Five" ~ 5,
                            Q13aa_other == "Fifty" ~ 50,
                            Q13aa_other == "Eighteen Months" ~ 1.5,
                            Q13aa_other == "All of my life" ~ as.double(age),
                            Q13aa_other == "All my life" ~ as.double(age),
                            Q13aa_other == "all my life" ~ as.double(age),
                            TRUE ~ years_lived
                            ),
                      
                             
    
  
    # Internal Efficacy same so higher means more efficacy (i.e. disagree)
    internal_efficacy = case_when(Q45_first_1 <8 ~ Q45_first_1,
                                  Q45_second_1 <8 ~ Q45_second_1,
                                  TRUE ~ NA_real_),
    # External Efficacy Kep Same so Higher Means more efficacy (i.e. disagree)
    efficacy_say= case_when(Q45_first_2 <8 ~  Q45_first_2,
                            Q45_second_2 <8 ~  Q45_second_2,
                            TRUE ~ NA_real_),
    
    efficacy_officals = case_when(Q45_first_3 <8 ~  Q45_first_3,
                                  Q45_second_3 <8 ~ Q45_second_3,
                                  TRUE ~ NA_real_),
    
    efficacy_treatment_later = case_when(split_sample == 1 ~ 0 ,
                                         split_sample == 2 ~ 1),
    
    # Pension security rising in confidence
    pension_security = case_when(Q28 == 1 ~ 4,
                                 Q28 == 2 ~ 3,
                                 Q28 == 3 ~ 2,
                                 Q28 == 4 ~ 1,
                                 TRUE ~ NA_real_),
  
    # Perceptions rising in wealth
    perception_hh_inc = case_when(Q29_1 == 1 ~ 1,
                                  Q29_1 == 2 ~ 2,
                                  Q29_1 == 3 ~ 3,
                                  Q29_1 == 4 ~ 4,
                                  Q29_1 == 5 ~ 5,
                                  TRUE ~ NA_real_),
    perception_hh_wea = case_when(Q29_2 == 1 ~ 1,
                                  Q29_2 == 2 ~ 2,
                                  Q29_2 == 3 ~ 3,
                                  Q29_2 == 4 ~ 4,
                                  Q29_2 == 5 ~ 5,
                                  TRUE ~ NA_real_),        
    perception_val_hou = case_when(Q29_3 == 1 ~ 1,
                                   Q29_3 == 2 ~ 2,
                                   Q29_3 == 3 ~ 3,
                                   Q29_3 == 4 ~ 4,
                                   Q29_3 == 5 ~ 5,
                                   TRUE ~ NA_real_),
    
    # Comprehension Checks - Basic
    
    info_treatment = case_when(q_seen_questions == 1 ~ "Control",
                               q_seen_questions == 2 ~ "Both Wealth and Income",
                               q_seen_questions == 3 ~ "Wealth Only",
                               q_seen_questions == 4 ~ "Income Only"),
    
    equal_dist = case_when(Q31 == 1 ~"10%",
                           Q31 == 2 ~ "20%",
                           Q31 == 3 ~ "90%",
                           TRUE ~ NA_character_),
    
    equal_dist_right = Q31==1,
    
    unequal_dist = case_when(Q32 == 1 ~"10%",
                           Q32 == 2 ~ "90%",
                           Q32 == 3 ~ "100%",
                           TRUE ~ NA_character_),
    
    unequal_dist_right = Q32==3,
    
    wealth_comprehension_both = case_when(Q35 == 1 ~"Around £100,000",
                                     Q35 == 2 ~ "Around £500,000",
                                     Q35 == 3 ~ "Around £1,000,000",
                                     TRUE ~ NA_character_),
    
    wealth_both_right = Q35 == 3,
    
    income_comprehension_both = case_when(Q36 == 1 ~"Around £80,000",
                                          Q36 == 2 ~ "Around £120,000",
                                          Q36 == 3 ~ "Around £200,000",
                                          TRUE ~ NA_character_),
    
    income_both_right = Q36 == 2,
    
    wealth_comprehension = case_when(Q38 == 1 ~"Around £100,000",
                                     Q38 == 2 ~ "Around £500,000",
                                     Q38 == 3 ~ "Around £1,000,000",
                                     TRUE ~ NA_character_),
    
    wealth_right = Q38 == 3,
    
    income_comprehension = case_when(Q40 == 1 ~"Around £80,000",
                                     Q40 == 2 ~ "Around £120,000",
                                     Q40 == 3 ~ "Around £200,000",
                                     TRUE ~ NA_character_),
    
    income_right = Q40 == 2,
    
    comp_treated_right = case_when(!is.na(income_right) ~ income_right,
                                    !is.na(wealth_right) ~ wealth_right,
                                    !is.na(wealth_both_right) ~ wealth_both_right,
                                    !is.na(income_both_right) ~ income_both_right,
                                    ),

    
    comp_all_right = case_when(info_treatment=="Control" & equal_dist_right==1 & unequal_dist_right ==1 ~ 1,
                               info_treatment=="Control" & equal_dist_right==0 & unequal_dist_right == 1 ~ 0,
                               info_treatment=="Control" & equal_dist_right==1 & unequal_dist_right == 0 ~ 0,
                               info_treatment=="Control" & equal_dist_right==0 & unequal_dist_right == 0 ~ 0,
                               info_treatment!="Control" & equal_dist_right==1 & unequal_dist_right == 1 & comp_treated_right == 1 ~ 1,
                               info_treatment!="Control" & equal_dist_right==1 & unequal_dist_right == 0 & comp_treated_right == 1 ~ 0,
                               info_treatment!="Control" & equal_dist_right==0 & unequal_dist_right == 1 & comp_treated_right == 1 ~ 0,
                               info_treatment!="Control" & equal_dist_right==0 & unequal_dist_right == 0 & comp_treated_right == 1 ~ 0,
                               info_treatment!="Control" & comp_treated_right == 0 ~ 0),
                               
    
    # Individual Tax Preferences: Reversed so fairness is higher and DK excluded
    
    VAT_fair  = case_when(Q41_1 < 6 ~ 6 - Q41_1,
                          TRUE ~ NA_real_),
    
    inc_tax_fair  = case_when(Q41_2 < 6 ~ 6 - Q41_2,
                          TRUE ~ NA_real_),
    
    cg_tax_fair  = case_when(Q41_3 < 6 ~ 6 - Q41_3,
                              TRUE ~ NA_real_),
    
    inh_tax_fair  = case_when(Q41_4 < 6 ~ 6 - Q41_4,
                              TRUE ~ NA_real_),
    
    int_tax_fair  = case_when(Q41_5 < 6 ~ 6 - Q41_5,
                              TRUE ~ NA_real_),
    
    div_tax_fair  = case_when(Q41_6 < 6 ~ 6 - Q41_6,
                              TRUE ~ NA_real_),
    
    ni_fair  = case_when(Q41_7 < 6 ~ 6 - Q41_7,
                              TRUE ~ NA_real_),
    
    sdlt_fair  = case_when(Q41_8 < 6 ~ 6 - Q41_8,
                              TRUE ~ NA_real_),
    
    council_tax_fair  = case_when(Q41_9 < 6 ~ 6 - Q41_9,
                              TRUE ~ NA_real_),
    
    wealth_taxes_fair = int_tax_fair+cg_tax_fair+div_tax_fair+inh_tax_fair,
    
    finance_taxes_fair = int_tax_fair+cg_tax_fair+div_tax_fair,
    
    property_taxes_fair = inh_tax_fair+council_tax_fair+sdlt_fair,
    
    work_taxes_fair = ni_fair+inc_tax_fair,
    
    # Wealth tax and CG equalization questions
    
    net_wealth_tax  = case_when (Q42<6 ~ 6 - Q42,
                                 TRUE ~ NA_real_),
    
    bin_net_wealth_tax = case_when(net_wealth_tax > 3 ~ 1,
                                   net_wealth_tax <=3 ~ 0,
                                   TRUE ~ NA_real_),
    
    cg_equal  = case_when (Q43<6 ~ 6 - Q43,
                                 TRUE ~ NA_real_),
    
    # House Price Estimates - larger numbers mean go up
    
    uk_house_prices = case_when(Q44_1 < 6 ~ 6 - Q44_1,
                                TRUE ~ NA_real_),
    
    local_house_prices = case_when(Q44_2 < 6 ~ 6 - Q44_2,
                                   TRUE ~ NA_real_),
    
    own_house_price = case_when(Q44_3 < 6 ~ 6 - Q44_3,
                                TRUE ~ NA_real_),
    
    # Building New Houses - positive is more supportive
    
    build_new_houses = case_when(Q60new < 8 ~ 6 - Q60new,
                                 TRUE ~ NA_real_),
    
    build_new_houses_txt = Q60newopen,
    
    gov_resp_housing = case_when(Q61new < 5 ~  Q61new,
                                TRUE ~ NA_real_),
    
    # Differences in income, wealth and housing too large (higher more agreement)

    diff_inc = case_when(Q46_1 < 6 ~ 6 - Q46_1, 
                         TRUE ~ NA_real_),
    diff_wea = case_when(Q46_2 < 6 ~ 6 - Q46_2, 
                         TRUE ~ NA_real_),
    diff_houses = case_when(Q46_3 < 6 ~ 6 - Q46_3, 
                            TRUE ~ NA_real_),
    support_redistribution_inc = case_when(Q47_1 <5 ~ 5 - Q47_1,
                                           TRUE ~ NA_real_),
    
    support_redistribution_wea = case_when(Q47_2 <5 ~ 5 - Q47_2,
                                           TRUE ~ NA_real_),
    
    # Tax slider increasing in preferring to tax income
    tax_slider = Q48_1,
    
    like_taxing_wealth = as.numeric(tax_slider<5),
    
    tax_slider_text = Q49,
    
    # Inheritance Tax Questions - treatment
    
    framing_treatment = case_when(split_group2 == 1 ~ "Control",
                                  split_group2 == 2 ~ "Double Taxation",
                                  split_group2 == 3 ~ "Levelling Playing Field",
                                  split_group2 == 4 ~ "Spending and Tax Cuts"
                                  ),
    
    # Tax questions have "too high" as larger number
    
    inh_overall = case_when(Q51_1 < 7 ~ Q51_1,
                            Q53_1 < 7 ~ Q53_1,
                            Q55_1 < 7 ~ Q55_1,
                            q57_1 < 7 ~ q57_1,
                            TRUE ~ NA_real_),
    
    inh_high =      case_when(Q51_2 < 7 ~ Q51_2,
                            Q53_2 < 7 ~ Q53_2,
                            Q55_2 < 7 ~ Q55_2,
                            q57_2 < 7 ~ q57_2,
                            TRUE ~ NA_real_),
    
    inh_mid =     case_when(Q51_3 < 7 ~ Q51_3,
                            Q53_3 < 7 ~ Q53_3,
                            Q55_3 < 7 ~ Q55_3,
                            q57_3 < 7 ~ q57_3,
                            TRUE ~ NA_real_),
    
    inh_low =     case_when(Q51_4 < 7 ~ Q51_4,
                            Q53_4 < 7 ~ Q53_4,
                            Q55_4 < 7 ~ Q55_4,
                            q57_4 < 7 ~ q57_4,
                            TRUE ~ NA_real_),
    
    inh_you =     case_when(Q51_5 < 7 ~ Q51_5,
                            Q53_5 < 7 ~ Q53_5,
                            Q55_5 < 7 ~ Q55_5,
                            q57_5 < 7 ~ q57_5,
                            TRUE ~ NA_real_),
    
    inh_heirs =   case_when(Q51_6 < 7 ~ Q51_6,
                            Q53_6 < 7 ~ Q53_6,
                            Q55_6 < 7 ~ Q55_6,
                            q57_6 < 7 ~ q57_6,
                            TRUE ~ NA_real_),
    
    slider_low = case_when(Q58a <= 100  ~ Q58a,
                           TRUE ~ NA_real_),
    
    slider_low_zero = slider_low == 0,
    
    slider_low_over_fifty = slider_low >=50,
    
    slider_med = case_when(Q58b <= 100  ~ Q58b,
                           TRUE ~ NA_real_),
    
    slider_med_zero = slider_med == 0,
    
    slider_med_over_fifty = slider_med >=50,
    
    slider_high = case_when(Q58c <= 100 ~ Q58c,
                            TRUE ~ NA_real_),
    
    slider_high_zero = slider_high == 0,
    
    slider_high_over_fifty = slider_high >=50,
  )



## House price, parents' house and mortgage left data. Did parents help. Would parents help. TO ADD

# Own house

yg_dat<- yg_dat %>% 
  mutate(house_price_written = Q20a_other) %>% 
  mutate(house_price_written = case_when(house_price_written %in% c("__NA__", "__DK__") ~NA_character_,
                                         TRUE ~ house_price_written))%>% 
  mutate(parsed_house = parse_number(house_price_written))%>% 
  mutate(est_house_price = case_when(between(parsed_house, 20, 999) ~ 1000*parsed_house,
                                     between(parsed_house, 0, 5) ~ 1000000*parsed_house,
                                     TRUE ~ parsed_house)) %>%  
  mutate(est_house_price = case_when (ID == 1543 ~ NA_real_,
                                      ID == 1049 ~ 160000,
                                      ID == 1513 ~ 230000,
                                      ID == 3023 ~ 300000,
                                      ID == 1799 ~ 350000,
                                      ID == 2882 ~ 400000,
                                      ID == 495 ~ 600000,
                                      ID == 1618 ~ 660000,
                                      ID == 1837 ~ NA_real_,
                                      ID == 2453 ~ 180000,
                                      ID == 1726 ~ 400000,
                                      ID == 559 ~ 200000,
                                      TRUE ~ est_house_price),
         log_est_house_price = log(est_house_price),
         homeowner_X_price = case_when(homeowner==1 ~ est_house_price,
                                              homeowner == 0 ~ 0,
                                              TRUE ~NA_real_),
         homeowner_X_log_price = case_when(homeowner==1 ~ log_est_house_price,
                                                  homeowner == 0 ~ 0,
                                                  TRUE ~NA_real_),
         house_rent_groups  = case_when(homeowner==0 ~ "Don't Own",
                                        est_house_price <=100000 ~ "Own, £100k and under",
                                        between(est_house_price, 100001, 200000) ~ "Own, £100k to £200k",
                                        between(est_house_price, 200001, 300000) ~ "Own, £200k to £300k",
                                        between(est_house_price, 300001, 400000) ~ "Own, £300k to £400k",
                                        between(est_house_price, 400001, 500000) ~ "Own, £400k to £500k",
                                        between(est_house_price, 500001, 750000) ~ "Own, £500k to £750k",
                                        est_house_price >750000 ~ "Own, £750k and up",
                                        TRUE ~NA_character_),
         house_rent_groups = factor(house_rent_groups, 
                                    levels = c("Don't Own", "Own, £100k and under", "Own, £100k to £200k",
                                                      "Own, £200k to £300k", "Own, £300k to £400k", "Own, £400k to £500k",
                                                      "Own, £500k to £750k", "Own, £750k and up"))
  ) 


# Parents' house
  

yg_dat <- yg_dat %>% 
  mutate(house_price_written_parents = Q27a_other) %>% 
  mutate(house_price_written_parents = case_when(house_price_written_parents %in% c("__NA__", "__DK__") ~NA_character_,
                                                 TRUE ~ house_price_written_parents)) %>% 
  mutate(parsed_house_parents = parse_number(house_price_written_parents)) %>% 
  mutate(est_house_price_parents = case_when(between(parsed_house_parents, 20, 999) ~ 1000*parsed_house_parents,
                                             between(parsed_house_parents, 0, 19) ~ 1000000*parsed_house_parents,
                                             TRUE ~ parsed_house_parents)) %>% 
  mutate(est_house_price_parents = case_when (ID %in% c(3585, 3376, 1065, 1986, 24) ~ NA_real_,
                                              ID == 2355 ~ 120000,
                                              ID == 1964 ~ 250000,
                                              ID == 2523 ~ 250000,
                                              ID == 2640 ~ 250000,
                                              ID == 1232 ~ 400000,
                                              ID == 1625 ~ 450000,
                                              ID == 2150 ~ 450000,
                                              ID == 3420 ~ 1000000,
                                              TRUE ~ est_house_price_parents)) %>%
  mutate(log_est_house_price_parents = log(est_house_price_parents),
         homeowner_X_price_parents = case_when(parent_homeowner==1 ~ est_house_price_parents,
                                               parent_homeowner == 0 ~ 0,
                                               TRUE ~NA_real_),
         homeowner_X_log_price_parents = case_when(parent_homeowner==1 ~ log_est_house_price_parents,
                                                   parent_homeowner == 0 ~ 0,
                                                   TRUE ~NA_real_),
         house_rent_groups_parents  = case_when(parent_homeowner==0 ~ "Don't Own",
                                                est_house_price_parents <=100000 ~ "Own, £100k and under",
                                                between(est_house_price_parents, 100001, 200000) ~ "Own, £100k to £200k",
                                                between(est_house_price_parents, 200001, 300000) ~ "Own, £200k to £300k",
                                                between(est_house_price_parents, 300001, 400000) ~ "Own, £300k to £400k",
                                                between(est_house_price_parents, 400001, 500000) ~ "Own, £400k to £500k",
                                                between(est_house_price_parents, 500001, 750000) ~ "Own, £500k to £750k",
                                                est_house_price_parents >750000 ~ "Own, £750k and up",
                                                TRUE ~NA_character_),
         house_rent_groups_parents = factor(house_rent_groups_parents, 
                                            levels = c("Don't Own", "Own, £100k and under", "Own, £100k to £200k",
                                                       "Own, £200k to £300k", "Own, £300k to £400k", "Own, £400k to £500k",
                                                       "Own, £500k to £750k", "Own, £750k and up")),
         house_value_bins = case_when(house_rent_groups == "Don't Own" ~ 1,
                                      house_rent_groups == "Own, £100k and under" ~ 2,
                                      house_rent_groups == "Own, £100k to £200k" ~ 3,
                                      house_rent_groups == "Own, £200k to £300k" ~ 4,
                                      house_rent_groups == "Own, £300k to £400k" ~ 5,
                                      house_rent_groups == "Own, £400k to £500k" ~ 6,
                                      house_rent_groups == "Own, £500k to £750k" ~ 7,
                                      house_rent_groups == "Own, £750k and up" ~ 8,
                                      TRUE ~ NA_real_),
         house_value_bins_parents = case_when(house_rent_groups_parents == "Don't Own" ~ 1,
                                     house_rent_groups_parents == "Own, £100k and under" ~ 2,
                                     house_rent_groups_parents == "Own, £100k to £200k" ~ 3,
                                     house_rent_groups_parents == "Own, £200k to £300k" ~ 4,
                                     house_rent_groups_parents == "Own, £300k to £400k" ~ 5,
                                     house_rent_groups_parents == "Own, £400k to £500k" ~ 6,
                                     house_rent_groups_parents == "Own, £500k to £750k" ~ 7,
                                     house_rent_groups_parents == "Own, £750k and up" ~ 8,
                                     TRUE ~ NA_real_)
         )  

# Other housing questions

yg_dat <- yg_dat %>% 
  mutate(rent_costs = case_when(Q21 < 10  ~ Q21,
                                 TRUE ~ NA_real_),
         parent_helped_buy = case_when(Q23_1 == 1 ~ "Parents Helped Buy",
                                        Q23_2 == 1 ~ "Inherited House",
                                        Q23_3 == 1 ~ "Parents No Help",
                                        TRUE ~ NA_character_),
         parents_would_help = case_when(Q24 == 1 ~ 1,
                                          Q24 == 2 ~ 0,
                                          TRUE  ~ NA_real_),
         parents_live = case_when(Q25 == 1  ~ Q25_other,
                                   Q25  == 3 ~ NA_character_,
                                   TRUE ~ NA_character_),
         
         mortgage_left = case_when (Q22 == 1 ~ Q22a_other,
                                    TRUE ~ NA_character_),
         
         mortgage_left = parse_number(mortgage_left),
         
         mortgage_left =  case_when(between(mortgage_left, 1, 999) ~ 1000*mortgage_left,
                                    TRUE ~ mortgage_left)
                                    
    
  )

yg_dat <- yg_dat %>% mutate(ID_ch = ID-lag(ID)) %>% 
  filter(ID_ch==1 | is.na(ID_ch))



# Last saved Nov 17 2022
write_csv(yg_dat, "~/Dropbox/WEALTHPOL_Research/Papers/uk_survey_experiments/UK_third_survey_experiment/data_for_analysis/yg_data_3_ready.csv")
save(yg_dat, file = "~/Dropbox/WEALTHPOL_Research/Papers/uk_survey_experiments/UK_third_survey_experiment/data_for_analysis/yg_data_ready.Rda" )

#load( "~/Dropbox/WEALTHPOL_Research/Papers/uk_survey_experiments/UK_third_survey_experiment/data_for_analysis/yg_data_ready.Rda" )



# Code for JBM

jbm <- yg_dat %>% 
  select(Weight:pdl_profile_oslaua, profile_gross_personal:profile_GOR, Q20:Q22a_other, constituency_name, ons_const_id, education_code:AveragePrice,
         vote_choice:years_lived, est_house_price, house_rent_groups)


write_csv(jbm, "~/Dropbox/WEALTHPOL_Research/Papers/uk_survey_experiments/UK_third_survey_experiment/data_for_analysis/jbm.csv")
save(jbm, file = "~/Dropbox/WEALTHPOL_Research/Papers/uk_survey_experiments/UK_third_survey_experiment/data_for_analysis/jbm.Rda" )

load("~/Dropbox/WEALTHPOL_Research/Papers/uk_survey_experiments/UK_third_survey_experiment/data_for_analysis/jbm.Rda" )
