Data handling


# Start Inf data
# --------------

# read data
dtf <- read_dta("./Data/SouthAfrica/1_SA.dta", encoding = "latin1")

#labels of each variable
dtf_labels <- unlist(lapply(dtf, attr, which = "label"))

# remove labels
dtf <- as.data.frame(lapply(dtf, c))

# Notice that for a given frequency,
# the data begins at the start of the period
# it reports to (for example, monthly data's date is on the 1st of
# each month)

# fix date
datelookup <- seq.Date(from = as.Date("2000-07-01"),
                       to = as.Date("2019-10-01"),
                       by = "quarter")
dtf$date <- datelookup[dtf$date - abs(min(dtf$date)) + 1]

# order by date
dtf <- arrange(dtf, date)

# correct magnitude of certain variables
dtf <- dtf %>% mutate_at(vars(-date, -contains("skew")),
                         ~ ./100 )

#ADJUSTMENT OF LEAD DATA
#------------------------------------------
if (bAdjustPeriodTiming) {
  dtf <- dtf %>%
    mutate_at(vars(starts_with("Ecpi1")),
              ~ lag(.,4)) %>% #quarterly data
    mutate_at(vars(starts_with("Ecpi2")),
              ~ lag(.,8))
}
# no "-1" since every data point has end of period date

Timespan is 2012 to 2018

Figure(s) 13: South Africa’s unlucky run: 2010-16

Subfigure 13(a): Actual inflation, markets and survey first-order moments


#Actual vs inflation expectations

vars1 <- c("cpi",
           "CPI_Core",
           "Ecpi1_ana",
           "Ecpi1_bus",
           "Ecpi1_tradeun")
breakss <- c(vars1,
            "Upper bound",
            "Lower bound")
legs <-  c("Inflation (CPI Headline)",
           "Inflation (CPI Core)",
            "Analysts' forecast",
            "Businesses' forecast",
            "Trade unions' forecasts",
            "Upper bound",
            "Lower bound")
legv <- c('red', 'blue', 'orange3', 'purple', 'green4', 'black', 'black')
linev <- c('solid', 'dotted', 'dotdash', 'dashed', 'longdash', 'solid', 'solid')


# inflation target
target <- 0.045
target_ub <- 0.06
target_lb <- 0.03

# thickness of line
vline_w <- 0.6 #mm

# ticks
labs <- c(target_lb, target, target_ub)

# plot
plot <- create_plot(dtf_long,
                    vars = vars1,
                    scales = 0) +
  theme(legend.position = c(0.2,0.36),
        legend.text = element_text(size = tfont(legend_text_size - 2))) +
  geom_hline(aes(yintercept = target_ub, colour = "Upper bound", linetype = "Upper bound"),
             size = tfont(vline_w)) +
  geom_hline(aes(yintercept = target_lb, colour = "Lower bound", linetype = "Lower bound"),
             size = tfont(vline_w)) +
  scale_linetype_manual(breaks = breakss, values = linev, labels = legs) +
  scale_colour_manual(breaks = breakss, values = legv, labels = legs) +
  scale_y_continuous(limits = c(0.025, 0.07), breaks = labs,
                     labels = scales::percent(labs, accuracy = 0.1)) +
  guides(col = guide_legend(keywidth = 5, keyheight = 1))

Scale for ‘y’ is already present. Adding another scale for ‘y’, which will replace the existing scale.

  
print(plot)

if (bSavePlots) my_ggsave(paste0(path_figure_out, "figure_13_a.pdf"), plot)

Subfigure 13(b): Cross-sectional survey distributions


# density of estimates

# read data from paper
df_temp <- read_excel("Data/SouthAfrica/siklos_et_al_data.xlsx", range = "A2:N101") 

New names: * Inflation -> Inflation...1 * Density -> Density...2 * `` -> ...3 * Inflation -> Inflation...4 * Density -> Density...5 * ...

x <- unlist(lapply(df_temp, function(x) !all(is.na(x))))
df_temp <- df_temp[, x] # drop all NA columns

# keep only 2014, 2015, 2016
df_temp <- df_temp [, -c(1:4)]
N <- nrow(df_temp)
df_temp <- as.data.frame(rbind(as.matrix(df_temp[, 1:2]),
                               as.matrix(df_temp[3:4]),
                               as.matrix(df_temp[5:6])
                              )
                        )

colnames(df_temp) <- c("inflation", "density")

years <- c(2014, 2015, 2016)

df_temp$year <- factor(c(rep(2014, N), rep(2015, N), rep(2016, N)),
                       levels = years)

df_temp$inflation <- df_temp$inflation/100


legv <- c('red','blue','green4','orange3',"purple2")
linev <- c('solid','dotdash','dashed')
#limits of x axis
x_lb <- -0.01
x_ub <- 0.15
labs <- seq(x_lb, x_ub, by = 0.01)

plot <- ggplot(df_temp, aes(x = inflation, y = density
                           , color = year, linetype = year)) +
        geom_line( size = line_sizee) +
        scale_linetype_manual(breaks = years, values= linev, labels = years) +
        scale_colour_manual(breaks = years, values = legv, labels = years) + 
        scale_x_continuous(breaks = labs,
                           labels = scales::percent(labs, accuracy = 1),
                           limits = c(x_lb,x_ub)) +
        theme(legend.position = c(0.2,0.8))

print(plot)

if (bSavePlots) my_ggsave(paste0(path_figure_out, "figure_13_b.pdf"), plot)
---
title: "Brookings Paper - South Africa - Notebook"
output: html_notebook
---


```{r start_R, include = FALSE}

#----------------------------------------------------------------
# Initiation
#----------------------------------------------------------------

# Start from an empty global environment so that objects left over from
# an earlier interactive session cannot leak into the results.
rm(list = ls())

# clean console (form feed character)
cat("\014")

#### PLEASE CHOOSE #####

# print and save plots?
bSavePlots <- 1   # 0: no
                  # 1: yes

# Expectations at T = T-h or T+h?
# When switched on, the data-handling chunk lags the quarterly "Ecpi1*"
# series by 4 periods and the "Ecpi2*" series by 8 periods.
bAdjustPeriodTiming <- 1   # 0: T+h
                           # 1: T-h

####  CHOICE END   #####

# BEWARE ONLY VARIABLES USED ARE ADJUSTED
# IF YOU WANT TO USE OTHER VARIABLES
# PLEASE ADD THEM BELOW WHEN THE ADJUSTMENT
# IS MADE

######################################

# packages
# list of libraries
packages <- c("tidyverse"   # data analysis
              ,"knitr"      # markdown
              ,"haven"      # read .dta files
              ,"readxl"     # read excel files
              ,"lubridate"  # dates
              ,"tinytex"    # for markdown
              ,"magrittr"   # pipe operator
)

# install them if needed
# (system.file() returns "" for a package that is not installed; this is
# much cheaper than scanning the whole library with installed.packages())
new_packages <- packages[!vapply(packages,
                                 function(pkg) nzchar(system.file(package = pkg)),
                                 logical(1))]

if (length(new_packages) > 0) { # installs them
  install.packages(new_packages, dependencies = TRUE)
}

# load libraries
# library() stops with an error if a package cannot be loaded, whereas
# require() would only return FALSE and let later chunks fail obscurely.
invisible(lapply(packages, library, character.only = TRUE))

```


```{r plot_def, include = FALSE}

# ggplot2 options

# Figure size used for saving (my_ggsave) and for display (chunk options).
# The dimensions are written in cm and multiplied by 0.393700787
# (inches per cm), so the stored values are in inches.
fig_h <- 2.5 * 8.4375 * 0.393700787  # height
fig_w <- 2.5 * 15 * 0.393700787      # width
fig_disp_adj <- 1                    # scaling factor for figure display


# Unit helpers: scale a size given in mm by the ggplot2 constants
# `.pt` and `.stroke` (keeps sizes independent of the output resolution).
tfont <- function(x) {
  .pt * x
}
tlwd <- function(x) {
  .stroke * x
}

# ggplot2 options
# plot definitions

# text sizes (passed through tfont() where they are used)
legend_text_size <- 7.5
axis_text_size   <- 7

# width of the legend keys
legend_key_width <- 20 # mm

# default line size for plots
line_sizee <- tlwd(1)

# Global ggplot2 theme for every figure of the notebook:
# theme_minimal() plus the overrides below.
theme_set(
  theme_minimal() +
    theme(
      # titles
      plot.title    = element_text(face = "plain", size = tfont(6)),
      plot.subtitle = element_text(face = "italic"),

      # panel and axes
      panel.background   = element_blank(),
      panel.grid.minor.x = element_blank(),
      axis.line          = element_line(),
      axis.ticks         = element_blank(),
      axis.title         = element_blank(),
      axis.text          = element_text(size = tfont(axis_text_size)),

      # legend: framed box, no title, placed at relative position (0.25, 0.8)
      legend.position       = c(0.25, 0.8),
      legend.title          = element_blank(),
      legend.text           = element_text(size = tfont(legend_text_size)),
      legend.background     = element_rect(colour = "black",
                                           fill = NA, size = 0.5),
      legend.box.background = element_blank(),
      legend.key            = element_rect(colour = NA,
                                           fill = NA),
      legend.key.width      = unit(legend_key_width, "mm")
    )
)


# functions to create colour/line scheme

# Build the colour scheme for a set of series.
#
# x: vector of series names (one colour is assigned per element).
#
# Returns a character vector of colours of the same length as `x`, named
# by `x`. The first five series get distinct colours; if more than five
# are requested the palette is recycled (with a warning) instead of
# producing NA colours. An empty `x` yields an empty vector.
create_legv <- function(x) {

  # colours to be used in plots
  palette <- c("red", "blue", "green4", "orange3", "purple2")
  n <- length(x)

  if (n > length(palette)) {
    warning("More series than available colours; colours are recycled.",
            call. = FALSE)
  }

  # rep_len() is safe for n == 0, unlike indexing with 1:n
  y <- rep_len(palette, n)
  names(y) <- x
  y

}

# Build the linetype scheme for a set of series.
#
# x: vector of series names (one linetype is assigned per element).
#
# Returns a character vector of linetypes of the same length as `x`,
# named by `x`. If more than five series are requested the linetypes are
# recycled (with a warning) instead of producing NA linetypes. An empty
# `x` yields an empty vector.
create_linev <- function(x) {

  # linetypes to be used in plots
  # NOTE(review): "dotdash" appears twice, so the 4th and 5th series share
  # a linetype -- confirm whether this is intended.
  linetypes <- c("solid", "longdash", "dotted", "dotdash", "dotdash")
  n <- length(x)

  if (n > length(linetypes)) {
    warning("More series than available linetypes; linetypes are recycled.",
            call. = FALSE)
  }

  # rep_len() is safe for n == 0, unlike indexing with 1:n
  y <- rep_len(linetypes, n)
  names(y) <- x
  y

}

# Line plot of selected series from a long-format data frame.
#
# df:        long data with columns `date`, `variable` and `value`.
# vars:      names of the series (values of `variable`) to plot; also
#            fixes the order of the series.
# legs:      legend labels, one per entry of `vars`. If NULL the series
#            names in `vars` are used as labels.
# date_lb,
# date_ub:   first and last date shown (anything as.Date() accepts).
# perc:      if true, format the y axis as percentages.
# scales:    if true, add the default colour/linetype scales; switch off
#            when the caller adds its own manual scales.
# line_size: size of the plotted lines.
#
# Returns the ggplot object.
create_plot <- function(df,
                        vars,
                        legs = NULL,
                        date_lb = as.Date("2012-01-01"),
                        date_ub = as.Date("2018-12-31"),
                        perc = 1,
                        scales = 1,
                        line_size = line_sizee) {

  # date bounds
  date_lb <- as.Date(date_lb)
  date_ub <- as.Date(date_ub)

  # keep the requested window and series, drop missing observations and
  # fix the series order through the factor levels
  df <- df %>%
    filter(date >= date_lb & date <= date_ub &
             variable %in% vars & !is.na(value)) %>%
    mutate(variable = factor(variable, levels = vars))

  plot <- ggplot(df, aes(x = date, y = value)) +
    geom_line(aes(colour = variable, linetype = variable),
              size = line_size) +
    scale_x_date(date_breaks = "1 year", date_labels = "%Y",
                 limits = c(date_lb, date_ub))

  if (scales) {
    # `labels = NULL` means "no labels" to ggplot2, which would leave the
    # legend without any text; fall back to the series names instead.
    if (is.null(legs)) {
      legs <- vars
    }

    plot <- plot +
      scale_linetype_manual(breaks = vars, values = create_linev(vars),
                            labels = legs) +
      scale_colour_manual(breaks = vars, values = create_legv(vars),
                          labels = legs)
  }

  if (perc) {
    plot <- plot +
      scale_y_continuous(labels = scales::percent_format(accuracy = 1))
  }

  plot

}

# Save a plot with the notebook's default output settings.
#
# Thin wrapper around ggsave(): by default writes a PDF of fig_w x fig_h
# inches at 300 dpi. Further arguments in `...` are forwarded to ggsave().
my_ggsave <- function(.filename, .plot,
                      .device = "pdf", .width = fig_w,
                      .height = fig_h, .units = "in", .dpi = 300, ...) {

  ggsave(
    filename = .filename,
    plot     = .plot,
    device   = .device,
    width    = .width,
    height   = .height,
    units    = .units,
    dpi      = .dpi,
    ...
  )

}

```

```{r setup, include = FALSE}

# directory
# When the working directory is the "Code" folder, use its parent as the
# knitr root directory. Only a trailing "/Code" is removed: stripping every
# occurrence would corrupt paths that contain "/Code" further up
# (e.g. "~/Code/project/Code" or "~/CodeBase/project").
directory <- getwd()
directory <- sub("/Code$", "", directory)
opts_knit$set(root.dir = directory)

# folder the figures are saved to (relative to the root directory)
path_figure_out <- "./Figures/"

# default chunk options
opts_chunk$set(message = FALSE, warning = FALSE, results = 'asis', echo = TRUE,
               fig.width = fig_disp_adj * fig_w, fig.height = fig_disp_adj * fig_h,
               fig.retina = 1) # figure options

```

## Data handling

```{r load_data}

# Start Inf data
# --------------

# read data
dtf <- read_dta("./Data/SouthAfrica/1_SA.dta", encoding = "latin1")

# labels of each variable
dtf_labels <- unlist(lapply(dtf, attr, which = "label"))

# remove labels
dtf <- as.data.frame(lapply(dtf, c))

# Notice that for a given frequency,
# the data begins at the start of the period
# it reports to (for example, monthly data's date is on the 1st of
# each month)

# fix date
# The numeric period index is mapped onto calendar quarters; the earliest
# observation is assigned to the first entry of the lookup (2000-07-01).
# The offset is min(date) itself (not its absolute value), so the mapping
# also holds if the period index is negative.
datelookup <- seq.Date(from = as.Date("2000-07-01"),
                       to = as.Date("2019-10-01"),
                       by = "quarter")
dtf$date <- datelookup[dtf$date - min(dtf$date) + 1]

# order by date
dtf <- arrange(dtf, date)

# correct magnitude of certain variables
# (everything except the date and the "skew" columns is divided by 100)
dtf <- dtf %>%
  mutate(across(c(-date, -contains("skew")), ~ .x / 100))

# ADJUSTMENT OF LEAD DATA
# -----------------------
# dplyr::lag is named explicitly: stats::lag would not shift the values.
if (bAdjustPeriodTiming) {
  dtf <- dtf %>%
    mutate(across(starts_with("Ecpi1"),
                  ~ dplyr::lag(.x, 4))) %>% # quarterly data
    mutate(across(starts_with("Ecpi2"),
                  ~ dplyr::lag(.x, 8)))
}
# no "-1" since every data point has end of period date
# NOTE(review): this contradicts the remark above that dates mark the
# start of the period -- confirm which convention applies.

# add core CPI
dtf_corecpi <- read_xlsx("./Data/SouthAfrica/2_CPI_monthly.xlsx")

# Conversion factors between the two index bases, taken from the
# December 2016 observation. readxl returns date-times in UTC, so the
# reference date must be built in UTC as well; with the local time zone
# the comparison finds no row on machines that are not running in UTC.
rebase_row <- which(dtf_corecpi$date == as.POSIXct("2016-12-01", tz = "UTC"))
if (length(rebase_row) != 1) {
  stop("Expected exactly one 2016-12-01 observation in 2_CPI_monthly.xlsx, ",
       "found ", length(rebase_row), ".", call. = FALSE)
}

convert_16_to_12_headline <- dtf_corecpi$CPI_Headline_base_2012[rebase_row] / 100
convert_16_to_12_core <- dtf_corecpi$CPI_Core_base_2012[rebase_row] / 100
convert_12_to_16_headline <- 100 / dtf_corecpi$CPI_Headline_base_2012[rebase_row]
convert_12_to_16_core <- 100 / dtf_corecpi$CPI_Core_base_2012[rebase_row]

# growth rate of an index over n periods
inflation_maker <- function(CPI, n) {
  (CPI - dplyr::lag(CPI, n = n)) / dplyr::lag(CPI, n = n)
}

dtf_corecpi %<>%
  mutate(month = month(date),
         quartal = quarter(date),
         year = year(date)) %>%
  arrange(year, quartal, month) %>%
  # fill gaps in each index from the same index on the other base
  mutate(CPI_Headline_base_2012 = ifelse(is.na(CPI_Headline_base_2012),
                                         convert_16_to_12_headline * CPI_Headline,
                                         CPI_Headline_base_2012),
         CPI_Core_base_2012 = ifelse(is.na(CPI_Core_base_2012),
                                     convert_16_to_12_core * CPI_Core,
                                     CPI_Core_base_2012)) %>%
  mutate(CPI_Headline = ifelse(is.na(CPI_Headline),
                               convert_12_to_16_headline * CPI_Headline_base_2012,
                               CPI_Headline),
         CPI_Core = ifelse(is.na(CPI_Core),
                           convert_12_to_16_core * CPI_Core_base_2012,
                           CPI_Core)) %>%
  select(-base) %>%
  # 12-month growth rates of the monthly indices
  mutate(across(starts_with("CPI"), ~ inflation_maker(.x, n = 12))) %>%
  # quarterly averages, dated at the first month of the quarter
  group_by(year, quartal) %>%
  summarise(date = first(date),
            across(starts_with("CPI_"), mean),
            .groups = "drop")

dtf %<>%
  left_join(dtf_corecpi, by = "date") %>%
  mutate(date = as.Date(date))


# pivot data to long format for plotting
dtf_long <- pivot_longer(dtf, !date,
                         names_to = "variable",
                         values_to = "value",
                         values_transform = as.numeric)

```

## Timespan is 2012 to 2018

## Figure(s) 13: South Africa’s unlucky run: 2010-16
## Subfigure 13(a): Actual inflation, markets and survey first-order moments

```{r fig_13_a}

# Actual inflation vs inflation expectations

# series to plot
vars1 <- c(
  "cpi",
  "CPI_Core",
  "Ecpi1_ana",
  "Ecpi1_bus",
  "Ecpi1_tradeun"
)

# legend entries: the series plus the two horizontal bound lines
breakss <- c(vars1, "Upper bound", "Lower bound")
legs <- c(
  "Inflation (CPI Headline)",
  "Inflation (CPI Core)",
  "Analysts' forecast",
  "Businesses' forecast",
  "Trade unions' forecasts",
  "Upper bound",
  "Lower bound"
)

# colour and linetype per legend entry (same order as `breakss`)
legv <- c("red", "blue", "orange3", "purple", "green4", "black", "black")
linev <- c("solid", "dotted", "dotdash", "dashed", "longdash", "solid", "solid")

# inflation target and its bounds
target <- 0.045
target_ub <- 0.06
target_lb <- 0.03

# thickness of the bound lines
vline_w <- 0.6 # mm

# y-axis ticks: lower bound, target, upper bound
labs <- c(target_lb, target, target_ub)

# base plot without the default scales (manual ones are added below)
plot <- create_plot(dtf_long, vars = vars1, scales = 0)

# horizontal lines for the bounds, mapped to legend entries
plot <- plot +
  geom_hline(aes(yintercept = target_ub,
                 colour = "Upper bound",
                 linetype = "Upper bound"),
             size = tfont(vline_w)) +
  geom_hline(aes(yintercept = target_lb,
                 colour = "Lower bound",
                 linetype = "Lower bound"),
             size = tfont(vline_w))

# manual scales covering series and bounds; y axis restricted to the band
plot <- plot +
  scale_linetype_manual(breaks = breakss, values = linev, labels = legs) +
  scale_colour_manual(breaks = breakss, values = legv, labels = legs) +
  scale_y_continuous(limits = c(0.025, 0.07),
                     breaks = labs,
                     labels = scales::percent(labs, accuracy = 0.1))

# legend placement and appearance
plot <- plot +
  theme(legend.position = c(0.2, 0.36),
        legend.text = element_text(size = tfont(legend_text_size - 2))) +
  guides(col = guide_legend(keywidth = 5, keyheight = 1))

print(plot)
if (bSavePlots) {
  my_ggsave(paste0(path_figure_out, "figure_13_a.pdf"), plot)
}

```

## Subfigure 13(b): Cross-sectional survey distributions

```{r fig_13_b}

# Densities of the survey estimates

# read data from paper
df_temp <- read_excel("Data/SouthAfrica/siklos_et_al_data.xlsx", range = "A2:N101")

# flag the columns that contain at least one value, drop the empty ones
x <- vapply(df_temp, function(col) !all(is.na(col)), logical(1))
df_temp <- df_temp[, x]

# keep only 2014, 2015, 2016: discard the first four remaining columns
df_temp <- df_temp[, -(1:4)]
N <- nrow(df_temp)

# stack the three (inflation, density) column pairs on top of each other
df_temp <- as.data.frame(
  do.call(rbind,
          lapply(list(1:2, 3:4, 5:6),
                 function(idx) as.matrix(df_temp[, idx])))
)
colnames(df_temp) <- c("inflation", "density")

# one block of N rows per year, in the order the pairs were stacked
years <- c(2014, 2015, 2016)
df_temp$year <- factor(rep(years, each = N), levels = years)

# rescale inflation by 1/100
df_temp$inflation <- df_temp$inflation / 100


# colours and linetypes
legv <- c("red", "blue", "green4", "orange3", "purple2")
linev <- c("solid", "dotdash", "dashed")

# limits and ticks of the x axis
x_lb <- -0.01
x_ub <- 0.15
labs <- seq(x_lb, x_ub, by = 0.01)

# one density line per year
plot <- ggplot(df_temp,
               aes(x = inflation, y = density,
                   color = year, linetype = year)) +
  geom_line(size = line_sizee)

# scales and legend placement
plot <- plot +
  scale_linetype_manual(breaks = years, values = linev, labels = years) +
  scale_colour_manual(breaks = years, values = legv, labels = years) +
  scale_x_continuous(breaks = labs,
                     labels = scales::percent(labs, accuracy = 1),
                     limits = c(x_lb, x_ub)) +
  theme(legend.position = c(0.2, 0.8))

print(plot)
if (bSavePlots) {
  my_ggsave(paste0(path_figure_out, "figure_13_b.pdf"), plot)
}

```