Monday, October 15

More on GDP, GDI and Okun

In the comments to an earlier post, Andrew Lilley suggested that I look at a one or two quarter lag to GDP. He throught I would find a stronger relationship with lagged data.

With a little rewriting of my R program (which I have posted below), I have compared the lagged data. But first I should explain a couple of other changes. in my initial model I compared the quarterly GDP/GDI figure with the first difference of the UR data for the last month in the quarter. In updating my programs, I am now taking the mean of the UR rate for the three months of the quarter.

The second thing to note is that I have checked the UR first difference data from leading by three quarters (a negative lag) to laging by three quarters.

The third thing to note is that I have added a Bayesian Information Criterion as the comparator, rather than R-squared (which is at best problematic as an information criterion). This turns out to be significant in the selection of the winning model (given the R-squared results.

The conclusion:  The GDI growth rare remains better than the GDP growth rate at explaining changes the unemployment rate (selecting the requression with the lowest Bayesian Information Criterion). It does its best job when not lagged. The GDP growth rate (Quarter on Quarter) does its best job when it is lagged by two quarters.
delta(UR)@t = GDPgrowth@t-2 + u
(comparing this quarter with the previous quarter)
The GDP growth rate on a through the year basis does its best job when lagged by one quarter.
delta(UR)@t = GDPgrowth@t-1 + u
(comparing this quarter with the same quarter last year)
The charts on the winning regression models follow.





The R code follows. Please let me know if you see any errors.


### r-okun.R
###
###     GNU R batch file for plotting graphs
###

### ----- create the right environment ...
source("../bin/mysqlConnect.R")
source('../bin/i-plot-constants.R')
library('zoo')
library('plyr')
imageDir <- paste(imageDir, 'okun/', sep='')

### ----- home brew lagging function 
lagger <- function(vector, k)
{
 # - sanity checks
 k <- as.integer(k)
 n <- length(vector)
 if(n == 0) return (NULL)
 if(k > n)  return (NULL)
 
 # - do the lag - pad with NAs
 lagged = NULL
 if(k == 0) lagged <- vector
 
 if(k > 0)
 {
  lagged <- c(rep(NA, k), vector)
  lagged <- lagged[1:n]
 }
 
 if(k < 0)
 {
  k <- abs(k)
  lagged <- c(vector, rep(NA, k))
  lagged <- lagged[(1+k):(n+k)]
 }
 
 return (lagged)
}

### --- get National Accounts from MySQL database
rs <- dbSendQuery(conn, "select * from TT_5206001_key_aggregates_Data1")
natAccounts <- fetch(rs, n = -1)

### --- get Monthly Labour Force (MLF) survey data rfom MySQL database
rs <- dbSendQuery(conn, "select * from TT_6202002_Data1")
seasAdjEmploy <- fetch(rs, n = -1)

### ----- calculate the quarterly mean unemployment rate
endDate <- as.Date(as.yearmon(seasAdjEmploy$t), frac=1.0)
seasAdjEmploy$Qtr <- factor( as.yearqtr(endDate) )
seasAdjEmploy <- ddply(seasAdjEmploy, "Qtr", transform,
 mean.UR.for.qtr = mean(A181525X))

### ----- merge National Accounts & MLF - only keep EOQ MLF records
mergedData <- merge(natAccounts, seasAdjEmploy, by = "t", all = FALSE)

### ----- keep needed columns from the merged data.frame - add Date
# A2304402X ==> GDP: CVM ; $ Millions : Seasonally Adjusted
# A2304410X ==> Real GDI: CVM ; $ Millions : Seasonally Adjusted
# A181525X --> SA_Unemployment_rate_Persons
df <- mergedData[, c('t', 'A2304402X', 'A2304410X',
 'A181525X', 'Qtr', 'mean.UR.for.qtr')]
endDate <- as.Date(as.yearmon(df$t), frac=1.0)
df$Date <- as.Date(as.yearqtr(endDate), frac=0.5)

### ----- do the regressions and plots 
for(period in c('a', 'q'))
{
 freq <- 0
 if(period == 'a') { freq <- 4; p <- 'TTY' }
 if(period == 'q') { freq <- 1; p <- 'Quarterly' }
  
 GDI.growth.percent.name <- paste(period, 'GDI.growth.percent', sep='')
 GDP.growth.percent.name <- paste(period, 'GDP.growth.percent', sep='')
 UR.diff.name <- paste(period, 'UR.diff', sep='')

 df[, GDI.growth.percent.name] <- calculatePercentGrowth(v=df$A2304410X, freq=freq)
 df[, GDP.growth.percent.name] <- calculatePercentGrowth(v=df$A2304402X, freq=freq)
 df[, UR.diff.name] <- calculateDifference(v=df$A181525X, freq=freq)
 
 # - an initial plot of the data ...
 if(period == 'a')
 {
  dlm <- df[!is.na(df[, GDI.growth.percent.name]), ]
  breaks <- c(GDP.growth.percent.name, GDI.growth.percent.name,
   UR.diff.name)
  labels <- c('GDP Percent Growth',  'GDI Percent Growth',
   'Change in Unemployment Rate (% points)')
  gf <- data.frame(breaks=breaks, labels=labels)
  f <- paste(imageDir ,"cross-check-gdi-gdp-ur-tty-",  sep="")
  chartMulti (df=dlm, graphFrame=gf, saveLocPrefix=f,
      plotText=list(heading='TTY Change in GDP, GDI and the Unemployment Rate',
       yLabel='Percent/Percentage Points'), 
      footnote='Source: ABS 5206 and 6202' )
    }

 for(lag in -3:3)
 {
  l <- as.character(lag)
 
  # - lag the differenced MLF data
  column.name = paste('UR.diff.lagged.', l, sep='')
  column.name = sub('-', 'minus.', column.name)
  df[, column.name] <- lagger(df[, UR.diff.name], lag)
   
  print('-------------------------------------------')
  cat(lag); print(' <== unemployment rate 1st difference lagged by ')
  print('-------------------------------------------')

  for(fn in c('P', 'I'))
  {
   # - set up the regressors and regressand
   yn <- column.name
   if(fn == 'P') { xn <- GDP.growth.percent.name; d <- 'GDP' }
   if(fn == 'I') { xn <- GDI.growth.percent.name; d <- 'GDI' }
   
   # - for the oytput stream ...
   print('====='); print(d); print('=====')
   
   # - get rid of NA rows
   dlm <- df[!is.na(df[, GDI.growth.percent.name]), ] # limit the data set
   dlm <- dlm[!is.na(dlm[, xn]) & !is.na(dlm[, yn]), ] # vectorised and
   
   # - get the regression data and regress ...
   y <- dlm[, yn]
   x <- dlm[, xn]
   fit <- lm(y ~ x)
   print(summary(fit))
 
   # - Okun: y = mx + b; if y = 0 then x = -b/m
   b <- (fit$coefficients)[[1]]
   m <- (fit$coefficients)[[2]]
   okun <- round(-b/m, 3)
   b <- round(b, 3); m <- round(m, 3)
   
   # - Bayesian Information Criterion
   r <- residuals(fit)
   sigma.hat.residuals <- var(residuals(fit))
   T <- length(residuals(fit))
   k <- 2 # two coefficients
   BIC <- round(T * log(sigma.hat.residuals) + k * log(T), 3)
   
   # - R-squared
   r2 <- round(summary(fit)$r.squared, 3)

   # - plot  
   plotTitle <- paste("Okuns Rule (", d, " Growth v delta(UR) lagged ", lag,
    " Qs; R-sq=", r2, "; BIC=", BIC, ')', sep='')
   f <- paste(imageDir, p, '-', plotTitle, fileEnding, sep="")
   plot <- ggplot(data = dlm, mapping = aes_string(x=xn, y=yn)) + 
       geom_point() +
       geom_smooth(method=lm) +
       geom_hline(yintercept=0, colour='black', size=0.4, alpha=0.4) +
       ylab(paste(p, " change in Unemployment Rate (% points)", sep='')) +
       xlab(paste(p, ' change in ', d, ' (%)', sep='')) +
       labs(title = plotTitle)
   plot <- addFootnote(plot, paste('Source: ABS 5206 and 6202; Okun threshold: ',
    okun, '; T=', T, '; y-intercept=', b, '; slope=', m, sep=''))
   ggsave(plot, width=plotWidth, height=plotHeight, filename=f, dpi=dpiset)
   
   if(lag == 0)
   {
    # - an extra plot at lag=0
    dlm$okun <- okun
    breaks <- c('okun', xn)
    labels <- c('Okun', paste(p, d, 'Percent Growth', sep=' ') )
    gf <- data.frame(breaks=breaks, labels=labels)
    f <- paste(imageDir , p, '-', d, '-v-okun-',  sep='')
    chartMulti (df=dlm, graphFrame=gf, saveLocPrefix=f,
           plotText=list(heading=paste(p, ' Growth in ', d, sep=''),
            yLabel='Percent'), 
           footnote=paste('Source: ABS 5206 and 6202;  Okun: ',
            okun, sep='') )
   }
  }
  print('-------------------------------------------')
 }
}

### ----- tidy up when done
dbDisconnect(conn)

1 comment:

  1. I can never post at work, so usually forget to do so, but thanks for doing this work - v. informative post.

    ReplyDelete