With a little rewriting of my R program (which I have posted below), I have compared the lagged data. But first I should explain a couple of other changes. in my initial model I compared the quarterly GDP/GDI figure with the first difference of the UR data for the last month in the quarter. In updating my programs, I am now taking the mean of the UR rate for the three months of the quarter.
The second thing to note is that I have checked the UR first difference data from leading by three quarters (a negative lag) to laging by three quarters.
The third thing to note is that I have added a Bayesian Information Criterion as the comparator, rather than R-squared (which is at best problematic as an information criterion). This turns out to be significant in the selection of the winning model (given the R-squared results.
The conclusion: The GDI growth rare remains better than the GDP growth rate at explaining changes the unemployment rate (selecting the requression with the lowest Bayesian Information Criterion). It does its best job when not lagged. The GDP growth rate (Quarter on Quarter) does its best job when it is lagged by two quarters.
delta(UR)@t = GDPgrowth@t-2 + u
(comparing this quarter with the previous quarter)The GDP growth rate on a through the year basis does its best job when lagged by one quarter.
delta(UR)@t = GDPgrowth@t-1 + u
(comparing this quarter with the same quarter last year)The charts on the winning regression models follow.
The R code follows. Please let me know if you see any errors.
### r-okun.R
###
### GNU R batch file for plotting graphs
###
### ----- create the right environment ...
source("../bin/mysqlConnect.R")
source('../bin/i-plot-constants.R')
library('zoo')
library('plyr')
imageDir <- paste(imageDir, 'okun/', sep='')
### ----- home brew lagging function
lagger <- function(vector, k)
{
# - sanity checks
k <- as.integer(k)
n <- length(vector)
if(n == 0) return (NULL)
if(k > n) return (NULL)
# - do the lag - pad with NAs
lagged = NULL
if(k == 0) lagged <- vector
if(k > 0)
{
lagged <- c(rep(NA, k), vector)
lagged <- lagged[1:n]
}
if(k < 0)
{
k <- abs(k)
lagged <- c(vector, rep(NA, k))
lagged <- lagged[(1+k):(n+k)]
}
return (lagged)
}
### --- get National Accounts from MySQL database
rs <- dbSendQuery(conn, "select * from TT_5206001_key_aggregates_Data1")
natAccounts <- fetch(rs, n = -1)
### --- get Monthly Labour Force (MLF) survey data rfom MySQL database
rs <- dbSendQuery(conn, "select * from TT_6202002_Data1")
seasAdjEmploy <- fetch(rs, n = -1)
### ----- calculate the quarterly mean unemployment rate
endDate <- as.Date(as.yearmon(seasAdjEmploy$t), frac=1.0)
seasAdjEmploy$Qtr <- factor( as.yearqtr(endDate) )
seasAdjEmploy <- ddply(seasAdjEmploy, "Qtr", transform,
mean.UR.for.qtr = mean(A181525X))
### ----- merge National Accounts & MLF - only keep EOQ MLF records
mergedData <- merge(natAccounts, seasAdjEmploy, by = "t", all = FALSE)
### ----- keep needed columns from the merged data.frame - add Date
# A2304402X ==> GDP: CVM ; $ Millions : Seasonally Adjusted
# A2304410X ==> Real GDI: CVM ; $ Millions : Seasonally Adjusted
# A181525X --> SA_Unemployment_rate_Persons
df <- mergedData[, c('t', 'A2304402X', 'A2304410X',
'A181525X', 'Qtr', 'mean.UR.for.qtr')]
endDate <- as.Date(as.yearmon(df$t), frac=1.0)
df$Date <- as.Date(as.yearqtr(endDate), frac=0.5)
### ----- do the regressions and plots
for(period in c('a', 'q'))
{
freq <- 0
if(period == 'a') { freq <- 4; p <- 'TTY' }
if(period == 'q') { freq <- 1; p <- 'Quarterly' }
GDI.growth.percent.name <- paste(period, 'GDI.growth.percent', sep='')
GDP.growth.percent.name <- paste(period, 'GDP.growth.percent', sep='')
UR.diff.name <- paste(period, 'UR.diff', sep='')
df[, GDI.growth.percent.name] <- calculatePercentGrowth(v=df$A2304410X, freq=freq)
df[, GDP.growth.percent.name] <- calculatePercentGrowth(v=df$A2304402X, freq=freq)
df[, UR.diff.name] <- calculateDifference(v=df$A181525X, freq=freq)
# - an initial plot of the data ...
if(period == 'a')
{
dlm <- df[!is.na(df[, GDI.growth.percent.name]), ]
breaks <- c(GDP.growth.percent.name, GDI.growth.percent.name,
UR.diff.name)
labels <- c('GDP Percent Growth', 'GDI Percent Growth',
'Change in Unemployment Rate (% points)')
gf <- data.frame(breaks=breaks, labels=labels)
f <- paste(imageDir ,"cross-check-gdi-gdp-ur-tty-", sep="")
chartMulti (df=dlm, graphFrame=gf, saveLocPrefix=f,
plotText=list(heading='TTY Change in GDP, GDI and the Unemployment Rate',
yLabel='Percent/Percentage Points'),
footnote='Source: ABS 5206 and 6202' )
}
for(lag in -3:3)
{
l <- as.character(lag)
# - lag the differenced MLF data
column.name = paste('UR.diff.lagged.', l, sep='')
column.name = sub('-', 'minus.', column.name)
df[, column.name] <- lagger(df[, UR.diff.name], lag)
print('-------------------------------------------')
cat(lag); print(' <== unemployment rate 1st difference lagged by ')
print('-------------------------------------------')
for(fn in c('P', 'I'))
{
# - set up the regressors and regressand
yn <- column.name
if(fn == 'P') { xn <- GDP.growth.percent.name; d <- 'GDP' }
if(fn == 'I') { xn <- GDI.growth.percent.name; d <- 'GDI' }
# - for the oytput stream ...
print('====='); print(d); print('=====')
# - get rid of NA rows
dlm <- df[!is.na(df[, GDI.growth.percent.name]), ] # limit the data set
dlm <- dlm[!is.na(dlm[, xn]) & !is.na(dlm[, yn]), ] # vectorised and
# - get the regression data and regress ...
y <- dlm[, yn]
x <- dlm[, xn]
fit <- lm(y ~ x)
print(summary(fit))
# - Okun: y = mx + b; if y = 0 then x = -b/m
b <- (fit$coefficients)[[1]]
m <- (fit$coefficients)[[2]]
okun <- round(-b/m, 3)
b <- round(b, 3); m <- round(m, 3)
# - Bayesian Information Criterion
r <- residuals(fit)
sigma.hat.residuals <- var(residuals(fit))
T <- length(residuals(fit))
k <- 2 # two coefficients
BIC <- round(T * log(sigma.hat.residuals) + k * log(T), 3)
# - R-squared
r2 <- round(summary(fit)$r.squared, 3)
# - plot
plotTitle <- paste("Okuns Rule (", d, " Growth v delta(UR) lagged ", lag,
" Qs; R-sq=", r2, "; BIC=", BIC, ')', sep='')
f <- paste(imageDir, p, '-', plotTitle, fileEnding, sep="")
plot <- ggplot(data = dlm, mapping = aes_string(x=xn, y=yn)) +
geom_point() +
geom_smooth(method=lm) +
geom_hline(yintercept=0, colour='black', size=0.4, alpha=0.4) +
ylab(paste(p, " change in Unemployment Rate (% points)", sep='')) +
xlab(paste(p, ' change in ', d, ' (%)', sep='')) +
labs(title = plotTitle)
plot <- addFootnote(plot, paste('Source: ABS 5206 and 6202; Okun threshold: ',
okun, '; T=', T, '; y-intercept=', b, '; slope=', m, sep=''))
ggsave(plot, width=plotWidth, height=plotHeight, filename=f, dpi=dpiset)
if(lag == 0)
{
# - an extra plot at lag=0
dlm$okun <- okun
breaks <- c('okun', xn)
labels <- c('Okun', paste(p, d, 'Percent Growth', sep=' ') )
gf <- data.frame(breaks=breaks, labels=labels)
f <- paste(imageDir , p, '-', d, '-v-okun-', sep='')
chartMulti (df=dlm, graphFrame=gf, saveLocPrefix=f,
plotText=list(heading=paste(p, ' Growth in ', d, sep=''),
yLabel='Percent'),
footnote=paste('Source: ABS 5206 and 6202; Okun: ',
okun, sep='') )
}
}
print('-------------------------------------------')
}
}
### ----- tidy up when done
dbDisconnect(conn)
I can never post at work, so usually forget to do so, but thanks for doing this work - v. informative post.
ReplyDelete