Wednesday, January 1

Reading ABS Excel files in pandas

Experimental code follows for reading excel files from the Australian Bureau of Statistics.

### ABSExcelLoader.py
### written in python 2.7 and pandas 0.12

import pandas as pd

class ABSExcelLoader:

    def load(self, pathName, freq='M', index_name=None, verbose=False):
        """return a dictionary of pandas DataFrame objects for
           each Data work sheet in an ABS Excel spreadsheet"""

        wb = pd.ExcelFile(pathName)
        returnDict = {}

        for name in wb.sheet_names:
            if not 'Data' in name:
                continue

            df = wb.parse(name, skiprows=8, header=9, index_col=0)
            periods = pd.PeriodIndex(pd.Series(df.index), freq=freq)
            df.set_index(keys=periods, inplace=True)
            df.index.name = index_name
            returnDict[name] = df

            if verbose:
                print ("\nFile: '{}', sheet: '{}'".format(pathName, name))
                print (df.iloc[:min(5, len(df)), :min(5, len(df.columns))])

        return returnDict

No comments:

Post a Comment