Fetching GDP Income Expenditure from ABS API¶
This notebook fetches the National Accounts identity data (GDP by income and by expenditure) from the ABS Data API (`https://data.api.abs.gov.au/rest/data`).
Initial Setup¶
You can skip this section if you are mainly interested in the data returned from the API. If you want to see how the data is fetched from the API, read on.
In [1]:
from typing import Any, Callable
from dataclasses import dataclass, field
import requests
import pandas as pd
# Base URL that the dataflow id and data key are appended to when requesting data.
ROOT_URL = 'https://data.api.abs.gov.au/rest/data'
def parse_observations(resp, value_attr = 'name') -> list[tuple[int, dict[str, str]]]:
data, meta = resp['data']['dataSets'], resp['data']['structures']
attributes: List[List[str]] = [structure['attributes']['observation'] for structure in meta]
dimensions: List[List[Tuple[str, List[str]]]] = [[
(dimension['id'], [v[value_attr] for v in dimension['values']])
for dimension in structure['dimensions']['observation']
] for structure in meta]
return [
dict(**{
d_id: d_values[v_idx]
for d_idx, v_idx in enumerate(map(int, o_dims.split(':')))
for d_id, d_values in [dimensions[dataset['structure']][d_idx]]
}, OBSERVED=observations[0], **{
a_meta['id']: a_meta['values'][a_idx]['name'] if a_meta['values'] else None
for a_idx, a_meta in zip(observations[1:], attributes[dataset['structure']])
})
for dataset in data
for o_dims, observations in dataset['observations'].items()
]
class Transform:
    """Namespace for column transform descriptors and the code that applies them."""

    class Base:
        """Common base class of every transform descriptor."""

    @dataclass
    class PeriodIndex(Base):
        """Convert a column to periods and add a column with each period's start time."""

        # Frequency passed to ``pd.PeriodIndex``.
        freq: str
        # Name of the column that receives the period start timestamps.
        datetime_column_dst: str

    @staticmethod
    def apply(df, src, t):
        """Apply transform ``t`` to column ``src`` of ``df`` in place.

        A ``PeriodIndex`` transform replaces ``df[src]`` with periods of the
        configured frequency and writes their start times to the configured
        destination column. Any other transform object is ignored.
        """
        if not isinstance(t, Transform.PeriodIndex):
            return
        df[src] = pd.PeriodIndex(df[src], freq=t.freq)
        df[t.datetime_column_dst] = df[src].dt.start_time
@dataclass
class SmdxDimension:
local_id: str
remote_id: str
drop: bool = field(default=False)
filterable: bool = field(default=True)
transforms: list[Transform.Base] = field(default_factory=lambda: [])
def get_from_params(self, params):
match params.get(self.local_id, ''):
case value if value is None:
return ''
case value if isinstance(value, str):
return value
case value if isinstance(value, list):
return '+'.join(map(str, value))
case value:
return str(value)
@dataclass
class SmdxAttribute:
    """Describes one observation attribute of a dataflow."""

    # Column name used locally after renaming.
    local_id: str
    # Identifier of the attribute as it appears in the API response.
    remote_id: str
    # When true, the column is removed from the fetched frame.
    drop: bool = False
@dataclass
class SmdxDataflow:
    """A dataflow definition that can fetch its observations as a DataFrame."""

    # Dataflow version, used as the last part of the dataflow id.
    version: str
    # Dataflow identifier, used as the middle part of the dataflow id.
    dataflow: str
    # Dimensions in data-key order.
    dimensions: list[SmdxDimension]
    # Observation attributes to rename and optionally drop.
    attributes: list[SmdxAttribute]

    def fetch(self,
              start_period=None,
              end_period=None,
              dimension_at_observation='AllDimensions',
              detail='full',
              filter=None,
              drop=None,
              timeout=60):
        """Request the dataflow and return its observations as a DataFrame.

        Args:
            start_period: Sent as the ``startPeriod`` query parameter when truthy.
            end_period: Sent as the ``endPeriod`` query parameter when truthy.
            dimension_at_observation: Sent as ``dimensionAtObservation``.
            detail: Sent as the ``detail`` query parameter.
            filter: Mapping of local dimension ids to filter values; each
                filterable dimension contributes one '.'-separated segment
                of the data key.
            drop: Extra column names to remove from the result.
            timeout: Seconds passed to ``requests.get`` as its timeout, so a
                stalled connection cannot hang the caller. ``None`` waits
                indefinitely.

        Returns:
            A DataFrame with dimension and attribute columns renamed to their
            local ids, the observed value in ``observed``, transforms applied
            and dropped columns removed.

        Raises:
            requests.HTTPError: If the server answers with an error status.
            requests.Timeout: If the server does not respond within ``timeout``.
            KeyError: If a name in ``drop`` is not a column of the result.
        """
        filter = filter or {}
        drop_cols = drop or []

        dataflow_id = f'ABS,{self.dataflow},{self.version}'
        datakey = '.'.join(d.get_from_params(filter) for d in self.dimensions if d.filterable)
        headers = {'Accept': 'application/vnd.sdmx.data+json'}
        url_params = {'detail': detail, 'dimensionAtObservation': dimension_at_observation}
        if start_period:
            url_params['startPeriod'] = start_period
        if end_period:
            url_params['endPeriod'] = end_period

        response = requests.get(
            f'{ROOT_URL}/{dataflow_id}/{datakey}',
            headers=headers,
            params=url_params,
            timeout=timeout,
        )
        # Fail on the HTTP error itself rather than on whatever the error
        # body happens to break further down.
        response.raise_for_status()

        df = pd.DataFrame.from_records(parse_observations(response.json()))
        df = df.rename(columns={d.remote_id: d.local_id for d in self.dimensions})
        df = df.rename(columns={a.remote_id: a.local_id for a in self.attributes})
        df = df.rename(columns=dict(OBSERVED='observed'))

        for dimension in self.dimensions:
            for transform in dimension.transforms:
                Transform.apply(df, dimension.local_id, transform)

        # Configured drops are best-effort: a declared column may be missing
        # from the response (presumably e.g. attributes when ``detail`` is
        # not 'full' - confirm against the API).
        df = df.drop(columns=[d.local_id for d in self.dimensions if d.drop], errors='ignore')
        df = df.drop(columns=[a.local_id for a in self.attributes if a.drop], errors='ignore')
        # Caller-supplied drops stay strict so typos are reported.
        df = df.drop(columns=drop_cols)
        return df
Define our data flows¶
We're defining the dataflow for the datapoints we're interested in here.
In [2]:
# Dataflow definition for 'ANA_EXP', version '1.0.0'.
# NOTE(review): the 'UNIT_MEASURE' attribute shares the local id 'measure'
# with the 'MEASURE' dimension, so dropping it appears to remove both
# columns - confirm this is intended.
gdp_expenditure = SmdxDataflow(
    version='1.0.0',
    dataflow='ANA_EXP',
    dimensions=[
        SmdxDimension(local_id='measure', remote_id='MEASURE'),
        SmdxDimension(local_id='data_item', remote_id='DATA_ITEM'),
        SmdxDimension(local_id='sector', remote_id='SECTOR'),
        SmdxDimension(local_id='adjustment_type', remote_id='TSEST'),
        SmdxDimension(local_id='region', remote_id='REGION', drop=True, filterable=False),
        SmdxDimension(
            local_id='quarter',
            remote_id='TIME_PERIOD',
            transforms=[Transform.PeriodIndex(freq='Q', datetime_column_dst='period')],
        ),
        SmdxDimension(local_id='freq', remote_id='FREQ'),
    ],
    attributes=[
        SmdxAttribute(local_id='measure', remote_id='UNIT_MEASURE', drop=True),
        SmdxAttribute(local_id='kind', remote_id='UNIT_MULT'),
        SmdxAttribute(local_id='observation_status', remote_id='OBS_STATUS', drop=True),
        SmdxAttribute(local_id='observation_comment', remote_id='OBS_COMMENT', drop=True),
    ],
)
# Dataflow definition for 'ANA_INC', version '1.0.0'.
# NOTE(review): the 'UNIT_MEASURE' attribute shares the local id 'measure'
# with the 'MEASURE' dimension, so dropping it appears to remove both
# columns - confirm this is intended.
gdp_income = SmdxDataflow(
    version='1.0.0',
    dataflow='ANA_INC',
    dimensions=[
        SmdxDimension(local_id='measure', remote_id='MEASURE'),
        SmdxDimension(local_id='data_item', remote_id='DATA_ITEM'),
        SmdxDimension(local_id='sector', remote_id='SECTOR'),
        SmdxDimension(local_id='adjustment_type', remote_id='TSEST'),
        SmdxDimension(local_id='region', remote_id='REGION', drop=True, filterable=False),
        SmdxDimension(
            local_id='quarter',
            remote_id='TIME_PERIOD',
            transforms=[Transform.PeriodIndex(freq='Q', datetime_column_dst='period')],
        ),
        SmdxDimension(local_id='freq', remote_id='FREQ'),
    ],
    attributes=[
        SmdxAttribute(local_id='measure', remote_id='UNIT_MEASURE', drop=True),
        SmdxAttribute(local_id='kind', remote_id='UNIT_MULT'),
        SmdxAttribute(local_id='observation_status', remote_id='OBS_STATUS', drop=True),
        SmdxAttribute(local_id='observation_comment', remote_id='OBS_COMMENT', drop=True),
    ],
)
Fetching our data¶
We're fetching data from the first quarter of 2023 onwards, and creating additional rows derived from the fetched ones:
- Create a `Net Imports and Exports` row for each case of `All sectors`.
In [3]:
from IPython.display import HTML
import numpy as np
# Fetch both dataflows from this period onwards.
start_period = '2023-Q1'
# Filter value for the 'adjustment_type' dimension.
# NOTE(review): presumably the code for seasonally adjusted series - confirm against the codelist.
SEASONLY_ADJUSTED = 20
# Filters shared by both requests.
default_kwargs = dict(adjustment_type=SEASONLY_ADJUSTED, freq='Q')
# The filtered-on columns are constant in the result, so they are dropped.
# NOTE(review): measure codes 'C' and 'VCH' are taken as given - confirm their meaning against the codelist.
inc_df = gdp_income.fetch(start_period=start_period, drop=['adjustment_type', 'freq'], filter=dict(measure='C', **default_kwargs))
exp_df = gdp_expenditure.fetch(start_period=start_period, drop=['adjustment_type', 'freq'], filter=dict(measure='VCH', **default_kwargs))
# Append one derived row per (period, sector, quarter) group of 'All sectors':
# exports minus imports, labelled 'Net Imports and Exports'. Each group must
# contain both an exports and an imports row, otherwise `.values[0]` raises.
# After reset_index the three group keys become columns and the inner index of
# each one-row frame becomes 'level_3', which is dropped.
exp_df = pd.concat([
exp_df,
(
exp_df[exp_df['sector'] == 'All sectors']
.groupby(['period', 'sector', 'quarter'], group_keys=True)
.apply(lambda g: pd.DataFrame([{
'kind': 'Millions',
'data_item': 'Net Imports and Exports',
'observed': g.loc[g['data_item'] == 'Exports of goods and services', 'observed'].values[0]
- g.loc[g['data_item'] == 'Imports of goods and services', 'observed'].values[0]
}]), include_groups=False)
.reset_index()
.drop(columns=['level_3'])
),
]).reset_index(drop=True)
# Stack both frames, tagging every row with the dataflow it came from.
all_df = pd.concat([
exp_df.assign(source='Expenditure'),
inc_df.assign(source='Income')
])
# Pair each component label with the row mask that selects it, then split
# the pairs into a tuple of labels and a tuple of masks.
identity_choices, identity_conditions = zip(*[
('NX', (all_df['sector'] == 'All sectors') & (all_df['data_item'] == 'Net Imports and Exports')),
('M', (all_df['sector'] == 'All sectors') & (all_df['data_item'] == 'Imports of goods and services')),
('X', (all_df['sector'] == 'All sectors') & (all_df['data_item'] == 'Exports of goods and services')),
('T', (all_df['sector'] == 'All sectors') & (all_df['data_item'] == 'Taxes less subsidies on production and imports')),
('Y', (all_df['sector'] == 'All sectors') & (all_df['data_item'] == 'Gross domestic product')),
('ε', (all_df['sector'] == 'All sectors') & (all_df['data_item'] == 'Statistical discrepancy (E)')),
('C', (all_df['sector'] == 'Households') & (all_df['data_item'] == 'Final consumption expenditure')),
('I', (all_df['sector'] == 'Private') & (all_df['data_item'] == 'Gross fixed capital formation - Total private business investment')),
('G', (all_df['sector'] == 'General government') & (all_df['data_item'] == 'Final consumption expenditure')),
])
# The first matching mask decides the label; rows matching none get None.
all_df['component'] = np.select(identity_conditions, identity_choices, default=None)
# Show a preview of both frames.
display(HTML('<h1>Preview Data</h1>'))
display(HTML('<h2>First few rows of Income</h2>'))
display(inc_df.head())
display(HTML('<h2>First few rows of Expenditure</h2>'))
display(exp_df.head())
Preview Data
First few rows of Income
data_item | sector | quarter | observed | kind | period | |
---|---|---|---|---|---|---|
0 | Total factor income | All sectors | 2023Q1 | 590609 | Units | 2023-01-01 |
1 | Total factor income | All sectors | 2023Q2 | 585782 | Units | 2023-04-01 |
2 | Total factor income | All sectors | 2023Q3 | 592649 | Units | 2023-07-01 |
3 | Total factor income | All sectors | 2023Q4 | 600978 | Units | 2023-10-01 |
4 | Total factor income | All sectors | 2024Q1 | 609819 | Units | 2024-01-01 |
First few rows of Expenditure
data_item | sector | quarter | observed | kind | period | |
---|---|---|---|---|---|---|
0 | Domestic final demand | All sectors | 2023Q1 | 611018 | Millions | 2023-01-01 |
1 | Domestic final demand | All sectors | 2023Q2 | 617648 | Millions | 2023-04-01 |
2 | Domestic final demand | All sectors | 2023Q3 | 621220 | Millions | 2023-07-01 |
3 | Domestic final demand | All sectors | 2023Q4 | 621980 | Millions | 2023-10-01 |
4 | Domestic final demand | All sectors | 2024Q1 | 625318 | Millions | 2024-01-01 |
Inspecting our data¶
We're going to output the contents of our data.
In [4]:
from IPython.display import HTML
import seaborn as sns
import matplotlib.pyplot as plt
def pivot(df):
    """Reshape long-format rows into a (sector, data_item) by quarter table of observed values."""
    row_keys = ['sector', 'data_item']
    wide = df.pivot(index=row_keys, columns='quarter', values='observed')
    return wide
# Data items shown in the main expenditure table; everything else goes into
# the collapsible "omitted" table.
main_data_items = [
'Final consumption expenditure',
'Net Imports and Exports',
'Imports of goods and services',
'Exports of goods and services',
'Gross domestic product',
'Gross fixed capital formation',
'Gross fixed capital formation - Total private business investment',
'Statistical discrepancy (E)',
'Changes in inventories',
'Domestic final demand',
]
# Sectors shown in the main expenditure table.
main_sectors = [
'All sectors',
'General government',
'Households',
'Private',
'Public',
'Public corporations',
]
# A row is "main" only when both its data item and its sector are listed above.
predicate = exp_df['data_item'].isin(main_data_items) & exp_df['sector'].isin(main_sectors)
display(HTML('<h1>Australian National Accounts</h1>'))
display(HTML('<h2><font color="red">Expenditure</font> on Gross Domestic Product (GDP (E))</h2>'))
display(HTML('<p>Main <font color="red">Expenditure</font> items.</p>'))
display(pivot(exp_df[predicate]))
# Remaining expenditure rows, rendered to HTML inside a collapsed <details> element.
display(HTML(f"""
<details>
<summary>Click here, to see omitted <font color="red">Expenditure</font> items</summary>
{pivot(exp_df[~predicate]).to_html()}
</details>
"""))
# Income rows are shown unfiltered.
display(HTML('<h2><font color="blue">Income</font> from Gross Domestic Product (GDP (I))</h2>'))
display(pivot(inc_df))
display(HTML('<h2 id="questions"><font color="green">Accounting Identities?</font></h2>'))
# Open questions about which rows should back each identity component.
display(HTML("""
<p>
Presumable these are the correct accounting identities. I'm not entire sure, as:
</p>
<ul>
<li>I haven't really done anything with the sectors "Pubic" or "Public corporations"</li>
<li>
I am wondering if I should be using <b>Gross fixed capital formation - Total private business investment</b>
or just <b>Gross fixed capital formation</b> for Private sector?
</li>
<li>Maybe I'm just suppose to use <b>Changes in inventories</b> from All sectors?</li>
<li>
By the same token am I suppose to be using <b>Domestic final demand</b>? I think I might need to use
that over <b>Household, final consumption expenditure</b>.
</li>
<li>
Do I need to use <b>Gross fixed capital formation</b> for the <b>Government Sector</b>?
</li>
</ul>
"""))
# Only rows that were assigned a component label, one row per (sector, data_item, component).
display(all_df[all_df['component'].notna()].pivot(index=['sector', 'data_item', 'component'], columns='quarter', values='observed'))
# Two side-by-side line charts of the labelled components over time.
fig, ax = plt.subplots(1, 2, figsize=(20, 7))
# Every component label present in the data; passed as hue_order to both panels.
categories = all_df['component'].astype('category').cat.categories
# (title suffix, rows to plot) for the left and right panel respectively.
panels = [
    ('', all_df[all_df['component'].isin(['Y', 'I', 'G', 'NX', 'T', 'C'])]),
    ('(All Components)', all_df[all_df['component'].notna()]),
]
for i, (label, subset) in enumerate(panels):
    axis = ax[i]
    sns.lineplot(
        data=subset,
        x='period',
        y='observed',
        hue='component',
        hue_order=categories,
        style='component',
        markers=True,
        palette='hsv',
        ax=axis,
    )
    axis.set_title(f'\nComponents of the Expenditure model of GDP {label}\n', fontsize=20)
    axis.set_ylabel('Observed ($m) (log scaled)', fontsize=12)
    # Lower limit is set before switching the axis to a log scale.
    axis.set_ylim(20000)
    axis.set_yscale('log')
    axis.set_xlabel('Quarter', fontsize=12)
    axis.legend(loc='lower left')
Australian National Accounts
Expenditure on Gross Domestic Product (GDP (E))
Main Expenditure items.
quarter | 2023Q1 | 2023Q2 | 2023Q3 | 2023Q4 | 2024Q1 | 2024Q2 | 2024Q3 | 2024Q4 | |
---|---|---|---|---|---|---|---|---|---|
sector | data_item | ||||||||
All sectors | Changes in inventories | 5894 | -2587 | 48 | -983 | 3312 | 1495 | -298 | 38 |
Domestic final demand | 611018 | 617648 | 621220 | 621980 | 625318 | 627193 | 631789 | 635181 | |
Exports of goods and services | 171752 | 178614 | 181050 | 177067 | 177460 | 178567 | 178891 | 180122 | |
Final consumption expenditure | 460912 | 463857 | 465645 | 466321 | 470016 | 471477 | 473313 | 475652 | |
Gross domestic product | 644110 | 646243 | 649635 | 650218 | 651335 | 652593 | 654676 | 658495 | |
Gross fixed capital formation | 150127 | 153854 | 155575 | 155659 | 155302 | 155716 | 158475 | 159528 | |
Imports of goods and services | 144447 | 147947 | 152007 | 147709 | 155938 | 156513 | 156159 | 156342 | |
Net Imports and Exports | 27305 | 30667 | 29043 | 29358 | 21522 | 22054 | 22732 | 23780 | |
Statistical discrepancy (E) | -157 | 701 | -676 | -137 | 1184 | 1852 | 454 | -504 | |
General government | Final consumption expenditure | 136783 | 138052 | 140110 | 140578 | 142590 | 144668 | 146700 | 147679 |
Gross fixed capital formation | 25266 | 25975 | 26275 | 25301 | 25234 | 24704 | 26579 | 26256 | |
Households | Final consumption expenditure | 324131 | 325812 | 325535 | 325743 | 327426 | 326809 | 326613 | 327973 |
Private | Gross fixed capital formation | 116700 | 118911 | 119363 | 120362 | 120264 | 121016 | 120985 | 121381 |
Gross fixed capital formation - Total private business investment | 74120 | 75863 | 76143 | 77767 | 77666 | 77844 | 77137 | 77658 | |
Public | Gross fixed capital formation | 33429 | 34945 | 36212 | 35297 | 35038 | 34699 | 37490 | 38148 |
Public corporations | Gross fixed capital formation | 8162 | 8965 | 9937 | 9996 | 9804 | 9996 | 10911 | 11892 |
Click here, to see omitted Expenditure items
quarter | 2023Q1 | 2023Q2 | 2023Q3 | 2023Q4 | 2024Q1 | 2024Q2 | 2024Q3 | 2024Q4 | |
---|---|---|---|---|---|---|---|---|---|
sector | data_item | ||||||||
All sectors | Gross national expenditure | 616989 | 615030 | 621268 | 620997 | 628630 | 628687 | 631491 | 635219 |
General government - National | Final consumption expenditure | 63149 | 63740 | 64780 | 65384 | 66717 | 68052 | 68371 | 68743 |
Final consumption expenditure - Defence | 11361 | 11501 | 11991 | 11593 | 11670 | 11494 | 11333 | 11439 | |
Final consumption expenditure - Non-defence | 51783 | 52236 | 52789 | 53791 | 55047 | 56558 | 57038 | 57304 | |
Gross fixed capital formation | 5415 | 5759 | 5603 | 5542 | 5830 | 5877 | 7280 | 6532 | |
Gross fixed capital formation - Defence | 2843 | 3191 | 2812 | 2752 | 3012 | 3129 | 4346 | 3737 | |
Gross fixed capital formation - Non-defence | 2567 | 2574 | 2791 | 2790 | 2819 | 2748 | 2934 | 2795 | |
General government - State and local | Final consumption expenditure | 73631 | 74310 | 75330 | 75194 | 75873 | 76617 | 78329 | 78937 |
Gross fixed capital formation | 19852 | 20216 | 20672 | 19759 | 19404 | 18827 | 19299 | 19724 | |
Private | Gross fixed capital formation - Cultivated biological resources | 1440 | 1466 | 1377 | 1336 | 1278 | 1233 | 1192 | 1169 |
Gross fixed capital formation - Dwellings - Alterations and additions | 13923 | 13705 | 13709 | 13239 | 13394 | 13374 | 13494 | 13376 | |
Gross fixed capital formation - Dwellings - New and Used | 19797 | 20198 | 20129 | 19597 | 19682 | 19912 | 20300 | 20274 | |
Gross fixed capital formation - Dwellings - Total | 33721 | 33909 | 33838 | 32836 | 33075 | 33285 | 33794 | 33650 | |
Gross fixed capital formation - Intellectual property products | 14643 | 14795 | 15277 | 15613 | 16247 | 16706 | 16961 | 17396 | |
Gross fixed capital formation - Intellectual property products - Artistic originals | 588 | 530 | 565 | 602 | 648 | 700 | 709 | 719 | |
Gross fixed capital formation - Intellectual property products - Computer software | 7774 | 7811 | 8153 | 8549 | 8945 | 9365 | 9609 | 9895 | |
Gross fixed capital formation - Intellectual property products - Mineral and petroleum exploration | 1286 | 1337 | 1413 | 1303 | 1412 | 1295 | 1259 | 1344 | |
Gross fixed capital formation - Intellectual property products - Research and development | 4999 | 5125 | 5146 | 5159 | 5242 | 5346 | 5385 | 5438 | |
Gross fixed capital formation - Machinery and equipment - Net purchase of second hand assets | -1856 | -1905 | -2128 | -2183 | -2193 | -2401 | -2348 | -2406 | |
Gross fixed capital formation - Machinery and equipment - New | 27827 | 28943 | 29072 | 28714 | 29412 | 28803 | 28967 | 28936 | |
Gross fixed capital formation - Machinery and equipment - Total | 25971 | 27038 | 26944 | 26532 | 27219 | 26402 | 26620 | 26530 | |
Gross fixed capital formation - Non-dwelling construction - Net purchase of second hand assets | -845 | -829 | -1155 | -906 | -866 | -773 | -1217 | -1054 | |
Gross fixed capital formation - Non-dwelling construction - New building | 15147 | 15363 | 15150 | 16291 | 15653 | 15938 | 15324 | 14715 | |
Gross fixed capital formation - Non-dwelling construction - New engineering construction | 17770 | 18070 | 18550 | 18902 | 18135 | 18339 | 18256 | 18903 | |
Gross fixed capital formation - Non-dwelling construction - Total | 32072 | 32604 | 32546 | 34287 | 32922 | 33503 | 32364 | 32563 | |
Gross fixed capital formation - Ownership transfer costs | 8924 | 9174 | 9382 | 9759 | 9523 | 9887 | 10055 | 10073 | |
Public corporations - Commonwealth | Gross fixed capital formation | 2086 | 2399 | 2832 | 2624 | 2667 | 2563 | 2698 | 2916 |
Public corporations - State and local | Gross fixed capital formation | 6076 | 6567 | 7105 | 7372 | 7137 | 7433 | 8213 | 8976 |
Income from Gross Domestic Product (GDP (I))
quarter | 2023Q1 | 2023Q2 | 2023Q3 | 2023Q4 | 2024Q1 | 2024Q2 | 2024Q3 | 2024Q4 | |
---|---|---|---|---|---|---|---|---|---|
sector | data_item | ||||||||
All sectors | Compensation of employees - Employers' social contributions | 32853 | 33550 | 34676 | 35400 | 36080 | 36735 | 37559 | 38452 |
Compensation of employees - Wages and salaries | 264616 | 269366 | 275708 | 279754 | 282578 | 285816 | 290143 | 295889 | |
GROSS DOMESTIC PRODUCT | 650642 | 647528 | 656805 | 665948 | 675166 | 676383 | 679872 | 690644 | |
Gross mixed income | 43675 | 42030 | 42266 | 42123 | 42859 | 42828 | 42396 | 42501 | |
Gross operating surplus | 249465 | 240836 | 240000 | 243700 | 248302 | 246219 | 242243 | 244931 | |
Statistical discrepancy (I) | -769 | 658 | 1037 | 993 | 331 | 293 | 1041 | 1324 | |
Taxes less subsidies on production and imports | 60803 | 61087 | 63119 | 63977 | 65016 | 64491 | 66489 | 67546 | |
Total compensation of employees | 297469 | 302916 | 310384 | 315154 | 318658 | 322551 | 327703 | 334341 | |
Total factor income | 590609 | 585782 | 592649 | 600978 | 609819 | 611599 | 612342 | 621774 | |
Dwellings owned by persons | Gross operating surplus | 46774 | 48342 | 50104 | 51572 | 53061 | 54595 | 56068 | 57273 |
Financial corporations | Gross operating surplus | 27556 | 27911 | 28432 | 28805 | 29225 | 29562 | 29913 | 30442 |
General government | Gross operating surplus | 13625 | 13885 | 14046 | 14306 | 14610 | 14959 | 15335 | 15684 |
Non-financial corporations | Gross operating surplus | 161510 | 150699 | 147418 | 149018 | 151405 | 147103 | 140926 | 141531 |
Private non-financial corporations | Gross operating surplus | 156647 | 145757 | 142072 | 143525 | 145627 | 140998 | 135168 | 135919 |
Public non-financial corporations | Gross operating surplus | 4863 | 4942 | 5346 | 5493 | 5778 | 6104 | 5758 | 5612 |
Total corporations | Gross operating surplus | 189066 | 178610 | 175850 | 177823 | 180631 | 176665 | 170839 | 171974 |
Accounting Identities?
Presumable these are the correct accounting identities. I'm not entire sure, as:
- I haven't really done anything with the sectors "Pubic" or "Public corporations"
- I am wondering if I should be using Gross fixed capital formation - Total private business investment or just Gross fixed capital formation for Private sector?
- Maybe I'm just suppose to use Changes in inventories from All sectors?
- By the same token am I suppose to be using Domestic final demand? I think I might need to use that over Household, final consumption expenditure.
- Do I need to use Gross fixed capital formation for the Government Sector?
quarter | 2023Q1 | 2023Q2 | 2023Q3 | 2023Q4 | 2024Q1 | 2024Q2 | 2024Q3 | 2024Q4 | ||
---|---|---|---|---|---|---|---|---|---|---|
sector | data_item | component | ||||||||
All sectors | Exports of goods and services | X | 171752 | 178614 | 181050 | 177067 | 177460 | 178567 | 178891 | 180122 |
Gross domestic product | Y | 644110 | 646243 | 649635 | 650218 | 651335 | 652593 | 654676 | 658495 | |
Imports of goods and services | M | 144447 | 147947 | 152007 | 147709 | 155938 | 156513 | 156159 | 156342 | |
Net Imports and Exports | NX | 27305 | 30667 | 29043 | 29358 | 21522 | 22054 | 22732 | 23780 | |
Statistical discrepancy (E) | ε | -157 | 701 | -676 | -137 | 1184 | 1852 | 454 | -504 | |
Taxes less subsidies on production and imports | T | 60803 | 61087 | 63119 | 63977 | 65016 | 64491 | 66489 | 67546 | |
General government | Final consumption expenditure | G | 136783 | 138052 | 140110 | 140578 | 142590 | 144668 | 146700 | 147679 |
Households | Final consumption expenditure | C | 324131 | 325812 | 325535 | 325743 | 327426 | 326809 | 326613 | 327973 |
Private | Gross fixed capital formation - Total private business investment | I | 74120 | 75863 | 76143 | 77767 | 77666 | 77844 | 77137 | 77658 |