Get daily billing amounts by account with Cost Explorer API


Goal of this article

Use the Cost Explorer API to get the daily billing amount for each account and output data like the following as CSV.

Account Id   | Account Name | 2022/4/1     | 2022/4/2     | 2022/4/3     | ... | 2022/4/30
000000000000 | account-0000 | 42.792716528 | 40.124716527 | 43.123416527 | ... | 50.922465287
111111111111 | account-1111 | 32.263379809 | 30.235379809 | 31.263353594 | ... | 22.133798094
222222222222 | account-2222 | 751.71034839 | 720.51234839 | 772.62033294 | ... | 651.71042035
333333333333 | account-3333 | 4.6428       | 5.1234       | 7.8765       | ... | 6.2234
444444444444 | account-4444 | 407.74542211 | 420.12345211 | 395.12499518 | ... | 417.99454118
555555555555 | account-5555 | 386.78950595 | 400.12500509 | 352.89924506 | ... | 370.75102656
...

An equivalent CSV can be downloaded from the AWS Cost Explorer console, but this post describes how to retrieve the same data automatically every day.

Example API Request

This is a minimal example of using AWS Lambda to retrieve the daily billing amount for the current month.

import datetime
import boto3

def lambda_handler(event, context):
    # Query from the first day of the current month up to today
    # (Cost Explorer treats 'End' as exclusive, so this covers costs through yesterday)
    today = datetime.date.today()
    start = today.replace(day=1).strftime('%Y-%m-%d')
    end = today.strftime('%Y-%m-%d')
    ce = boto3.client('ce')
    # Daily net unblended cost, grouped by linked (member) account
    response = ce.get_cost_and_usage(
        TimePeriod={
            'Start': start,
            'End': end,
        },
        Granularity='DAILY',
        Metrics=[
            'NetUnblendedCost'
        ],
        GroupBy=[
            {
                'Type': 'DIMENSION',
                'Key': 'LINKED_ACCOUNT'
            }
        ]
    )
    return response['ResultsByTime']
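
For reference, each element of the returned ResultsByTime list has roughly the following shape (the values here are illustrative):

# One element of response['ResultsByTime'] (illustrative values)
{
    'TimePeriod': {'Start': '2022-04-25', 'End': '2022-04-26'},
    'Total': {},
    'Groups': [
        {
            'Keys': ['000000000000'],  # linked account ID
            'Metrics': {
                'NetUnblendedCost': {'Amount': '15.4985752779', 'Unit': 'USD'}
            }
        },
        ...
    ],
    'Estimated': True
}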

One point to note is that there is a time lag before the cost for a given day is finalized. For example, if you retrieve the cost for April 25 on April 26, it may be lower than the final billing amount.

AWS does not disclose when the cost data is updated, but in practice the cost for April 25 appears to be mostly finalized by April 27.
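
If you only want near-final numbers, one option is to end the query window a couple of days in the past. The following is a minimal sketch; the two-day offset is an assumption based on the observation above, not something AWS guarantees:

import datetime
import boto3

def get_settled_cost():
    today = datetime.date.today()
    # Assumption: costs up to two days ago are close to final (not guaranteed by AWS)
    settled_through = today - datetime.timedelta(days=2)
    start = settled_through.replace(day=1).strftime('%Y-%m-%d')
    # 'End' is exclusive, so add one day to include settled_through itself
    end = (settled_through + datetime.timedelta(days=1)).strftime('%Y-%m-%d')
    ce = boto3.client('ce')
    response = ce.get_cost_and_usage(
        TimePeriod={'Start': start, 'End': end},
        Granularity='DAILY',
        Metrics=['NetUnblendedCost'],
        GroupBy=[{'Type': 'DIMENSION', 'Key': 'LINKED_ACCOUNT'}]
    )
    return response['ResultsByTime']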

Processing with Pandas

Since the API response is nested JSON, let's look at how to shape it into a CSV using pandas.

Note that pandas is not bundled with the Lambda Python runtime, so it has to be supplied via a Lambda Layer (or included in the deployment package).

  • Since each element of the response holds one day's billing data, the list is processed one day at a time with a for loop.
  • After flattening each day's data with pandas.json_normalize, the account IDs are concatenated with the billing amounts using pandas.concat.
  • The amount column is then renamed to the billing date, and the per-day frames are merged using Account Id as the key.
    merged_cost = pandas.DataFrame(
        index=[],
        columns=['Account Id']
    )

    # response is the ResultsByTime list returned by the previous example
    for item in response:
        # Flatten 'Groups' into columns such as 'Metrics.NetUnblendedCost.Amount'
        normalized_json = pandas.json_normalize(item['Groups'])
        # 'Keys' is a one-element list containing the account ID
        split_keys = pandas.DataFrame(
            normalized_json['Keys'].tolist(),
            columns=['Account Id']
        )
        cost = pandas.concat(
            [split_keys, normalized_json['Metrics.NetUnblendedCost.Amount']],
            axis=1
        )
        # Rename the amount column to the date it belongs to
        renamed_cost = cost.rename(
            columns={'Metrics.NetUnblendedCost.Amount': item['TimePeriod']['Start']}
        )
        # Outer merge so accounts with no cost on some days are still kept
        merged_cost = pandas.merge(merged_cost, renamed_cost, on='Account Id', how='outer')

    print(merged_cost)
Account Id  ...     2022-04-25
0   000000000000  ...  15.4985752779
1   111111111111  ...         0.2176
2   222222222222  ...   6.5567854795
3   333333333333  ...   6.6300957379
4   444444444444  ...   8.2720868504
..           ...  ...            ...
19  777777777777  ...  10.0121863554
18  888888888888  ...   6.5976412116
20  999999999999  ...    6.493243618
[20 rows x 26 columns]

Example of Lambda function

After processing the response with the same for loop, the list of account names obtained from the AWS Organizations API is merged in, and the result is written out as CSV.

from logging import getLogger, INFO
import os
import datetime
import boto3
import pandas
from botocore.exceptions import ClientError

logger = getLogger()
logger.setLevel(INFO)

# Upload the CSV to S3; the ACL gives the bucket owner full control
# (useful when the bucket belongs to another account)
def upload_s3(output, key, bucket):
    try:
        s3_resource = boto3.resource('s3')
        s3_bucket = s3_resource.Bucket(bucket)
        s3_bucket.upload_file(output, key, ExtraArgs={'ACL': 'bucket-owner-full-control'})
    except ClientError as err:
        logger.error(err.response['Error']['Message'])
        raise

# Recursively collect the IDs of all OUs under the given parent
def get_ou_ids(org, parent_id):
    ou_ids = []

    try:
        paginator = org.get_paginator('list_children')
        iterator = paginator.paginate(
            ParentId=parent_id,
            ChildType='ORGANIZATIONAL_UNIT'
        )
        for page in iterator:
            for ou in page['Children']:
                ou_ids.append(ou['Id'])
                ou_ids.extend(get_ou_ids(org, ou['Id']))
    except ClientError as err:
        logger.error(err.response['Error']['Message'])
        raise
    else:
        return ou_ids

# List (Id, Name) pairs for every account in the organization
def list_accounts():
    org = boto3.client('organizations')
    # Organization root ID ('r-xxxx' is a placeholder; it can also be looked up with list_roots())
    root_id = 'r-xxxx'
    ou_id_list = [root_id]
    ou_id_list.extend(get_ou_ids(org, root_id))
    accounts = []

    try:
        for ou_id in ou_id_list:
            paginator = org.get_paginator('list_accounts_for_parent')
            page_iterator = paginator.paginate(ParentId=ou_id)
            for page in page_iterator:
                for account in page['Accounts']:
                    item = [
                        account['Id'],
                        account['Name'],
                    ]
                    accounts.append(item)
    except ClientError as err:
        logger.error(err.response['Error']['Message'])
        raise
    else:
        return accounts

# Same Cost Explorer query as in the minimal example above
def get_cost_json(start, end):
    ce = boto3.client('ce')
    response = ce.get_cost_and_usage(
        TimePeriod={
            'Start': start,
            'End': end,
        },
        Granularity='DAILY',
        Metrics=[
            'NetUnblendedCost'
        ],
        GroupBy=[
            {
                'Type': 'DIMENSION',
                'Key': 'LINKED_ACCOUNT'
            }
        ]
    )
    return response['ResultsByTime']

def lambda_handler(event, context):
    today = datetime.date.today()
    start = today.replace(day=1).strftime('%Y-%m-%d')
    end = today.strftime('%Y-%m-%d')
    key = 'daily-cost-' + today.strftime('%Y-%m') + '.csv'
    output_file = '/tmp/output.csv'
    bucket = os.environ['BUCKET']
    account_list = pandas.DataFrame(list_accounts(), columns=['Account Id', 'Account Name'])
    daily_cost_list = get_cost_json(start, end)

    merged_cost = pandas.DataFrame(
        index=[],
        columns=['Account Id']
    )

    for item in daily_cost_list:
        normalized_json = pandas.json_normalize(item['Groups'])
        split_keys = pandas.DataFrame(
            normalized_json['Keys'].tolist(),
            columns=['Account Id']
        )
        cost = pandas.concat(
            [split_keys, normalized_json['Metrics.NetUnblendedCost.Amount']],
            axis=1
        )
        renamed_cost = cost.rename(
            columns={'Metrics.NetUnblendedCost.Amount': item['TimePeriod']['Start']}
        )
        merged_cost = pandas.merge(merged_cost, renamed_cost, on='Account Id', how='outer')

    daily_cost = pandas.merge(account_list, merged_cost, on='Account Id', how='right')
    daily_cost.to_csv(output_file, index=False)
    upload_s3(output_file, key, bucket)

Now all that is left is to set up a schedule in EventBridge with this Lambda function as the target, and the CSV will be generated automatically every day.
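
If you prefer to script that step as well, here is a minimal sketch using boto3; the rule name, statement ID, and function ARN are placeholders:

import boto3

FUNCTION_ARN = 'arn:aws:lambda:ap-northeast-1:123456789012:function:daily-cost'  # placeholder

events = boto3.client('events')
awslambda = boto3.client('lambda')

# Trigger every day at 01:00 UTC
rule_arn = events.put_rule(
    Name='daily-cost-schedule',
    ScheduleExpression='cron(0 1 * * ? *)'
)['RuleArn']

# Allow EventBridge to invoke the function
awslambda.add_permission(
    FunctionName=FUNCTION_ARN,
    StatementId='daily-cost-schedule',
    Action='lambda:InvokeFunction',
    Principal='events.amazonaws.com',
    SourceArn=rule_arn
)

# Point the rule at the Lambda function
events.put_targets(
    Rule='daily-cost-schedule',
    Targets=[{'Id': 'daily-cost', 'Arn': FUNCTION_ARN}]
)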