Get daily billing amounts by account with Cost Explorer API


Goal of this article

Use the Cost Explorer API to get the daily billing amount for each account and output data like the following as CSV.

Account Id   | Account Name | 2022/4/1     | 2022/4/2     | 2022/4/3     | ... | 2022/4/30
000000000000 | account-0000 | 42.792716528 | 40.124716527 | 43.123416527 | ... | 50.922465287
111111111111 | account-1111 | 32.263379809 | 30.235379809 | 31.263353594 | ... | 22.133798094
222222222222 | account-2222 | 751.71034839 | 720.51234839 | 772.62033294 | ... | 651.71042035
333333333333 | account-3333 | 4.6428       | 5.1234       | 7.8765       | ... | 6.2234
444444444444 | account-4444 | 407.74542211 | 420.12345211 | 395.12499518 | ... | 417.99454118
555555555555 | account-5555 | 386.78950595 | 400.12500509 | 352.89924506 | ... | 370.75102656
...

An equivalent CSV can be downloaded from the AWS Cost Explorer console, but this post describes how to retrieve the same data automatically every day.

Example API Request

This is a minimal example of using AWS Lambda to retrieve the daily billing amount for the current month.

import datetime
import boto3

def lambda_handler(event, context):
    # Query from the first day of the current month up to today
    # (Cost Explorer treats 'End' as exclusive, so this covers costs through yesterday)
    today = datetime.date.today()
    start = today.replace(day=1).strftime('%Y-%m-%d')
    end = today.strftime('%Y-%m-%d')
    ce = boto3.client('ce')
    # Daily net unblended cost, grouped by linked (member) account
    response = ce.get_cost_and_usage(
        TimePeriod={
            'Start': start,
            'End': end,
        },
        Granularity='DAILY',
        Metrics=[
            'NetUnblendedCost'
        ],
        GroupBy=[
            {
                'Type': 'DIMENSION',
                'Key': 'LINKED_ACCOUNT'
            }
        ]
    )
    return response['ResultsByTime']
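
For reference, each element of the returned ResultsByTime list has roughly the following shape (the values here are illustrative):

# One element of response['ResultsByTime'] (illustrative values)
{
    'TimePeriod': {'Start': '2022-04-25', 'End': '2022-04-26'},
    'Total': {},
    'Groups': [
        {
            'Keys': ['000000000000'],  # linked account ID
            'Metrics': {
                'NetUnblendedCost': {'Amount': '15.4985752779', 'Unit': 'USD'}
            }
        },
        ...
    ],
    'Estimated': True
}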

One point to note is that there is a time lag before the cost for a given day is finalized. For example, if you retrieve the cost for April 25 on April 26, it may be lower than the final billing amount.

AWS does not disclose when the cost data is updated, but in practice the cost for April 25 appears to be mostly finalized by April 27.
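
If you only want near-final numbers, one option is to end the query window a couple of days in the past. The following is a minimal sketch; the two-day offset is an assumption based on the observation above, not something AWS guarantees:

import datetime
import boto3

def get_settled_cost():
    today = datetime.date.today()
    # Assumption: costs up to two days ago are close to final (not guaranteed by AWS)
    settled_through = today - datetime.timedelta(days=2)
    start = settled_through.replace(day=1).strftime('%Y-%m-%d')
    # 'End' is exclusive, so add one day to include settled_through itself
    end = (settled_through + datetime.timedelta(days=1)).strftime('%Y-%m-%d')
    ce = boto3.client('ce')
    response = ce.get_cost_and_usage(
        TimePeriod={'Start': start, 'End': end},
        Granularity='DAILY',
        Metrics=['NetUnblendedCost'],
        GroupBy=[{'Type': 'DIMENSION', 'Key': 'LINKED_ACCOUNT'}]
    )
    return response['ResultsByTime']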

Processing with Pandas

Since the API response is nested JSON, let's look at how to shape it into a CSV using pandas.

Note that pandas is not bundled with the Lambda Python runtime, so it has to be supplied via a Lambda Layer (or included in the deployment package).

  • Since each element of the response holds one day's billing data, the list is processed one day at a time with a for loop.
  • After flattening each day's data with pandas.json_normalize, the account IDs are concatenated with the billing amounts using pandas.concat.
  • The amount column is then renamed to the billing date, and the per-day frames are merged using Account Id as the key.
    merged_cost = pandas.DataFrame(
        index=[],
        columns=['Account Id']
    )

    # response is the ResultsByTime list returned by the previous example
    for item in response:
        # Flatten 'Groups' into columns such as 'Metrics.NetUnblendedCost.Amount'
        normalized_json = pandas.json_normalize(item['Groups'])
        # 'Keys' is a one-element list containing the account ID
        split_keys = pandas.DataFrame(
            normalized_json['Keys'].tolist(),
            columns=['Account Id']
        )
        cost = pandas.concat(
            [split_keys, normalized_json['Metrics.NetUnblendedCost.Amount']],
            axis=1
        )
        # Rename the amount column to the date it belongs to
        renamed_cost = cost.rename(
            columns={'Metrics.NetUnblendedCost.Amount': item['TimePeriod']['Start']}
        )
        # Outer merge so accounts with no cost on some days are still kept
        merged_cost = pandas.merge(merged_cost, renamed_cost, on='Account Id', how='outer')

    print(merged_cost)
Account Id  ...     2022-04-25
0   000000000000  ...  15.4985752779
1   111111111111  ...         0.2176
2   222222222222  ...   6.5567854795
3   333333333333  ...   6.6300957379
4   444444444444  ...   8.2720868504
..           ...  ...            ...
19  777777777777  ...  10.0121863554
18  888888888888  ...   6.5976412116
20  999999999999  ...    6.493243618
[20 rows x 26 columns]

Example of Lambda function

After processing the response with the same for loop, the list of account names obtained from the AWS Organizations API is merged in, and the result is written out as CSV.

from logging import getLogger, INFO
import os
import datetime
import boto3
import pandas
from botocore.exceptions import ClientError

logger = getLogger()
logger.setLevel(INFO)

# Upload the CSV to S3; the ACL gives the bucket owner full control
# (useful when the bucket belongs to another account)
def upload_s3(output, key, bucket):
    try:
        s3_resource = boto3.resource('s3')
        s3_bucket = s3_resource.Bucket(bucket)
        s3_bucket.upload_file(output, key, ExtraArgs={'ACL': 'bucket-owner-full-control'})
    except ClientError as err:
        logger.error(err.response['Error']['Message'])
        raise

# Recursively collect the IDs of all OUs under the given parent
def get_ou_ids(org, parent_id):
    ou_ids = []

    try:
        paginator = org.get_paginator('list_children')
        iterator = paginator.paginate(
            ParentId=parent_id,
            ChildType='ORGANIZATIONAL_UNIT'
        )
        for page in iterator:
            for ou in page['Children']:
                ou_ids.append(ou['Id'])
                ou_ids.extend(get_ou_ids(org, ou['Id']))
    except ClientError as err:
        logger.error(err.response['Error']['Message'])
        raise
    else:
        return ou_ids

# List (Id, Name) pairs for every account in the organization
def list_accounts():
    org = boto3.client('organizations')
    # Organization root ID ('r-xxxx' is a placeholder; it can also be looked up with list_roots())
    root_id = 'r-xxxx'
    ou_id_list = [root_id]
    ou_id_list.extend(get_ou_ids(org, root_id))
    accounts = []

    try:
        for ou_id in ou_id_list:
            paginator = org.get_paginator('list_accounts_for_parent')
            page_iterator = paginator.paginate(ParentId=ou_id)
            for page in page_iterator:
                for account in page['Accounts']:
                    item = [
                        account['Id'],
                        account['Name'],
                    ]
                    accounts.append(item)
    except ClientError as err:
        logger.error(err.response['Error']['Message'])
        raise
    else:
        return accounts

# Same Cost Explorer query as in the minimal example above
def get_cost_json(start, end):
    ce = boto3.client('ce')
    response = ce.get_cost_and_usage(
        TimePeriod={
            'Start': start,
            'End': end,
        },
        Granularity='DAILY',
        Metrics=[
            'NetUnblendedCost'
        ],
        GroupBy=[
            {
                'Type': 'DIMENSION',
                'Key': 'LINKED_ACCOUNT'
            }
        ]
    )
    return response['ResultsByTime']

def lambda_handler(event, context):
    today = datetime.date.today()
    start = today.replace(day=1).strftime('%Y-%m-%d')
    end = today.strftime('%Y-%m-%d')
    key = 'daily-cost-' + today.strftime('%Y-%m') + '.csv'
    output_file = '/tmp/output.csv'
    bucket = os.environ['BUCKET']
    account_list = pandas.DataFrame(list_accounts(), columns=['Account Id', 'Account Name'])
    daily_cost_list = get_cost_json(start, end)

    merged_cost = pandas.DataFrame(
        index=[],
        columns=['Account Id']
    )

    for item in daily_cost_list:
        normalized_json = pandas.json_normalize(item['Groups'])
        split_keys = pandas.DataFrame(
            normalized_json['Keys'].tolist(),
            columns=['Account Id']
        )
        cost = pandas.concat(
            [split_keys, normalized_json['Metrics.NetUnblendedCost.Amount']],
            axis=1
        )
        renamed_cost = cost.rename(
            columns={'Metrics.NetUnblendedCost.Amount': item['TimePeriod']['Start']}
        )
        merged_cost = pandas.merge(merged_cost, renamed_cost, on='Account Id', how='outer')

    daily_cost = pandas.merge(account_list, merged_cost, on='Account Id', how='right')
    daily_cost.to_csv(output_file, index=False)
    upload_s3(output_file, key, bucket)

Now all that is left is to set up a schedule in EventBridge with this Lambda function as the target, and the CSV will be generated automatically every day.
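
If you prefer to script that step as well, here is a minimal sketch using boto3; the rule name, statement ID, and function ARN are placeholders:

import boto3

FUNCTION_ARN = 'arn:aws:lambda:ap-northeast-1:123456789012:function:daily-cost'  # placeholder

events = boto3.client('events')
awslambda = boto3.client('lambda')

# Trigger every day at 01:00 UTC
rule_arn = events.put_rule(
    Name='daily-cost-schedule',
    ScheduleExpression='cron(0 1 * * ? *)'
)['RuleArn']

# Allow EventBridge to invoke the function
awslambda.add_permission(
    FunctionName=FUNCTION_ARN,
    StatementId='daily-cost-schedule',
    Action='lambda:InvokeFunction',
    Principal='events.amazonaws.com',
    SourceArn=rule_arn
)

# Point the rule at the Lambda function
events.put_targets(
    Rule='daily-cost-schedule',
    Targets=[{'Id': 'daily-cost', 'Arn': FUNCTION_ARN}]
)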