Downloading EIA’s data with Python

Below is a snippet to download the many data sets available on the website of the US DoE’s Energy Information Administration, or EIA (www.eia.gov).

The required user inputs are an EIA token and the codes of the series one wants to download.

The output is a pandas DataFrame. The dates are returned as strings, but they are easy to parse into Python date objects.

My point is that nice things can be achieved with Python after investing only a little time.

#############################################################
# Edouard TALLENT @TaGoMa.Tech -  November, 2014            #
# EIA data interface                                        #
# QuantCorner @ http://quantcorner.wordpress.com            #
#############################################################

import json
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from urllib import request
from urllib.error import URLError, HTTPError

class EIAgov(object):
    '''
    Purpose:
    Download one or more EIA series and gather them in a pandas DataFrame.
    '''

    def __init__(self, token, series):
        '''
        Purpose:
        Initialise the EIAgov class by requesting:
        - EIA token
        - id code(s) of the series to be downloaded

        Parameters:
        - token: string
        - series: string or list of strings
        '''
        self.token = token
        self.series = series

    def Raw(self, ser):
        '''
        Purpose:
        Download a single series and return the decoded JSON document.

        Parameters:
        - ser: string, id code of the series (upper-cased before the request)

        Returns:
        - the parsed JSON object, or None when the request failed with an
          HTTPError or URLError (the error is printed, not raised)
        '''
        # Construct url
        # NOTE(review): the token travels over plain http and this looks like
        # the legacy 'series' endpoint -- confirm both against the current
        # EIA API documentation.
        url = 'http://api.eia.gov/series/?api_key=' + self.token + '&series_id=' + ser.upper()

        try:
            # The 'with' block closes the connection even if reading fails
            with request.urlopen(request.Request(url)) as response:
                content = response.read().decode()  # bytes -> str (UTF-8 by default)

        except HTTPError as e:
            print('HTTP error type.')
            print('Error code: ', e.code)
            return None

        except URLError as e:
            print('URL type error.')
            print('Reason: ', e.reason)
            return None

        # Jsonify 'content' object
        return json.loads(content)

    def GetData(self):
        '''
        Purpose:
        Download every requested series and gather them in one table whose
        rows are the dates of the first series.

        Returns:
        - pandas DataFrame with a 'Date' column (dates as delivered by the
          API) followed by one column per series id. A series that has no
          observation for one of those dates gets a missing value in that
          row. An empty list of ids gives an empty frame with only 'Date'.

        Raises:
        - TypeError if a download failed (Raw returned None)
        - KeyError / IndexError if a response does not have the expected
          'series' -> first item -> 'data' layout
        '''
        # A single id may be given as a bare string; wrap it so that it is
        # not walked character by character
        if isinstance(self.series, str):
            ids = [self.series]
        else:
            ids = list(self.series)

        if not ids:
            return pd.DataFrame(columns=['Date'])

        # Download each series exactly once; every observation is a
        # [date, value] pair
        observations = [self.Raw(ser)['series'][0]['data'] for ser in ids]

        # Deal with the date series: the first series defines the rows
        dates = [row[0] for row in observations[0]]
        df = pd.DataFrame({'Date': dates})

        # Deal with data: match values on their date rather than on their
        # position, so series of different lengths cannot be shifted against
        # each other or run out of rows
        for ser, rows in zip(ids, observations):
            by_date = dict((row[0], row[1]) for row in rows)
            df[ser] = [by_date.get(day) for day in dates]

        return df

if __name__ == '__main__':
    # Replace the placeholder with a personal EIA token
    api_token = '[YOUR_TOKEN_HERE]'

    # Other requests, kept for reference:
    #
    # Electricity - Monthly data
    #   monthly_ids = ['ELEC.REV.AL-ALL.M', 'ELEC.REV.AK-ALL.M', 'ELEC.REV.CA-ALL.M']
    #   print(EIAgov(api_token, monthly_ids).GetData())
    #
    # Petroleum and products imports - quarterly data
    #   quarterly_ids = ['STEO.RNNIPUS.Q', 'STEO.PAIMPORT.Q', 'STEO.UONIPUS.Q']
    #   print(EIAgov(api_token, quarterly_ids).GetData())

    # Petroleum and products supply - annual data
    annual_ids = [
        'STEO.DFPSPP1.A',
        'STEO.DFPSPP2.A',
        'STEO.DFPSPP3.A',
        'STEO.DFPSPP4.A',
        'STEO.DFPSPP5.A',
    ]
    client = EIAgov(api_token, annual_ids)
    frame = client.GetData()
    print(frame)

'''

Out[9]: 
    Date  STEO.DFPSPP1.A  STEO.DFPSPP2.A  STEO.DFPSPP3.A  STEO.DFPSPP4.A  \
0   2015        42.18884        30.62301        43.30924        3.864321   
1   2014        40.18176        29.59070        40.47515        3.914372   
2   2013        38.93400        29.51000        41.49300        3.801000   
3   2012        45.23200        31.33400        38.80100        4.099000   
4   2011        55.27200        33.36300        42.05700        4.159000   
5   2010        62.74800        32.04500        51.13200        3.718000   
6   2009        68.27500        32.28200        48.85400        3.125000   
7   2008        56.71100        32.65600        39.70400        3.022000   
8   2007        55.68100        30.13400        31.27700        3.279000   
9   2006        68.61800        27.12200        32.51800        3.234000   
10  2005        58.58100        29.10700        31.83400        2.886000   
11  2004        50.32600        29.65100        29.83600        3.282000   
12  2003        56.78900        33.34400        31.49000        3.481000   
13  2002        54.50100        31.53200        31.94600        3.791000   
14  2001        62.07200        33.78300        32.75300        3.407000   
15  2000        41.09300        29.60700        31.28500        3.316000   
16  1999        48.46400        32.09100        29.54600        3.262000   
17  1998        76.36700        33.44000        31.16400        3.053000   
18  1997        59.93200        31.22600        31.96500        2.824000   
19  1996        47.39000        32.09400        31.50100        2.891000   
20  1995        51.97100        31.45900        29.94800        3.094000   
21  1994        66.31400        34.69100        28.32000        3.152000   
22  1993        62.53900        34.36000        29.01700        2.794000   
23  1992        65.08000        31.29700        30.78100        2.632000   
24  1991        63.42500        32.98700        31.73300        3.240000   
25  1990        55.58700        32.70600        28.15100        3.273000   

    STEO.DFPSPP5.A  
0         14.39004  
1         14.26472  
2         13.80500  
3         15.34300  
4         14.36100  
5         14.66300  
6         13.42800  
7         13.92000  
8         13.57300  
9         12.15900  
10        13.61400  
11        13.17700  
12        11.43800  
13        12.31500  
14        12.49800  
15        12.72600  
16        12.10000  
17        12.05100  
18        12.48000  
19        12.85300  
20        13.74200  
21        12.74300  
22        12.15700  
23        10.81700  
24        12.13000  
25        12.48700  

'''