#!/usr/bin/env python # coding: utf-8 # ## XBRL US API - Python example # This sample Python code queries the XBRL US Public Filings Database; it is based on a notebook created by [Ties de Kok](https://www.tiesdekok.com). # ### Authenticate for access token # Run the cell below, then type your XBRL US Web account email, account password, Client ID, and secret (get these from https://xbrl.us/access-token), pressing the Enter key on the keyboard after each entry. # # XBRL US limits records returned for a query to improve efficiency; this script loops to collect all data from the Public Filings Database for a query. **Non-members might not be able to return all data for a query** - join XBRL US for comprehensive access - https://xbrl.us/join. # In[ ]: print('Enter your XBRL US Web account email: ') import os, re, sys, json import requests import pandas as pd from IPython.display import display, HTML import numpy as np import getpass from datetime import datetime import urllib from urllib.parse import urlencode email = input() password = getpass.getpass(prompt='Password: ') clientid = getpass.getpass(prompt='Client ID: ') secret = getpass.getpass(prompt='Secret: ') body_auth = {'username' : ''.join(email), 'client_id': ''.join(clientid), 'client_secret' : ''.join(secret), 'password' : ''.join(password), 'grant_type' : 'password', 'platform' : 'ipynb' } payload = urlencode(body_auth) url = 'https://api.xbrl.us/oauth2/token' headers = {"Content-Type": "application/x-www-form-urlencoded"} res = requests.request("POST", url, data=payload, headers=headers) auth_json = res.json() if 'error' in auth_json: print ("\n\nThere was a problem generating an access token with these credentials. Run the first cell again to enter credentials.") else: print ("\n\nYour access token expires in 60 minutes. After it expires, run the cell immediately below this one to generate a new token and continue to use the query cell. \n\nFor now, skip ahead to the section 'Make a Query'.") access_token = auth_json['access_token'] refresh_token = auth_json['refresh_token'] newaccess = '' newrefresh = '' #print('access token: ' + access_token + ' refresh token: ' + refresh_token) # #### Refresh token # The cell below is only needed to refresh an expired access token after 60 minutes. When the access token no longer returns results, run the cell below to refresh the access token or re-enter credentials by running the cell above. Until the refresh token process is needed, skip ahead to **Make a Query**. # # In[ ]: token = token if newrefresh != '' else refresh_token refresh_auth = {'client_id': ''.join(clientid), 'client_secret' : ''.join(secret), 'grant_type' : 'refresh_token', 'platform' : 'ipynb', 'refresh_token' : ''.join(token) } refreshres = requests.post(url, data=refresh_auth) refresh_json = refreshres.json() access_token = refresh_json['access_token'] refresh_token = refresh_json['refresh_token']#print('access token: ' + access_token + 'refresh token: ' + refresh_token) print('Your access token is refreshed for 60 minutes. If it expires again, run this cell to generate a new token and continue to use the query cells below.') print(access_token) # ### Make a query # After the access token confirmation appears above, you can modify the query below, then use the **_Cell >> Run_** menu option from the cell **immediately below this text** to run the entire query for results. # # The sample results are from 10+ years of data for companies in an SIC code, and may take several minutes to recreate. **To test for results quickly, modify the _params_** to comment out report.sic-code and uncomment entity.cik and period.fiscal-year so the search runs for several companies across a few years. # # Refer to XBRL API documentation at https://xbrlus.github.io/xbrl-api/#/Facts/getFactDetails for other endpoints and parameters to filter and return. # In[ ]: # Define the parameters for the filter and fields to be returned, # run the loop to return results offset_value = 0 res_df = [] # Define the parameters of the query - this query returns all of the most-recent # reported fiscal year values for all years as defined below (XBRL_Elements) # in companies reporting with SIC code 2080 XBRL_Elements = ["Assets", "AssetsCurrent", "Liabilities", "LiabilitiesAndStockholdersEquity", "CashCashEquivalentsAndShortTermInvestments" ] sic_code = [2080 ] periods = ['Y'] # In the default example, the companies_cik and years parameters # are commented out and not used in the query. Comment out report.sic-code # and uncomment entity.cik and period.fiscal-year to change the query. years = [2021, 2020, 2019, 2018, 2017 ] companies_cik = ['0000789019', ## Microsoft (MSFT) '0001018724', ## Amazon (AMZN) '0000320193', ## Apple (AAPL) '0000051143' ## IBM (IBM) ] # Define data fields to return (multi-sort based on order) fields = [ # this is the list of the characteristics of the data being returned by the query 'period.fiscal-year.sort(DESC)', 'entity.name.sort(ASC)', 'concept.local-name.sort(ASC)', 'fact.value', 'unit', 'fact.decimals', 'report.filing-date', 'report.sic-code' ] string_sic = [str(int) for int in sic_code] string_years = [str(int) for int in years] params = { # this is the list of what's being queried against the search endpoint 'concept.local-name': ','.join(XBRL_Elements), 'report.sic-code': ','.join(string_sic), #'entity.cik': ','.join(companies_cik), #'period.fiscal-year': ','.join(string_years), 'period.fiscal-period': ','.join(periods), 'fact.ultimus': 'TRUE', # return only the latest occurrence of a specific fact (eg. 2018 revenues) 'fact.has-dimensions': 'FALSE', # generally, 'FALSE' will return face financial data only 'fields': ','.join(fields) } # Execute the query with loop for all results search_endpoint = 'https://api.xbrl.us/api/v1/fact/search' orig_fields = params['fields'] count = 0 query_start = datetime.now() printed = False while True: if not printed: print("On", query_start.strftime("%c"), email, "(client ID:", str(clientid.split('-')[0]), "...) started the query and") printed = True res = requests.get(search_endpoint, params=params, headers={'Authorization' : 'Bearer {}'.format(access_token)}) res_json = res.json() if 'error' in res_json: print('There was an error: {}'.format(res_json['error_description'])) break print("up to", str(offset_value + res_json['paging']['limit']), "records are found so far ...") res_df += res_json['data'] if res_json['paging']['count'] < res_json['paging']['limit']: print(" - this set contained fewer than the", res_json['paging']['limit'], "possible, only", str(res_json['paging']['count']), "records.") break else: offset_value += res_json['paging']['limit'] if 100 == res_json['paging']['limit']: params['fields'] = orig_fields + ',fact.offset({})'.format(offset_value) if offset_value == 10 * res_json['paging']['limit']: break elif 500 == res_json['paging']['limit']: params['fields'] = orig_fields + ',fact.offset({})'.format(offset_value) if offset_value == 4 * res_json['paging']['limit']: break params['fields'] = orig_fields + ',fact.offset({})'.format(offset_value) if not 'error' in res_json: current_datetime = datetime.now().replace(microsecond=0) time_taken = current_datetime - query_start index = pd.DataFrame(res_df).index total_rows = len(index) your_limit = res_json['paging']['limit'] limit_message = "If the results below match the limit noted above, you might not be seeing all rows, and should consider upgrading (https://xbrl.us/access-token).\n" if your_limit == 100: print("\nThis non-Member account has a limit of " , 10 * your_limit, " rows per query from our Public Filings Database. " + limit_message) elif your_limit == 500: print("\nThis Basic Individual Member account has a limit of ", 4 * your_limit, " rows per query from our Public Filings Database. " + limit_message) print("\nAt " + current_datetime.strftime("%c") + ", the query finished with ", str(total_rows), " rows returned in " + str(time_taken) + " for \n" + urllib.parse.unquote(res.url)) df = pd.DataFrame(res_df) # the format truncates the HTML display of numerical values to two decimals; .csv data is unaffected pd.options.display.float_format = '{:,.2f}'.format display(HTML(df.to_html())) # In[ ]: # If you run this program locally, you can save the output to a file # on your computer (modify D:\results.csv to your system) df.to_csv(r"D:\results.csv",sep=",")