#!/usr/bin/env python
# coding: utf-8

# ## XBRL US API - ITEP data example  
# 
# ### Authenticate for access token 
# Run the cell below, then type your XBRL US Web account email, account password, Client ID, and secret as noted, pressing the Enter key on the keyboard after each entry.
# 
# XBRL US limits records returned for a query to improve efficiency; this script loops to collect all data from the Public Filings Database for a query. **Non-members might not be able to return all data for a query** - join XBRL US for comprehensive access - https://xbrl.us/join.

# In[ ]:


import os, re, sys, json
import requests
import pandas as pd
from IPython.display import display, HTML
import numpy as np
import getpass
from datetime import datetime
import urllib
from urllib.parse import urlencode


class tokenInfoClass:
    access_token = None
    refresh_token = None
    email = None
    username = None
    client_id = None
    client_secret = None
    url = 'https://api.xbrl.us/oauth2/token'
    headers = {"Content-Type": "application/x-www-form-urlencoded"}
	
def refresh(info):
    refresh_auth = {
                'client_id': info.client_id, 
				'client_secret' : info.client_secret, 
				'grant_type' : 'refresh_token', 
				'platform' : 'ipynb', 
				'refresh_token' : info.refresh_token 
                }
    refreshres = requests.post(info.url, data=refresh_auth, headers=info.headers)
    refresh_json = refreshres.json()
    info.access_token = refresh_json['access_token']
    info.refresh_token = refresh_json['refresh_token']
    print('Your access token(%s) is refreshed for 60 minutes. If it expires again, run this cell to generate a new token and continue to use the query cells below.' % (info.access_token))
    return info	

tokenInfo = tokenInfoClass()

tokenInfo.email = input('Enter your XBRL US Web account email: ')
tokenInfo.password = getpass.getpass(prompt='Password: ')
tokenInfo.client_id = getpass.getpass(prompt='Client ID: ')
tokenInfo.client_secret = getpass.getpass(prompt='Secret: ')

body_auth = {'username' : tokenInfo.email, 
            'client_id': tokenInfo.client_id, 
            'client_secret' : tokenInfo.client_secret, 
            'password' : tokenInfo.password, 
            'grant_type' : 'password', 
            'platform' : 'ipynb' }

#print(body_auth)

payload = urlencode(body_auth)
res = requests.request("POST", tokenInfo.url, data=payload, headers=tokenInfo.headers)
auth_json = res.json()

if 'error' in auth_json:
    print("\n\nThere was a problem generating the access token: %s.  Run the first cell again and enter the credentials." % (auth_json['error_description']))
else:
    tokenInfo.access_token = auth_json['access_token']
    tokenInfo.refresh_token = auth_json['refresh_token']
    print ("\n\nYour access token expires in 60 minutes. After it expires, it should be regenerated automatically.  If not, run the cell rerun the first query cell. \n\nFor now, skip ahead to the section 'Make a Query'.")
	
#print(vars(tokenInfo))
print('\n\naccess token: ' + tokenInfo.access_token + ' refresh token: ' + tokenInfo.refresh_token)


# ### Make a query 
# After the access token confirmation appears above, you can modify the query below and use the **_Cell >> Run_** menu option with the cell **immediately below this text** to run the query for updated results. 
# 
# The sample results are from 10+ years of data for companies in an SIC code for two years, and may take several minutes to recreate.  To test for results quickly, modify the **_params_** to  comment out report.sic-code, uncomment entity.cik and reverse commenting on XBRL_Elements so the search returns a few facts for companies.
#   
# Refer to XBRL API documentation at https://xbrlus.github.io/xbrl-api/#/Facts/getFactDetails for other endpoints and parameters to filter and return. 

# In[ ]:


# Define the parameter variables of the query

endpoint = 'fact'

XBRL_Elements = [
                #  'Assets',
                #  'AssetsCurrent',
                #  'Liabilities',
                #  'LiabilitiesAndStockholdersEquity',
                   'CashCashEquivalentsAndShortTermInvestments',
                   'EffectiveIncomeTaxRateReconciliationAtFederalStatutoryIncomeTaxRate',
                   'EffectiveIncomeTaxRateReconciliationTaxCutsAndJobsActOf2017Percent',
                   'EffectiveIncomeTaxRateReconciliationTaxCutsAndJobsActOf2017TransitionTaxOnAccumulatedForeignEarningsPercent',
                   'EffectiveIncomeTaxRateReconciliationStateAndLocalIncomeTaxes',
                   'EffectiveIncomeTaxRateReconciliationForeignIncomeTaxRateDifferential',
                   'EffectiveIncomeTaxRateReconciliationTaxCredits',
                   'EffectiveIncomeTaxRateReconciliationChangeInEnactedTaxRate',
                   'EffectiveIncomeTaxRateReconciliationChangeInDeferredTaxAssetsValuationAllowance',
                   'EffectiveIncomeTaxRateReconciliationShareBasedCompensationExcessTaxBenefitPercent',
                   'EffectiveIncomeTaxRateReconciliationOtherAdjustments',
                   'EffectiveIncomeTaxRateReconciliationOtherReconcilingItemsPercent',
                   'EffectiveIncomeTaxRateContinuingOperations',
                   'IncomeTaxReconciliationIncomeTaxExpenseBenefitAtFederalStatutoryIncomeTaxRate',
                   'EffectiveIncomeTaxRateReconciliationTaxCutsAndJobsActOf2017Amount',
                   'EffectiveIncomeTaxRateReconciliationTaxCutsAndJobsActOf2017TransitionTaxOnAccumulatedForeignEarningsAmount',
                   'IncomeTaxReconciliationStateAndLocalIncomeTaxes',
                   'IncomeTaxReconciliationForeignIncomeTaxRateDifferential',
                   'IncomeTaxReconciliationTaxCredits',
                   'IncomeTaxReconciliationOtherReconcilingItems'
                ]
# add more SIC codes or years as comma-separated values

sic_code = [2834
           ]

years = [2021,
         2020,
         2019
        ]

periods = ['Y']

companies_cik = ['0000789019', ## Microsoft (MSFT)
                 '0001018724', ## Amazon (AMZN)
                 '0000320193', ## Apple (AAPL)
                 '0000051143'  ## IBM (IBM)
                ]

# Define data fields to return (multi-sort based on order)

fields = [ # this is the list of the characteristics of the data being returned by the query
         'period.fiscal-year.sort(DESC)',
         'entity.name.sort(ASC)',
         'concept.local-name.sort(ASC)',
         'fact.numerical-value',
         'unit',
         'fact.decimals',
         'report.accession',
         'report.filing-date'
         'report.document-type',
         'report.sic-code',
         ]

string_sic = [str(int) for int in sic_code]
string_years = [str(int) for int in years]

# Set unique rows as True of False (True drops any duplicate rows)
unique = True

# Limit the number of rows displayed by the notebook (does not impact the data frame)
rows_to_display = 10 # Set as '' to display all rows in the notebook

# Below is the list of what's being queried using the search endpoint.
 
params = { 
         'concept.local-name': ','.join(XBRL_Elements),
         'report.sic-code': ','.join(string_sic),
        #'entity.cik': ','.join(companies_cik),
         'period.fiscal-year': ','.join(string_years),
         'period.fiscal-period': ','.join(periods),
         'report.document-type': '10-K,10-K/A',
         'fact.ultimus': 'TRUE', # return only the latest reporting for a specific fact 
         # 'fact.has-dimensions': 'FALSE', generally, 'FALSE' will return face financial data only
         'fields': ','.join(fields)
         }


### Execute the query with loop for all results 
### THIS SECTION DOES NOT NEED TO BE EDITED

search_endpoint = 'https://api.xbrl.us/api/v1/' + endpoint + '/search'
if unique:
    search_endpoint += "?unique"
orig_fields = params['fields']
offset_value = 0
res_df = []
count = 0
query_start = datetime.now()
printed = False
run_query = True

while True:
    if not printed:
        print("On", query_start.strftime("%c"), tokenInfo.email, "(client ID:", str(tokenInfo.client_id.split('-')[0]), "...) started the query and")
        printed = True
    retry = 0
    while retry < 3:
        res = requests.get(search_endpoint, params=params, headers={'Authorization' : 'Bearer {}'.format(tokenInfo.access_token)})
        res_json = res.json()
        if 'error' in res_json:
            if res_json['error_description'] == 'Bad or expired token':
                tokenInfo = refresh(tokenInfo)
            else: 
                print('There was an error: {}'.format(res_json['error_description']))
                run_query = False
                break
        else: 
		        break
        retry +=1
        if retry >= 3:
            print("Can't refresh the access token.  Run the first query block, then rerun the query.")
            run_query = False

    if not run_query:
       break

    print("up to", str(offset_value + res_json['paging']['limit']), "records are found so far ...")

    res_df += res_json['data']

    if res_json['paging']['count'] < res_json['paging']['limit']:
        print(" - this set contained fewer than the", res_json['paging']['limit'], "possible, only", str(res_json['paging']['count']), "records.")
        break
    else: 
        offset_value += res_json['paging']['limit'] 
        if 100 == res_json['paging']['limit']:
                params['fields'] = orig_fields + ',' + endpoint + '.offset({})'.format(offset_value)
                if offset_value == 10 * res_json['paging']['limit']:
                        break 
        elif 500 == res_json['paging']['limit']:
                params['fields'] = orig_fields + ',' + endpoint + '.offset({})'.format(offset_value)
                if offset_value == 4 * res_json['paging']['limit']:
                        break 
        params['fields'] = orig_fields + ',' + endpoint + '.offset({})'.format(offset_value)

if not 'error' in res_json:
    current_datetime = datetime.now().replace(microsecond=0)
    time_taken = current_datetime - query_start
    index = pd.DataFrame(res_df).index
    total_rows = len(index)
    your_limit = res_json['paging']['limit']
    limit_message = "If the results below match the limit noted above, you might not be seeing all rows, and should consider upgrading (https://xbrl.us/access-token).\n"
    
    if your_limit == 100:
        print("\nThis non-Member account has a limit of " , 10 * your_limit, " rows per query from our Public Filings Database. " + limit_message)
    elif your_limit == 500:
        print("\nThis Basic Individual Member account has a limit of ", 4 * your_limit, " rows per query from our Public Filings Database. " + limit_message)
    
    print("\nAt " + current_datetime.strftime("%c") +  ", the query finished with  ", str(total_rows), "  rows returned in " + str(time_taken) + " for \n" +  urllib.parse.unquote(res.url))
    
    df = pd.DataFrame(res_df)
    # the format truncates the HTML display of numerical values to two decimals; .csv data is unaffected
    pd.options.display.float_format = '{:,.2f}'.format
    display(HTML(df.to_html(max_rows=rows_to_display)))


# In[ ]:


# If you run this program locally, you can save the output to a file on your computer (modify D:\results.csv to your system)
df.to_csv(r"D:\results.csv",sep=",")