import json
import requests
# Root URL of the corpus-db service; the API paths used below are appended to it.
baseURL = "http://corpus-db.org"
def getMeta(subject, timeout=30):
    """Fetch the metadata records that corpus-db lists under a subject.

    Args:
        subject: Subject heading, appended verbatim to the
            ``/api/subject/`` path of ``baseURL``.
        timeout: Seconds to wait on the server before giving up; passed
            straight through to ``requests.get``.

    Returns:
        The decoded JSON body of the response.  (Callers in this file
        iterate over it and index each element by key, so it is presumably
        a list of dicts -- confirm against the API.)

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If the server does not respond within ``timeout``.
    """
    metaResponse = requests.get(
        baseURL + "/api/subject/" + subject,
        timeout=timeout,  # without a timeout requests can block forever
    )
    # Fail loudly on an error status instead of feeding an error page to
    # the JSON parser.
    metaResponse.raise_for_status()
    meta = json.loads(metaResponse.text)
    return meta
# Pull every catalogue record filed under the detective-fiction subject.
detectiveMeta = getMeta('Detective and mystery stories')

# Keep only the records whose 'languages' field is exactly the string
# "['en']" (the comparison is against a string, not a Python list).
englishDetectives = list(
    filter(lambda record: record['languages'] == "['en']", detectiveMeta)
)

# How many English-language records were found (the value is only shown
# when this runs as the last expression of an interactive cell).
len(englishDetectives)
# Output: 586
# Show author, title and id for the first ten English-language records,
# one record per line, fields separated by single spaces.
for item in englishDetectives[:10]:
    print(*(item[field] for field in ('author', 'title', 'id')))
# Output:
# Doyle, Arthur Conan The Return of Sherlock Holmes 108.0
# Collins, Wilkie The Haunted Hotel: A Mystery of Modern Venice 170.0
# Rohmer, Sax The Insidious Dr. Fu Manchu 173.0
# Chesterton, G. K. (Gilbert Keith) The Innocence of Father Brown 204.0
# Doyle, Arthur Conan The Return of Sherlock Holmes 221.0
# Chesterton, G. K. (Gilbert Keith) The Wisdom of Father Brown 223.0
# Doyle, Arthur Conan A Study in Scarlet 244.0
# Gaboriau, Emile The Count's Millions 305.0
# Rinehart, Mary Roberts Where There's a Will 330.0
# Michelson, Miriam In the Bishop's Carriage 481.0
def getText(idStr, timeout=30):
    """Download the full text of one corpus-db book.

    Args:
        idStr: Book identifier placed in the ``/api/id/<id>/fulltext``
            path of ``baseURL``.  Non-string values are converted with
            ``str()`` so a numeric id does not break the concatenation.
        timeout: Seconds to wait on the server before giving up; passed
            straight through to ``requests.get``.

    Returns:
        The ``'text'`` field of the first element of the decoded JSON
        response.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If the server does not respond within ``timeout``.
        IndexError or KeyError: If the decoded response lacks a first
            element or that element lacks a ``'text'`` field.
    """
    response = requests.get(
        baseURL + '/api/id/' + str(idStr) + '/fulltext',
        timeout=timeout,  # without a timeout requests can block forever
    )
    # Fail loudly on an error status instead of feeding an error page to
    # the JSON parser.
    response.raise_for_status()
    text = json.loads(response.text)[0]['text']
    return text
# Take the first ten English-language records.  The slice is a new list,
# but its elements are the same dict objects held by englishDetectives.
tenEnglishDetectives = englishDetectives[:10]

# Download each book and store its full text on the record under 'text'.
for item in tenEnglishDetectives:
    text = getText(item['id'])
    item.update(text=text)

# Peek at the first 200 characters of the first downloaded book (the value
# is only shown when this runs as the last expression of an interactive cell).
tenEnglishDetectives[0]['text'][:200]
# Output: '\n\n\n\n\n\n\n\nTHE RETURN OF SHERLOCK HOLMES,\n\nA Collection of Holmes Adventures\n\n\nby Sir Arthur Conan Doyle\n\n\n\n\nCONTENTS:\n\n The Adventure Of The Empty House\n\n The Adventure Of The Norwood Builder\n\n '
# Inspect the container type of the records; the result is only displayed
# when this runs as the last expression of an interactive cell.
type(tenEnglishDetectives)
# Output: list
# Serialise the ten records (metadata plus full text) to detectives.json.
# The with-block guarantees the file is flushed and closed even if
# serialisation fails part-way; the bare open(...).write(...) form left the
# handle open until garbage collection.
with open('detectives.json', 'w', encoding='utf-8') as outFile:
    json.dump(tenEnglishDetectives, outFile)
# Output: 4605130 (number of characters written to detectives.json)