get_XXX_url
@staticmethod
def get_soup_url(url):
    """Template hook: return ``url`` unchanged.

    This is the default (identity) form of the hook. Judging by its name it
    is meant to be overridden per journal to map any accepted URL to the
    page that gets parsed -- NOTE(review): confirm against the crawler base
    class.

    Args:
        url (str): URL given by the caller.

    Returns:
        str: The same ``url``, untouched.
    """
    return url
@staticmethod
def get_pdf_url(url):
    """Template hook: hand back ``url`` exactly as received.

    Identity placeholder. The name suggests a journal-specific override
    would map the input to the article's PDF location -- NOTE(review):
    confirm against the crawler base class.

    Args:
        url (str): URL given by the caller.

    Returns:
        str: The same ``url``, untouched.
    """
    return url
@staticmethod
def get_tex_url(url):
    """Template hook: hand back ``url`` exactly as received.

    Identity placeholder. The name suggests a journal-specific override
    would map the input to a TeX source location -- NOTE(review): confirm
    against the crawler base class.

    Args:
        url (str): URL given by the caller.

    Returns:
        str: The same ``url``, untouched.
    """
    return url
import sys
sys.path.append("../tests/")
import inspect
from data import JournalData
from gummy import journals
from gummy.journals import *
[success] local driver can be built. [failure] remote driver can't be built. DRIVER_TYPE: local
# Journal name typed by the user, used as the lookup key in the cells below.
# Strip surrounding whitespace as well as lower-casing, so that an accidental
# trailing space (e.g. "Genetics ") does not end up in the key.
journal = input().strip().lower()
Genetics
# Look up the crawler for the journal name entered above.
crawler = journals.get(journal)

# Dump the current source of each URL hook, in the same order as the template
# at the top of the notebook.
for hook_name in ("get_soup_url", "get_pdf_url", "get_tex_url"):
    print(inspect.getsource(getattr(crawler, hook_name)))
@staticmethod def get_soup_url(url): return url @staticmethod def get_pdf_url(url): return url @staticmethod def get_tex_url(url): return url
# Fetch the sample URL that the test data (`JournalData`, imported from the
# `data` module under ../tests/) holds for this journal, and display it.
url = JournalData.get(journal)
print(url)
https://www.genetics.org/content/176/4/2177
# URL variants of one and the same article, used below to try out the
# normalisation expressions for get_soup_url / get_pdf_url.
urls = [
# Article page (no ".full.pdf" suffix, no "/genetics/" path segment).
"https://www.genetics.org/content/176/4/2177",
# PDF link with the extra "/genetics/" path segment.
"https://www.genetics.org/content/genetics/176/4/2177.full.pdf",
# PDF link without the "/genetics/" path segment.
"https://www.genetics.org/content/176/4/2177.full.pdf",
]
get_soup_url
for url in urls:
    # str.rstrip(".full.pdf") would strip any trailing run of the characters
    # ".", "f", "u", "l", "p", "d" -- not the literal suffix -- and so would
    # also eat the tail of an article path ending in one of those characters.
    # Cut the suffix only when it is actually present.
    page_url = url[: -len(".full.pdf")] if url.endswith(".full.pdf") else url
    # Drop the extra "/genetics/" segment used by the PDF links.
    print(page_url.replace("/content/genetics/", "/content/"))
https://www.genetics.org/content/176/4/2177 https://www.genetics.org/content/176/4/2177 https://www.genetics.org/content/176/4/2177
get_pdf_url
for url in urls:
    # str.rstrip(".full.pdf") would strip any trailing run of the characters
    # ".", "f", "u", "l", "p", "d" -- not the literal suffix -- and so would
    # also eat the tail of an article path ending in one of those characters.
    # Cut the suffix only when it is actually present.
    page_url = url[: -len(".full.pdf")] if url.endswith(".full.pdf") else url
    # Normalise to the plain "/content/" form first, so that "/genetics/" is
    # inserted exactly once whichever variant was given.
    page_url = page_url.replace("/content/genetics/", "/content/")
    print(page_url.replace("/content/", "/content/genetics/") + ".full.pdf")
https://www.genetics.org/content/genetics/176/4/2177.full.pdf https://www.genetics.org/content/genetics/176/4/2177.full.pdf https://www.genetics.org/content/genetics/176/4/2177.full.pdf
get_tex_url
The crawler's get_tex_url returns the URL unchanged, as the source printed below shows.
# Print the source of each URL hook on the crawler, one after the other.
for hook_name in ("get_soup_url", "get_pdf_url", "get_tex_url"):
    print(inspect.getsource(getattr(crawler, hook_name)))
@staticmethod def get_soup_url(url): return url.rstrip(".full.pdf").replace("/content/genetics/", "/content/") @staticmethod def get_pdf_url(url): return GeneticsCrawler.get_soup_url(url).replace("/content/", "/content/genetics/")+".full.pdf" @staticmethod def get_tex_url(url): return url