import pandas as pd
from textblob import Word
# Load the header table from "header.csv" (path is relative to the working directory).
headers = pd.read_csv("header.csv")
# Bare expression: has no effect in a script; in a notebook cell it displays the
# "Header" column, which is the column the classification loop iterates over.
headers["Header"]
0 GPRPy 1 Simplemost installation 2 Running the software 3 Running automatically generated scripts 4 In case of trouble 5 Uninstalling GPRPy 6 News 7 Scale-recurrent Network for Deep Image Deblur... 8 Our results on real data 9 Results on the testing dataset 10 More cases on real photos from previous papers: 11 Prerequisites 12 Installation 13 Testing 14 Evaluation 15 Training 16 Models 17 How to choose 18 Reference 19 Contact 20 Reference 21 connect to the API 22 download single scene by known product id 23 search by polygon, time, and Hub query keywords 24 download all results from the search 25 GeoJSON FeatureCollection containing footprin... 26 GeoPandas GeoDataFrame with the metadata of t... 27 Get basic information about the product: its ... 28 its download url 29 Get the product's full metadata available on ... ... 831 Local config 832 Example of a json file 833 Example of a js module 834 Custom renderers 835 Known plugins 836 pyGeoPressure --> 837 Features 838 Getting Started 839 Installation 840 Example 841 Pore Pressure Prediction using well log data 842 Documentation 843 Contribute 844 Report Bugs 845 Suggest Enhancements 846 Submit Pull Requests 847 Support 848 License 849 News! 850 Contents 851 Introduction 852 Requirements 853 Quick Start 854 Training 855 Evaluation 856 Prediction 857 Baseline 858 Contributors 859 Citation 860 License Name: Header, Length: 861, dtype: object
# Reference senses for each header category.
# Every entry is one synset selected by position from ``Word(<term>).synsets``.
# NOTE(review): the indices look hand-picked and depend on the ordering returned
# by the installed WordNet data -- confirm them if the corpus version changes.
citation = [
    Word("citation").synsets[2],
    Word("reference").synsets[1],
    Word("cite").synsets[3],
]
run = [
    Word("run").synsets[9],
    Word("run").synsets[34],
    Word("execute").synsets[4],
]
install = [
    Word("installation").synsets[0],
    Word("install").synsets[0],
    Word("setup").synsets[1],
    Word("prepare").synsets[0],
    Word("preparation").synsets[0],
    Word("manual").synsets[0],
    Word("guide").synsets[2],
    Word("guide").synsets[9],
]
download = [
    Word("download").synsets[0],
]
requirement = [
    Word("requirement").synsets[2],
    Word("prerequisite").synsets[0],
    Word("prerequisite").synsets[1],
    Word("dependency").synsets[0],
    Word("dependent").synsets[0],
]
contact = [
    Word("contact").synsets[9],
]
description = [
    Word("description").synsets[0],
    Word("description").synsets[1],
    Word("introduction").synsets[3],
    Word("introduction").synsets[6],
    Word("basics").synsets[0],
    Word("initiation").synsets[1],
    Word("start").synsets[0],
    Word("start").synsets[4],
    Word("started").synsets[0],
    Word("started").synsets[1],
    Word("started").synsets[7],
    Word("started").synsets[8],
    Word("overview").synsets[0],
    Word("summary").synsets[0],
    Word("summary").synsets[2],
]
contributor = [
    Word("contributor").synsets[0],
]
documentation = [
    Word("documentation").synsets[1],
]
# NOTE(review): this name shadows the interactive ``license`` builtin; it is kept
# because code outside this block may refer to it.
license = [
    Word("license").synsets[3],
    Word("license").synsets[0],
]
usage = [
    Word("usage").synsets[0],
    Word("example").synsets[0],
    Word("example").synsets[5],
    Word("implement").synsets[1],
    Word("implementation").synsets[1],
    Word("demo").synsets[1],
    Word("tutorial").synsets[0],
    Word("tutorial").synsets[1],
]
update = [
    Word("updating").synsets[0],
    Word("updating").synsets[3],
]
issues = [
    Word("issues").synsets[0],
    Word("errors").synsets[5],
    Word("problems").synsets[0],
    Word("problems").synsets[2],
]
support = [
    Word("support").synsets[7],
    Word("help").synsets[0],
    Word("help").synsets[9],
    Word("report").synsets[0],
    Word("report").synsets[6],
]

# Category label -> list of reference senses.
# Insertion order matters: match_group only replaces its current best on a
# strictly greater score, so on a tie the category listed first here wins.
group = {
    "citation": citation,
    "download": download,
    "run": run,
    "installation": install,
    "requirement": requirement,
    "contact": contact,
    "description": description,
    "contributor": contributor,
    "documentation": documentation,
    "license": license,
    "usage": usage,
    "update": update,
    "issues": issues,
    "support": support,
}
def find_sim(wordlist, wd):
    """Return the highest path similarity between ``wd`` and the senses in ``wordlist``.

    Args:
        wordlist: Iterable of senses; each one is passed to ``wd.path_similarity``.
        wd: Object exposing ``path_similarity(sense)``, which returns a number
            or ``None`` when no similarity is available.

    Returns:
        The largest non-``None`` similarity, or ``0`` when ``wordlist`` is empty
        or every comparison returned ``None``.
    """
    # Evaluate each similarity exactly once; the lookup is the expensive part.
    scores = (wd.path_similarity(sense) for sense in wordlist)
    return max((score for score in scores if score is not None), default=0)
def match_group(word_syn, group, threshold):
    """Pick the category whose reference senses are most similar to a word.

    Args:
        word_syn: Iterable of senses of the word being classified.
        group: Mapping of category label -> list of reference senses.
        threshold: Minimum similarity (exclusive) a category must exceed.

    Returns:
        The label of the best-scoring category over all senses of the word, or
        ``""`` when no score is above both ``threshold`` and zero. On equal
        scores the first category encountered is kept.
    """
    best_label = ""
    best_score = 0
    for synset in word_syn:
        for label, members in group.items():
            score = find_sim(members, synset)
            # Skip anything that fails the threshold or does not strictly beat
            # the best score seen so far.
            if score <= threshold or score <= best_score:
                continue
            best_label, best_score = label, score
    return best_label
# Classify each header word by word and export the (Header, Group) pairs.
# A header produces one row per word that matches a category, so the same
# header can appear several times in the result.
matchedgroups = []  # not used by the visible code; kept in case other code reads it

# Rows are accumulated in a plain list and the frame is built once at the end:
# DataFrame.append was removed in pandas 2.0 and copied the whole frame per call.
rows = []
for h in headers["Header"]:
    # NOTE(review): [1:] drops the first space-separated token of every header.
    # Presumably the CSV values start with a space or marker token -- confirm,
    # otherwise the first real word of each header is never examined.
    sentence = h.split(" ")[1:]
    for s in sentence:
        synn = Word(s).synsets
        if not synn:
            # No known senses for this token, so nothing to compare.
            continue
        bestgroup = match_group(synn, group, 0.6)
        if bestgroup != "":
            rows.append({"Header": h, "Group": bestgroup})

# Explicit columns keep the Header/Group layout even when nothing matched.
datadf = pd.DataFrame(rows, columns=["Header", "Group"])
print(datadf)
datadf.to_csv("header_groups.csv", index=False)
Header Group 0 Simplemost installation installation 1 Running the software run 2 Running automatically generated scripts run 3 In case of trouble issues 4 Prerequisites requirement 5 Installation installation 6 Reference citation 7 Contact contact 8 Reference citation 9 download single scene by known product id download 10 download all results from the search download 11 Get basic information about the product: its ... description 12 its download url download 13 Get the product's full metadata available on ... description 14 Introduction description 15 Prepare training data installation 16 Begin to train description 17 Quick start description 18 Citation citation 19 a record schema. We can get initial values f... description 20 coordinate reference system as the source. Th... citation 21 Get a point on the boundary of the record's description 22 executes ``dst.flush(); dst.close()``. run 23 Requirements requirement 24 Installation installation 25 Linux Setup with virtualenv installation 26 Install TensorFlow installation 27 Windows Setup with python 3 and Anaconda installation 28 if you need to get chumpy description 29 Demo usage .. ... ... 344 License license 345 Introduction description 346 Documentation documentation 347 Issues issues 348 License license 349 Installation installation 350 Documentation documentation 351 License license 352 Get the 1st bending mode shape. Results are ... description 353 to avoid getting the "Factor is exactly singu... description 354 to avoid getting the "Factor is exactly singu... 
issues 355 Install or Update installation 356 Install or Update update 357 Usage usage 358 Example of a json file usage 359 Example of a js module usage 360 Getting Started description 361 Getting Started description 362 Installation installation 363 Example usage 364 Documentation documentation 365 Report Bugs support 366 Support support 367 License license 368 Introduction description 369 Requirements requirement 370 Quick Start description 371 Contributors contributor 372 Citation citation 373 License license [374 rows x 2 columns]