from setup_corpus import build_corpora
# Build the per-category corpora. Judging by the printed output and later usage,
# this is a dict keyed by category name ('description', 'installation',
# 'invocation', 'citation') whose values are two-column DataFrames: `excerpt`
# (text) and a boolean column named after the category (the label).
corpora = build_corpora()
# Dump the corpora for a quick visual sanity check of sizes and labels.
print(corpora)
Selected Category: description description has 280 samples; installation has 70 samples; invocation has 70 samples; citation has 70 samples; Selected Category: installation description has 200 samples; installation has 800 samples; invocation has 200 samples; citation has 200 samples; Selected Category: invocation description has 279 samples; installation has 279 samples; invocation has 1118 samples; citation has 279 samples; Selected Category: citation description has 77 samples; installation has 77 samples; invocation has 77 samples; citation has 309 samples; {'description': excerpt description 0 Puppeteer is a Node library which provides a h... True 1 The major contributors of this repository incl... True 2 Integral Regression is initially described in ... True 3 We build a 3D pose estimation system based mai... True 4 The Integral Regression is also known as soft-... True 5 This is an official implementation for Integra... True 6 The original implementation is based on our in... True 7 LibGEOS is a LGPL-licensed package for manipul... True 8 Among other things, it allows you to parse Wel... True 9 This repository contains the experiments in th... True 10 For the results presented in the paper, we did... True 11 Batch normalization is currently not supported... True 12 Open-source Ground Penetrating Radar processin... True 13 Pytorch implementation for high-resolution (e.... True 14 The PVGeo Python package contains VTK powered ... True 15 A PyVista (and VTK) interface for the Open Min... True 16 GeoNotebook is an application that provides cl... True 17 Fiona is OGR's neat and nimble API for Python ... True 18 Fiona is designed to be simple and dependable.... True 19 Shapely is a BSD-licensed Python package for m... True 20 Rain streaks can severely degrade the visibili... True 21 The pytorch branch contains: True 22 the pytorch implementation of Peak Response Ma... True 23 the PASCAL-VOC demo (training, inference, and ... 
True 24 Lithology and stratigraphic logs for wells and... True 25 This Python module allows you to: True 26 Interactively control an instance of ANSYS v14... True 27 Extract data directly from binary ANSYS v14.5+... True 28 Rapidly read in binary result (.rst), binary m... True 29 Official implementation of GANimation. In this... True .. ... ... 460 author = {Xinlei Chen and Li-Jia Li and Li Fei... False 461 journal={arXiv preprint arXiv:1809.06079}, False 462 booktitle = {Proceedings of the IEEE Conferenc... False 463 booktitle = {IEEE Conference on Computer Visio... False 464 @article{yu2018pygeopressure, False 465 Tristan van Leeuwen, TristanvanLeeuwen False 466 year={2018} False 467 @inproceedings{chen18iterative, False 468 Dieter Werthmüller, prisae False 469 } False 470 title = {Two-Stream Convolutional Networks for... False 471 If you find our work useful in your research, ... False 472 booktitle = {International Conference on Machi... False 473 } False 474 volume = {3}, False 475 Citation False 476 author = {Lim, Bee and Son, Sanghyun and Kim, ... False 477 Citation False 478 Title = {{R-FCN}: Object Detection via Region-... False 479 M. Attene. A lightweight approach to repairing... False 480 year={2018} False 481 False 482 @InProceedings{kato2018renderer False 483 year = {2018} False 484 Learning Spatio-Temporal Features with 3D Resi... False 485 author={Chen, Yuhua and Li, Wen and Sakaridis,... False 486 } False 487 Calcagno, P., Chilès, J. P., Courrioux, G., & ... False 488 Year = {2017} False 489 HyVR can be attributed by citing the following... False [490 rows x 2 columns], 'installation': excerpt installation 0 Neural Renderer (this repository) False 1 This repository only contains the core compone... False 2 Additionally, the aim is not to support the fu... False 3 Lithology and stratigraphic logs for wells and... False 4 Faster R-CNN False 5 Basically, he wears a top hat, lives in your c... False 6 A Jupyter / Leaflet bridge enabling interactiv... 
False 7 mplstereonet provides lower-hemisphere equal-a... False 8 By default, a modified Kamb method with expone... False 9 Detectron is Facebook AI Research's software s... False 10 This work is based on our research paper, whic... False 11 This Python module allows you to: False 12 spatial regression and statistical modeling on... False 13 Rain streaks can severely degrade the visibili... False 14 PySAL, the Python spatial analysis library, is... False 15 Shapely is a BSD-licensed Python package for m... False 16 At FAIR, Detectron has enabled numerous resear... False 17 Import meshes from many common formats (use py... False 18 This is a NodeJS port of pymasker. It provides... False 19 For simplicity, each dot represents one U-Net.... False 20 Export meshes as VTK, STL, OBJ, or PLY file types False 21 Sandbox False 22 This is the code for the paper False 23 Modelling routines: False 24 The Laplacian Pyramid Super-Resolution Network... False 25 fdesign: Design digital linear filters for the... False 26 The goal of Detectron is to provide a high-qua... False 27 Airwave (semi-analytical in the case of step r... False 28 Geographic information systems use GeoTIFF and... False 29 tiles server for live feedback when coding False ... ... ... 1370 Algorithm and Citation Policy False 1371 Title = {Multi{P}ose{N}et: Fast Multi... False 1372 volume = {4}, False 1373 @inproceedings{LapSRN, False 1374 @inproceedings{tesfaldet2018, False 1375 author = {Yiping Chen and Jingkang Wang and Jo... False 1376 If you use this code or pre-trained models, pl... False 1377 Lajaunie, C., Courrioux, G., & Manuel, L. (199... False 1378 } False 1379 and Michael J. Black False 1380 Title = {{R-FCN}: Object Detection via Region-... False 1381 For a more detailed elaboration of the theory ... False 1382 To better understand how the algorithm works, ... False 1383 author={Sun, Xiao and Xiao, Bin and Liang, Shu... False 1384 booktitle = {The IEEE Conference on Computer V... 
False 1385 } False 1386 @article{zhang2018rdnir, False 1387 title={Integral human pose regression}, False 1388 booktitle={The IEEE Conference on Computer Vis... False 1389 Citation False 1390 } False 1391 } False 1392 year = {2018} False 1393 Citing DaSiamRPN False 1394 title = {Detectron}, False 1395 booktitle = {The IEEE Conference on Computer V... False 1396 year = {2017} False 1397 Learning Spatio-Temporal Features with 3D Resi... False 1398 Xia Li, Jianlong Wu, Zhouchen Lin, Hong Liu, H... False 1399 @inproceedings{wang2018vid2vid, False [1400 rows x 2 columns], 'invocation': excerpt invocation 0 Just so you get an idea, it took NYPL staff co... False 1 This repository contains the experiments in th... False 2 The code is built on EDSR (Torch) and tested o... False 3 Surface contact points: 3D coordinates of poin... False 4 Additionally, the aim is not to support the fu... False 5 Renderer backend for tilelive.js that uses nod... False 6 Resulting tiles conform to the JSON equivalent... False 7 construction of graphs from spatial data False 8 Single-image 3D mesh reconstruction False 9 model - model spatial relationships in data wi... False 10 The original motivation for HyVR was the lack ... False 11 SEG-Y Revisions False 12 gprMax is principally written in Python 3 with... False 13 Among other things, it allows you to parse Wel... False 14 Note this is not a package for reading LiDAR d... False 15 TetGen is a program to generate tetrahedral me... False 16 project loading False 17 PyVista is a helper module for the Visualizati... False 18 Segyio can handle a lot of files that are SEG-... False 19 In this repository, we release demo code and p... False 20 tiles server for live feedback when coding False 21 Complete full-space (electric and magnetic sou... False 22 analytical: interface to the analytical, space... False 23 Linear operators and inverse problems are at t... False 24 Nikos Kolotouros provides PyTorch re-implement... 
False 25 TetGen provides various features to generate g... False 26 For now, only Carto based projects are support... False 27 Tilematrix supports metatiling and tile buffer... False 28 This Python module is an interface to Hang Si'... False 29 A highly efficient JavaScript library for slic... False ... ... ... 1925 title={Scale-recurrent Network for Deep Image ... False 1926 Huikai Wu, Shuai Zheng, Junge Zhang, Kaiqi Huang False 1927 booktitle={CVPR}, False 1928 @inproceedings{li2018recurrent, False 1929 Presented at CVPR 2018 False 1930 {ethanlee, jlwu1992, zlin, hongliu}@pku.edu.cn... False 1931 booktitle = {IEEE Conferene on Computer Vision... False 1932 Citation False 1933 title = {Two-Stream Convolutional Networks for... False 1934 @inproceedings{tao2018srndeblur, False 1935 References False 1936 Key Laboratory of Machine Perception (MOE), Sc... False 1937 title = {{PyVista}: 3D plotting and mesh analy... False 1938 HyVR can be attributed by citing the following... False 1939 } False 1940 journal = {Journal of Open Source Software} False 1941 Year = {2018} False 1942 } False 1943 Tristan van Leeuwen, TristanvanLeeuwen False 1944 @inproceedings{zhang2018residual, False 1945 journal={arXiv preprint arXiv:1711.08229}, False 1946 title = {Detectron}, False 1947 } False 1948 If you use Detectron in your research or wish ... False 1949 booktitle={BMVC}, False 1950 booktitle={Proceedings of the European Confere... False 1951 author = {Xinlei Chen and Abhinav Gupta}, False 1952 @inproceedings{LapSRN, False 1953 url = {https://doi.org/10.21105/joss.01450}, False 1954 publisher = {The Open Journal}, False [1955 rows x 2 columns], 'citation': excerpt citation 0 model - model spatial relationships in data wi... False 1 Features False 2 A scene graph is a structured representation o... False 3 Renderer backend for tilelive.js that uses nod... False 4 The input is assumed to represent a single clo... False 5 GemPy was designed from the beginning to suppo... 
False 6 Complete full-space (electric and magnetic sou... False 7 The mapshaper command line program supports es... False 8 Very lite but extendable mapping framework to ... False 9 Learn Once, Write Anywhere: We don't make assu... False 10 graph construction from polygonal lattices, li... False 11 exploratory spatio-temporal data analysis False 12 The file read parameters are based on GSSI's D... False 13 If you give it all of OpenStreetMap and zoom o... False 14 PySAL, the Python spatial analysis library, is... False 15 Resulting tiles conform to the JSON equivalent... False 16 The input scene graph is processed with a grap... False 17 A Jupyter / Leaflet bridge enabling interactiv... False 18 SEG-Y Revisions False 19 This is the implementation of our CVPR 2018 wo... False 20 mplstereonet also includes a number of utiliti... False 21 All traces in a file are assumed to be of the ... False 22 ResNet{50,101,152} False 23 This repository contains the experiments in th... False 24 This is the code for the paper False 25 We build a 3D pose estimation system based mai... False 26 Overview False 27 Flow-Guided Feature Aggregation (FGFA) is init... False 28 The major contributors of this repository incl... False 29 mplleaflet is a Python library that converts a... False .. ... ... 510 booktitle = {Computer Vision and Pattern Recog... True 511 year={2018} True 512 } True 513 Yulun Zhang, Yapeng Tian, Yu Kong, Bineng Zhon... True 514 Yulun Zhang, Yapeng Tian, Yu Kong, Bineng Zhon... True 515 @InProceedings{Lim_2017_CVPR_Workshops, True 516 author = {Lim, Bee and Son, Sanghyun and Kim, ... True 517 title = {Enhanced Deep Residual Networks for S... True 518 booktitle = {The IEEE Conference on Computer V... True 519 month = {July}, True 520 year = {2017} True 521 } True 522 @inproceedings{zhang2018residual, True 523 title={Residual Dense Network for Image Super-... True 524 author={Zhang, Yulun and Tian, Yapeng and Kong... 
True 525 booktitle={CVPR}, True 526 year={2018} True 527 @article{zhang2018rdnir, True 528 title={Residual Dense Network for Image Restor... True 529 booktitle={arXiv}, True 530 @inproceedings{tang2018quantized, True 531 title={Quantized densely connected U-Nets for ... True 532 author={Tang, Zhiqiang and Peng, Xi and Geng, ... True 533 booktitle={ECCV}, True 534 year={2018} True 535 } True 536 @inproceedings{tang2018cu, True 537 title={CU-Net: Coupled U-Nets}, True 538 author={Tang, Zhiqiang and Peng, Xi and Geng, ... True 539 booktitle={BMVC}, True [540 rows x 2 columns]}
Evaluate a TF-IDF + Multinomial Naive Bayes classifier for each category using 5-fold stratified cross-validation.
from sklearn.model_selection import cross_val_score, cross_validate, StratifiedKFold
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import make_pipeline
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score, roc_curve, auc
# Shared model: TF-IDF features feeding a multinomial Naive Bayes classifier.
pipeline = make_pipeline(TfidfVectorizer(), MultinomialNB())

# shuffle=True without a seed draws different folds on every run, which makes
# the scores below (and any later analysis that reuses `cv`) irreproducible.
# A fixed random_state pins the fold assignment.
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# Each corpus is a binary task: the column named after the category is the
# label and `excerpt` is the text to classify.
for category, frame in corpora.items():
    scores = cross_val_score(pipeline, frame.excerpt, frame[category], cv=cv)
    # The "+/-" figure is two standard deviations of the per-fold scores.
    print(f"Category: {category}\nScores: {scores}\nAccuracy: {scores.mean():.4f} (+/- {scores.std()*2:.4f})")
Category: description Scores: [0.79591837 0.75510204 0.78571429 0.75510204 0.74489796] Accuracy: 0.7673 (+/- 0.0396) Category: installation Scores: [0.85714286 0.84285714 0.86785714 0.84285714 0.83928571] Accuracy: 0.8500 (+/- 0.0217) Category: invocation Scores: [0.88010204 0.84693878 0.86189258 0.87179487 0.87692308] Accuracy: 0.8675 (+/- 0.0240) Category: citation Scores: [0.88990826 0.91666667 0.93518519 0.85185185 0.92523364] Accuracy: 0.9038 (+/- 0.0600)
import numpy as np
from scipy import interp
import matplotlib.pyplot as plt
# Cross-validated ROC analysis for the 'description' classifier: one ROC curve
# per fold, plus the mean curve with a +/- 1 std. dev. band.
X = corpora['description'].excerpt
y = corpora['description'].description
tprs = []
aucs = []
# Common FPR grid so the per-fold curves can be averaged point by point.
mean_fpr = np.linspace(0, 1, 100)
print('Description ROC')
for fold, (train, test) in enumerate(cv.split(X, y)):
    # cv.split yields positional indices; .iloc selects by position, so this
    # stays correct even if the Series index is not a plain 0..n-1 range.
    probas_ = pipeline.fit(X.iloc[train], y.iloc[train]).predict_proba(X.iloc[test])
    # Compute ROC curve and area under the curve (column 1 = positive class).
    fpr, tpr, _ = roc_curve(y.iloc[test], probas_[:, 1])
    # np.interp replaces the deprecated scipy.interp alias (same function).
    tprs.append(np.interp(mean_fpr, fpr, tpr))
    tprs[-1][0] = 0.0  # force every interpolated curve to start at (0, 0)
    roc_auc = auc(fpr, tpr)
    aucs.append(roc_auc)
    plt.plot(fpr, tpr, lw=1, alpha=0.3,
             label='ROC fold %d (AUC = %0.2f)' % (fold, roc_auc))

# Diagonal reference line: performance of a random classifier.
plt.plot([0, 1], [0, 1], linestyle='--', lw=2, color='r',
         label='Chance', alpha=.8)

mean_tpr = np.mean(tprs, axis=0)
mean_tpr[-1] = 1.0  # force the mean curve to end at (1, 1)
mean_auc = auc(mean_fpr, mean_tpr)
std_auc = np.std(aucs)
plt.plot(mean_fpr, mean_tpr, color='b',
         label=r'Mean ROC (AUC = %0.2f $\pm$ %0.2f)' % (mean_auc, std_auc),
         lw=2, alpha=.8)

# Shade one standard deviation around the mean curve, clipped to [0, 1].
std_tpr = np.std(tprs, axis=0)
tprs_upper = np.minimum(mean_tpr + std_tpr, 1)
tprs_lower = np.maximum(mean_tpr - std_tpr, 0)
plt.fill_between(mean_fpr, tprs_lower, tprs_upper, color='grey', alpha=.2,
                 label=r'$\pm$ 1 std. dev.')

plt.xlim([-0.05, 1.05])
plt.ylim([-0.05, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('ROC Curve for Description Classification')
plt.legend(loc="lower right")
plt.show()
Description ROC
# Cross-validated ROC analysis for the 'installation' classifier: one ROC
# curve per fold, plus the mean curve with a +/- 1 std. dev. band.
X = corpora['installation'].excerpt
y = corpora['installation'].installation
tprs = []
aucs = []
# Common FPR grid so the per-fold curves can be averaged point by point.
mean_fpr = np.linspace(0, 1, 100)
print('Installation ROC')
for fold, (train, test) in enumerate(cv.split(X, y)):
    # cv.split yields positional indices; .iloc selects by position, so this
    # stays correct even if the Series index is not a plain 0..n-1 range.
    probas_ = pipeline.fit(X.iloc[train], y.iloc[train]).predict_proba(X.iloc[test])
    # Compute ROC curve and area under the curve (column 1 = positive class).
    fpr, tpr, _ = roc_curve(y.iloc[test], probas_[:, 1])
    # np.interp replaces the deprecated scipy.interp alias (same function).
    tprs.append(np.interp(mean_fpr, fpr, tpr))
    tprs[-1][0] = 0.0  # force every interpolated curve to start at (0, 0)
    roc_auc = auc(fpr, tpr)
    aucs.append(roc_auc)
    plt.plot(fpr, tpr, lw=1, alpha=0.3,
             label='ROC fold %d (AUC = %0.2f)' % (fold, roc_auc))

# Diagonal reference line: performance of a random classifier.
plt.plot([0, 1], [0, 1], linestyle='--', lw=2, color='r',
         label='Chance', alpha=.8)

mean_tpr = np.mean(tprs, axis=0)
mean_tpr[-1] = 1.0  # force the mean curve to end at (1, 1)
mean_auc = auc(mean_fpr, mean_tpr)
std_auc = np.std(aucs)
plt.plot(mean_fpr, mean_tpr, color='b',
         label=r'Mean ROC (AUC = %0.2f $\pm$ %0.2f)' % (mean_auc, std_auc),
         lw=2, alpha=.8)

# Shade one standard deviation around the mean curve, clipped to [0, 1].
std_tpr = np.std(tprs, axis=0)
tprs_upper = np.minimum(mean_tpr + std_tpr, 1)
tprs_lower = np.maximum(mean_tpr - std_tpr, 0)
plt.fill_between(mean_fpr, tprs_lower, tprs_upper, color='grey', alpha=.2,
                 label=r'$\pm$ 1 std. dev.')

plt.xlim([-0.05, 1.05])
plt.ylim([-0.05, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('ROC Curve for Installation Classification')
plt.legend(loc="lower right")
plt.show()
Installation ROC
# Cross-validated ROC analysis for the 'invocation' classifier: one ROC curve
# per fold, plus the mean curve with a +/- 1 std. dev. band.
X = corpora['invocation'].excerpt
y = corpora['invocation'].invocation
tprs = []
aucs = []
# Common FPR grid so the per-fold curves can be averaged point by point.
mean_fpr = np.linspace(0, 1, 100)
# Header previously read 'Installation ROC' (copy-paste slip); this section
# evaluates the invocation classifier.
print('Invocation ROC')
for fold, (train, test) in enumerate(cv.split(X, y)):
    # cv.split yields positional indices; .iloc selects by position, so this
    # stays correct even if the Series index is not a plain 0..n-1 range.
    probas_ = pipeline.fit(X.iloc[train], y.iloc[train]).predict_proba(X.iloc[test])
    # Compute ROC curve and area under the curve (column 1 = positive class).
    fpr, tpr, _ = roc_curve(y.iloc[test], probas_[:, 1])
    # np.interp replaces the deprecated scipy.interp alias (same function).
    tprs.append(np.interp(mean_fpr, fpr, tpr))
    tprs[-1][0] = 0.0  # force every interpolated curve to start at (0, 0)
    roc_auc = auc(fpr, tpr)
    aucs.append(roc_auc)
    plt.plot(fpr, tpr, lw=1, alpha=0.3,
             label='ROC fold %d (AUC = %0.2f)' % (fold, roc_auc))

# Diagonal reference line: performance of a random classifier.
plt.plot([0, 1], [0, 1], linestyle='--', lw=2, color='r',
         label='Chance', alpha=.8)

mean_tpr = np.mean(tprs, axis=0)
mean_tpr[-1] = 1.0  # force the mean curve to end at (1, 1)
mean_auc = auc(mean_fpr, mean_tpr)
std_auc = np.std(aucs)
plt.plot(mean_fpr, mean_tpr, color='b',
         label=r'Mean ROC (AUC = %0.2f $\pm$ %0.2f)' % (mean_auc, std_auc),
         lw=2, alpha=.8)

# Shade one standard deviation around the mean curve, clipped to [0, 1].
std_tpr = np.std(tprs, axis=0)
tprs_upper = np.minimum(mean_tpr + std_tpr, 1)
tprs_lower = np.maximum(mean_tpr - std_tpr, 0)
plt.fill_between(mean_fpr, tprs_lower, tprs_upper, color='grey', alpha=.2,
                 label=r'$\pm$ 1 std. dev.')

plt.xlim([-0.05, 1.05])
plt.ylim([-0.05, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('ROC Curve for Invocation Classification')
plt.legend(loc="lower right")
plt.show()
Installation ROC
# Cross-validated ROC analysis for the 'citation' classifier: one ROC curve
# per fold, plus the mean curve with a +/- 1 std. dev. band.
X = corpora['citation'].excerpt
y = corpora['citation'].citation
tprs = []
aucs = []
# Common FPR grid so the per-fold curves can be averaged point by point.
mean_fpr = np.linspace(0, 1, 100)
print('Citation ROC')
for fold, (train, test) in enumerate(cv.split(X, y)):
    # cv.split yields positional indices; .iloc selects by position, so this
    # stays correct even if the Series index is not a plain 0..n-1 range.
    probas_ = pipeline.fit(X.iloc[train], y.iloc[train]).predict_proba(X.iloc[test])
    # Compute ROC curve and area under the curve (column 1 = positive class).
    fpr, tpr, _ = roc_curve(y.iloc[test], probas_[:, 1])
    # np.interp replaces the deprecated scipy.interp alias (same function).
    tprs.append(np.interp(mean_fpr, fpr, tpr))
    tprs[-1][0] = 0.0  # force every interpolated curve to start at (0, 0)
    roc_auc = auc(fpr, tpr)
    aucs.append(roc_auc)
    plt.plot(fpr, tpr, lw=1, alpha=0.3,
             label='ROC fold %d (AUC = %0.2f)' % (fold, roc_auc))

# Diagonal reference line: performance of a random classifier.
plt.plot([0, 1], [0, 1], linestyle='--', lw=2, color='r',
         label='Chance', alpha=.8)

mean_tpr = np.mean(tprs, axis=0)
mean_tpr[-1] = 1.0  # force the mean curve to end at (1, 1)
mean_auc = auc(mean_fpr, mean_tpr)
std_auc = np.std(aucs)
plt.plot(mean_fpr, mean_tpr, color='b',
         label=r'Mean ROC (AUC = %0.2f $\pm$ %0.2f)' % (mean_auc, std_auc),
         lw=2, alpha=.8)

# Shade one standard deviation around the mean curve, clipped to [0, 1].
std_tpr = np.std(tprs, axis=0)
tprs_upper = np.minimum(mean_tpr + std_tpr, 1)
tprs_lower = np.maximum(mean_tpr - std_tpr, 0)
plt.fill_between(mean_fpr, tprs_lower, tprs_upper, color='grey', alpha=.2,
                 label=r'$\pm$ 1 std. dev.')

plt.xlim([-0.05, 1.05])
plt.ylim([-0.05, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('ROC Curve for Citation Classification')
plt.legend(loc="lower right")
plt.show()
Citation ROC