#!/usr/bin/env python # coding: utf-8 # In[1]: get_ipython().run_cell_magic('html', '', '\n
\n\n') # In[2]: get_ipython().run_cell_magic('capture', '', '%load_ext autoreload\n%autoreload 2\nimport sys\nsys.path.append("..")\nfrom statnlpbook.util import execute_notebook\nimport statnlpbook.parsing as parsing\nfrom statnlpbook.transition import *\nfrom statnlpbook.dep import *\nimport pandas as pd\nfrom io import StringIO\nfrom IPython.display import display, HTML\n\nexecute_notebook(\'transition-based_dependency_parsing.ipynb\')\n') # # $$ # \newcommand{\Xs}{\mathcal{X}} # \newcommand{\Ys}{\mathcal{Y}} # \newcommand{\y}{\mathbf{y}} # \newcommand{\balpha}{\boldsymbol{\alpha}} # \newcommand{\bbeta}{\boldsymbol{\beta}} # \newcommand{\aligns}{\mathbf{a}} # \newcommand{\align}{a} # \newcommand{\source}{\mathbf{s}} # \newcommand{\target}{\mathbf{t}} # \newcommand{\ssource}{s} # \newcommand{\starget}{t} # \newcommand{\repr}{\mathbf{f}} # \newcommand{\repry}{\mathbf{g}} # \newcommand{\x}{\mathbf{x}} # \newcommand{\prob}{p} # \newcommand{\a}{\alpha} # \newcommand{\b}{\beta} # \newcommand{\vocab}{V} # \newcommand{\params}{\boldsymbol{\theta}} # \newcommand{\param}{\theta} # \DeclareMathOperator{\perplexity}{PP} # \DeclareMathOperator{\argmax}{argmax} # \DeclareMathOperator{\argmin}{argmin} # \newcommand{\train}{\mathcal{D}} # \newcommand{\counts}[2]{\#_{#1}(#2) } # \newcommand{\length}[1]{\text{length}(#1) } # \newcommand{\indi}{\mathbb{I}} # $$ # In[3]: get_ipython().run_line_magic('load_ext', 'tikzmagic') # # Parsing # + Syntactic constituency # + Syntactic dependencies # + Parsing algorithms # + Evaluation # # Syntactic constituency # ## Reminder: parts of speech (POS) # # [Parts of speech](sequence_labeling_slides.ipynb) categorise the syntactic function of words. 
# # [Penn Treebank POS tagset](https://www.ling.upenn.edu/courses/Fall_2003/ling001/penn_treebank_pos.html): # # Tag || Example # :--- | :--- | :--- # CC | Coordinating conjunction | *and* # CD | Cardinal number | *1* # DT | Determiner | *the* # EX | Existential there | *there* # FW | Foreign word | *שלום* # IN | Preposition or subordinating conjunction | *in* # JJ | Adjective | *high* # JJR | Adjective, comparative | *higher* # JJS | Adjective, superlative | *highest* # LS | List item marker | *1.* # MD | Modal | *can* # NN | Noun, singular or mass | *desk* # NNS | Noun, plural | *desks* # NNP | Proper noun, singular | *Denmark* # NNPS | Proper noun, plural | *Danes* # PDT | Predeterminer | *both* # POS | Possessive ending | *'s* # PRP | Personal pronoun | *you* # PRP\$ | Possessive pronoun | *your* # RB | Adverb | *well* # RBR | Adverb, comparative | *better* # RBS | Adverb, superlative | *best* # RP | Particle | # SYM | Symbol | # TO | to | # UH | Interjection | # VB | Verb, base form | *see* # VBD | Verb, past tense | *saw* # VBG | Verb, gerund or present participle | *seeing* # VBN | Verb, past participle | *seen* # VBP | Verb, non-3rd person singular present | *see* # VBZ | Verb, 3rd person singular present | *sees* # WDT | Wh-determiner | # WP | Wh-pronoun | # WP\$ | Possessive wh-pronoun | # WRB | Wh-adverb | # ## Syntactic constituents # # **Phrases** also have a grammatical function when they are syntactic constituents. # # [Penn Treebank constituent tagset](https://www.ldc.upenn.edu/sites/www.ldc.upenn.edu/files/penn-etb-2-style-guidelines.pdf): # # Phrase Level || Example # :--- | :--- | :--- # ADJP | Adjective Phrase | *really high* # ADVP | Adverb Phrase | *very well* # CONJP | Conjunction Phrase | *as well as* # FRAG | Fragment | # INTJ | Interjection | # LST | List marker | # NP | Noun Phrase | *high desk* # PP | Prepositional Phrase | *at home* # PRN | Parenthetical | # PRT | Particle. 
Category for words that should be tagged RP | # QP | Quantifier Phrase (i.e. complex measure/amount phrase); used within NP | # RRC | Reduced Relative Clause | # VP | Verb Phrase | *see the desk* # WHADJP | Wh-adjective Phrase. Adjectival phrase containing a wh-adverb | *how hot* # WHADVP | Wh-adverb Phrase, containing a wh-adverb | *how well* # WHNP | Wh-noun Phrase, containing some wh-word | *which book* # WHPP | Wh-prepositional Phrase, containing a wh-noun phrase | *of which* # X | Unknown, uncertain, or unbracketable. | # Clause Level || # :--- | :--- # S | Simple declarative clause, i.e. one that is not introduced by a (possibly empty) subordinating conjunction or a wh-word and that does not exhibit subject-verb inversion. # SBAR | Clause introduced by a (possibly empty) subordinating conjunction. # SBARQ | Direct question introduced by a wh-word or a wh-phrase. Indirect questions and relative clauses should be bracketed as SBAR, not SBARQ. # SINV | Inverted declarative sentence, i.e. one in which the subject follows the tensed verb or modal. # SQ | Inverted yes/no question, or main clause of a wh-question, following the wh-phrase in SBARQ. # ## Trees # # A **tree** is a connected acyclic undirected graph. # # Graphs consist of **nodes** and **edges** between them. # #![]() |
# ![]() |
#