!pip install jieba
!pip install refo
!pip install rdflib
from refo import finditer, Predicate, Star, Any
import re
import rdflib
import jieba
import jieba.posseg as pseg
Knowledge
Build a small RDF knowledge graph of car-brand facts with rdflib.
URI_PREFIX = 'http://kgdemo.com/'
# Toy knowledge base: each triple links a car brand to an answer string through
# one of three predicates: is_what (what is it), is_how (how is it) and
# is_compared (how does it compare to others).
triples = [
('宝马', 'is_what', '宝马(BMW)是德国豪华汽车品牌'),
('宝马', 'is_how', '德系大品牌值得信赖,各方面口碑都很好'),
('宝马', 'is_compared', '各有千秋,但是人生苦短,我选宝马'),
('捷豹', 'is_what', '捷豹(Jaguar)英国豪华汽车品牌,英国皇室御用品牌,1935年诞生'),
('宾利', 'is_how', '举世闻名的豪华汽车制造品牌,非常昂贵哦'),
('帕加尼', 'is_what', '帕加尼(Pagani)是一家位于意大利摩德纳的超级跑车制造商,该车厂坚持手工打造车辆,其汽车产量非常少,价格也十分昂贵'),
('广汽本田', 'is_what', '广汽本田汽车有限公司(原广州本田汽车有限公司;简称广汽本田)于1998年7月1日成立,它是由广州汽车集团公司与日本本田技研工业株式会社共同出资组建的合资公司,双方各占50%股份,合作年限为30年'),
('北京奔驰', 'is_how', '大品牌值得信赖,我经常在宝马的后视镜里看到它'),
]
graph = rdflib.Graph()
# Map every distinct resource (subjects, predicates and answer strings alike) to
# a URI under URI_PREFIX, and keep the reverse map so query results can be
# turned back into readable text. Note that the answer strings are stored as
# URIRefs rather than Literals, which is what makes the reverse lookup work.
resources = set([r for triple in triples for r in triple])
resource2uri = {r: URI_PREFIX + r for r in resources}
uri2resource = {uri: r for r, uri in resource2uri.items()}
for (s, p, o) in triples:
    s_uri = rdflib.URIRef(resource2uri[s])
    p_uri = rdflib.URIRef(resource2uri[p])
    o_uri = rdflib.URIRef(resource2uri[o])
    graph.add((s_uri, p_uri, o_uri))
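A quick, optional sanity check (not part of the original pipeline): list the graph contents back through uri2resource to confirm the triples loaded as expected.
for s, p, o in graph:
    print(uri2resource[str(s)], '--', uri2resource[str(p)], '-->', uri2resource[str(o)])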
# Register multi-word brand names so jieba keeps them as single tokens.
jieba.add_word('广汽本田')
jieba.add_word('北京奔驰')
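Optional check: segment a sample question to confirm that '广汽本田' now comes out as one token; the exact POS tags depend on the jieba version and dictionary.
for token, tag in pseg.cut('我想了解一下广汽本田'):
    print(token, tag)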
Rule
Define refo patterns over the segmented words and map each matched pattern to a SPARQL query template.
class W(Predicate):
    """refo predicate that matches a Word on both its token and its POS tag."""
    def __init__(self, token='.*', pos='.*'):
        self.token = re.compile(token + '$')
        self.pos = re.compile(pos + '$')
        super(W, self).__init__(self.match)

    def match(self, word):
        m1 = self.token.match(word.token)
        m2 = self.pos.match(word.pos)
        return m1 and m2
class Rule(object):
    """A refo pattern (condition) paired with a function (action) that turns the
    matched words into a SPARQL query."""
    def __init__(self, condition=None, action=None):
        self.condition = condition
        self.action = action

    def apply(self, sentence):
        matches = []
        for m in finditer(self.condition, sentence):
            i, j = m.span()
            matches.extend(sentence[i:j])
        return self.action(matches)
class Word(object):
    """A segmented word together with its part-of-speech tag."""
    def __init__(self, token, pos):
        self.token = token
        self.pos = pos
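A small illustration of how a W predicate is matched against Word objects. It assumes jieba's default dictionary segments '宝马' as a single token, but makes no assumption about the POS tag it receives.
sample = [Word(token, pos) for token, pos in pseg.cut('宝马怎么样')]
print([(w.token, w.pos) for w in sample])
print(bool(W(token='宝马').match(sample[0])))  # True: only the token pattern is constrained here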
def what_is_xxx(matches):
    """Build a SPARQL query asking what the matched entity is."""
    if len(matches) > 0:
        print("Query:", " ".join([word.token + '|' + word.pos for word in matches]))
        for word in matches:
            if word.pos in ('nr', 'nz', 'ns', 'x'):
                return (
                    """
                    PREFIX : <%s>
                    SELECT DISTINCT ?o WHERE {
                      :%s :is_what ?o .
                    }
                    """ % (URI_PREFIX, word.token)
                )
def how_is_xxx(matches):
    """Build a SPARQL query asking how (good) the matched entity is."""
    if len(matches) > 0:
        print("Query:", " ".join([word.token + '|' + word.pos for word in matches]))
        for word in matches:
            if word.pos in ('nr', 'nz', 'ns', 'x'):
                return (
                    """
                    PREFIX : <%s>
                    SELECT DISTINCT ?o WHERE {
                      :%s :is_how ?o .
                    }
                    """ % (URI_PREFIX, word.token)
                )
def xxx_compared_to(matches):
    """Build a SPARQL query asking how the matched entity compares to others."""
    if len(matches) > 0:
        print("Query:", " ".join([word.token + '|' + word.pos for word in matches]))
        for word in matches:
            if word.pos in ('nr', 'v'):
                return (
                    """
                    PREFIX : <%s>
                    SELECT DISTINCT ?o WHERE {
                      :%s :is_compared ?o .
                    }
                    """ % (URI_PREFIX, word.token)
                )
# Entity words: person, organization and place names as tagged by jieba.
noun = (W(pos='nr') | W(pos='nz') | W(pos='ns'))
rules = [
    # "X 和 Y (比) 怎么样 / 哪个(更)好" -> comparison question
    Rule(condition=(noun | W(pos='v')) + W('和') + (noun | W(pos='v')) +
                   Star(W('比')) +
                   (W('怎么样') | W('怎样') | (W('哪个') + (W('好') | W('更好')))),
         action=xxx_compared_to),
    # "X ... 怎么样 / 怎样 / 如何 / 好用吗" -> how-is-it question
    Rule(condition=(noun | W(pos='x')) + Star(Any(), greedy=False) +
                   (W('怎么样') | W('怎样') | W('如何') | (W('好用') + W('吗'))),
         action=how_is_xxx),
    # "什么是 X" / "X 是什么" / "讲解/介绍/了解 ... X" -> what-is-it question
    Rule(condition=W(pos='r') + W('是') + (noun | W(pos='x')) |
                   (noun | W(pos='x')) + W('是') + W(pos='r') |
                   (W('讲解') | W('介绍') | W('了解')) + Star(Any(), greedy=False) + (noun | W(pos='x')),
         action=what_is_xxx),
]
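Illustrative step-through (optional): applying a single rule by hand shows the intermediate SPARQL string before it is sent to rdflib. This assumes '宝马' receives one of the nr/nz/ns/x tags, which the main loop below relies on as well.
demo = [Word(token, pos) for token, pos in pseg.cut('宝马是什么')]
print(rules[2].apply(demo))  # templated query on :宝马 :is_what, or None if the pattern does not match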
# End-to-end loop: segment each question, try the rules in order, run the first
# generated SPARQL query against the graph and print the answer.
for utt in ['宝马是什么',
            '我想了解一下宝马',
            '给我介绍一下宝马',
            '给我讲解一下捷豹这个汽车品牌',
            '给我介绍一下帕加尼',
            '我想了解一下广汽本田',
            '宝马这个牌子的汽车怎么样',
            '宾利这个牌子的汽车怎么样',
            '北京奔驰怎么样',
            '宝马如何呢',
            '宝马汽车好用吗',
            '宝马和奔驰比怎么样',
            '宝马和奔驰比哪个好',
            '宝马和奔驰比哪个更好']:
    is_matched = False
    words = [Word(token, tag) for token, tag in pseg.cut(utt)]
    for rule in rules:
        db_query = rule.apply(words)
        if db_query:
            for row in graph.query(db_query):
                print(db_query)
                print('Output:', uri2resource[row.o.toPython()])
                print()
            print()
            is_matched = True
            break
    if not is_matched:
        print('Not Matched:', [(w.token, w.pos) for w in words])