Skip to content

Instantly share code, notes, and snippets.

@thecolorblue
Last active August 28, 2018 16:37
Show Gist options
  • Select an option

  • Save thecolorblue/f9c23f8e107f6b03a5b22d168a18e293 to your computer and use it in GitHub Desktop.

Select an option

Save thecolorblue/f9c23f8e107f6b03a5b22d168a18e293 to your computer and use it in GitHub Desktop.
nlp example
from __future__ import unicode_literals, print_function
import spacy
import json
from spacy.matcher import PhraseMatcher
from spacy.tokens import Doc, Span, Token
import gi
gi.require_version('Gst', '1.0')
from gi.repository import GObject, Gst
def getActionAndTarget(root):
    """Print the action and target described by a parse-tree node.

    ``root`` is a dict with at least the keys ``'word'`` and ``'modifiers'``;
    every entry of ``'modifiers'`` is a dict of the same shape that also
    carries ``'arc'`` (dependency label) and ``'POS_coarse'`` (coarse part of
    speech).

    * action: the node's word, followed by its first particle (``prt``)
      modifier when there is one, e.g. "turn on".
    * target: the first direct object (``dobj``); otherwise the first
      ``pobj`` hanging off the particle; otherwise the empty string.

    Modifiers whose ``POS_coarse`` is ``'VERB'`` are processed recursively
    *before* this node's own result is printed, so nested verbs are reported
    first. The function prints its result and returns ``None``.
    """
    modifiers = root['modifiers']
    # Filter once instead of rebuilding the same lists for every check.
    particles = [m for m in modifiers if m['arc'] == 'prt']
    direct_objects = [m for m in modifiers if m['arc'] == 'dobj']

    action = root['word']
    if particles:
        action += ' ' + particles[0]['word']

    if direct_objects:
        target = direct_objects[0]['word']
    elif particles:
        # A particle does not necessarily carry an object ("shut up");
        # fall back to an empty target instead of raising IndexError.
        particle_objects = [m for m in particles[0]['modifiers']
                            if m['arc'] == 'pobj']
        target = particle_objects[0]['word'] if particle_objects else ''
    else:
        target = ''

    # Plain loop: the recursion is wanted for its printing side effect only.
    for child in modifiers:
        if child['POS_coarse'] == 'VERB':
            getActionAndTarget(child)

    print('action: ', action)
    print('target: ', target)
def translate(text="Turn on the tv", *tech):
    """Parse ``text`` and print the action/target of every parse-tree root.

    text: the utterance to analyse.
    *tech: phrases handed to ``TechRecognizer``; when none are given the
        defaults "tv", "lights" and "speakers" are used.

    The ``en_core_web_sm`` model is loaded and a fresh ``TechRecognizer`` is
    appended to its pipeline on every call. Results are printed, nothing is
    returned.
    """
    phrases = tech if tech else ["tv", "lights", "speakers"]

    nlp = spacy.load('en_core_web_sm')
    nlp.add_pipe(TechRecognizer(nlp, phrases), last=True)

    parse_roots = nlp(text).print_tree()
    print('text: ', text)
    # Debugging aid: print(json.dumps(parse_roots, indent=2))
    for node in parse_roots:
        getActionAndTarget(node)
# print('chunks:', [chunk for chunk in doc.noun_chunks])
# print('ents: ', [ent for ent in doc.ents])
# for s in doc.sents:
# print('types:', [t.dep_ for t in s])
# print('action:', s.root)
# print('action head:', s.root.head)
# print('action:', )
# print([t.text for t in s.root.children])
# print([(t.text, t.head.text) for t in s])
# print('root:', s.root)
# print('right:', [t.text for t in s.root.rights])
# print('left:', [t.text for t in s.root.lefts.text])
# print('right edge:', s.root.right_edge)
# print('left edge:', s.root.left_edge)
class TechRecognizer(object):
    """Pipeline component that marks phrase matches as entities.

    Calling an instance on a document runs a ``PhraseMatcher`` over it, adds
    every match to ``doc.ents``, flags the matched tokens through the
    ``is_tech`` extension and finally merges each matched span.
    """

    # Name under which the component is identified in the pipeline.
    name = 'tech'

    def __init__(self, nlp, companies=tuple(), label='TECH'):
        """Build the matcher and register the custom extensions.

        nlp: language object; supplies ``vocab`` and turns each phrase into
            a pattern document.
        companies: iterable of phrases to match.
        label: entity label, resolved through ``nlp.vocab.strings``.
        """
        self.label = nlp.vocab.strings[label]

        # The matcher key is the literal 'TECH', independent of ``label``.
        self.matcher = PhraseMatcher(nlp.vocab)
        self.matcher.add('TECH', None, *[nlp(phrase) for phrase in companies])

        # force=True lets a later instance re-register the same extensions.
        Token.set_extension('is_tech', default=False, force=True)
        for container in (Doc, Span):
            container.set_extension('has_tech', getter=self.has_tech, force=True)

    def __call__(self, doc):
        """Annotate ``doc`` with the matched phrases and return it."""
        found = []
        for _match_id, begin, stop in self.matcher(doc):
            hit = Span(doc, begin, stop, label=self.label)
            found.append(hit)
            for tok in hit:
                tok._.set('is_tech', True)
            # Re-assign doc.ents with the new span appended to the existing ones.
            entities = list(doc.ents)
            entities.append(hit)
            doc.ents = entities
        # Merge only after every match has been recorded.
        for hit in found:
            hit.merge()
        return doc

    def has_tech(self, tokens):
        """Return True when any token in ``tokens`` has ``is_tech`` set."""
        return any(tok._.get('is_tech') for tok in tokens)
def bus_message(bus, message):
    """Bus "message" handler: translate text carried by "deepspeech" messages.

    bus: the bus that delivered the message (unused).
    message: the delivered message; its structure, when present and named
        "deepspeech", is expected to hold the recognised utterance in its
        "text" field.

    Messages without a structure, or with any other structure name, are
    ignored. Always returns True.
    """
    structure = message.get_structure()
    if structure and structure.get_name() == "deepspeech":
        text = structure.get_value("text")
        # ``unicode`` only exists on Python 2; on Python 3 ``str`` is already
        # the text type, so resolve the right one instead of raising NameError.
        try:
            text_type = unicode  # noqa: F821 -- Python 2 builtin
        except NameError:
            text_type = str
        translate(text_type(text))
    return True
if __name__ == "__main__":
    # Capture audio from PulseAudio, run it through the deepspeech element
    # and hand every recognised utterance to bus_message().
    GObject.threads_init()
    Gst.init(None)
    loop = GObject.MainLoop()
    pipeline = Gst.parse_launch(
        "pulsesrc ! audioconvert ! audiorate ! audioresample ! "
        "deepspeech silence-threshold=0.3 silence-length=20 ! fakesink")
    bus = pipeline.get_bus()
    bus.add_signal_watch()
    bus.connect("message", bus_message)
    pipeline.set_state(Gst.State.PLAYING)
    try:
        loop.run()
    finally:
        # Release the audio device and the bus watch even when the loop is
        # interrupted (e.g. Ctrl+C); the exception itself still propagates.
        pipeline.set_state(Gst.State.NULL)
        bus.remove_signal_watch()
# translate('Sarah, can you please turn on the tv for our meeting')
# translate('The lights are a little bright. Can you turn off the lights?')
# translate('Mute the call')
# translate('Start up the room, turn on the lights, and bake a cake and muffins.')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment