Noun phrases with spacy

后端 未结 3 1272
予麋鹿
予麋鹿 2020-12-07 21:22

How can I extract noun phrases from text using spacy?
I am not referring to part of speech tags. In the documentation I cannot find anything about noun phrases or regular parse trees.

3条回答
  •  情书的邮戳
    2020-12-07 21:39

    import spacy

    # Load the `en_core_web_sm` pipeline and parse one example sentence.
    nlp = spacy.load("en_core_web_sm")
    doc = nlp('Bananas are an excellent source of potassium.')

    # Walk the noun chunks of the parsed document and print each chunk's text.
    for chunk in doc.noun_chunks:
        chunk_text = chunk.text
        print(chunk_text)
    '''
      Bananas
      an excellent source
      potassium
    '''
    
    # Show every token's dependency label in both forms: `dep` is the integer
    # ID and `dep_` is the readable string for the same label.
    for token in doc:
        dep_id, dep_name = token.dep, token.dep_
        print('word.dep:', dep_id, ' | ', 'word.dep_:', dep_name)
    '''
      word.dep: 429  |  word.dep_: nsubj
      word.dep: 8206900633647566924  |  word.dep_: ROOT
      word.dep: 415  |  word.dep_: det
      word.dep: 402  |  word.dep_: amod
      word.dep: 404  |  word.dep_: attr
      word.dep: 443  |  word.dep_: prep
      word.dep: 439  |  word.dep_: pobj
      word.dep: 445  |  word.dep_: punct
    '''
    
    # Import only the dependency-label symbols that are actually used, rather
    # than star-importing the whole module (which floods the namespace and
    # hides where each name comes from).
    from spacy.symbols import dobj, iobj, nsubj, nsubjpass, pobj

    # Integer IDs of the subject/object dependency labels; they are compared
    # against `word.dep`, which is also an integer.
    np_labels = {nsubj, nsubjpass, dobj, iobj, pobj}
    print('np_labels:', np_labels)
    '''
      np_labels: {416, 422, 429, 430, 439}
    '''
    

    # Background on generators (using `yield` instead of `return`):
    # https://www.geeksforgeeks.org/use-yield-keyword-instead-return-keyword-python/

    def iter_nps(doc):
        """Lazily yield the dependency label string of each matching token.

        A token matches when its integer ``dep`` ID is a member of the
        module-level ``np_labels`` set.
        """
        yield from (token.dep_ for token in doc if token.dep in np_labels)

    # Calling a generator function only creates the generator object; because
    # the result is neither iterated nor printed here, nothing is displayed.
    iter_nps(doc)
    '''
      
    '''
    
    ## Modified method:
    def iter_nps(doc):
        """Print the text and dependency label of every matching token.

        A token matches when its integer ``dep`` ID is a member of the
        module-level ``np_labels`` set. Output is printed immediately and
        the function returns ``None``.
        """
        for token in doc:
            if token.dep not in np_labels:
                continue
            print(token.text, token.dep_)

    iter_nps(doc)
    '''
      Bananas nsubj
      potassium pobj
    '''
    
    # Parse a second example sentence and list its noun chunks.
    sentence = 'BRCA1 is a tumor suppressor protein that functions to maintain genomic stability.'
    doc = nlp(sentence)
    for chunk in doc.noun_chunks:
        print(chunk.text)
    '''
      BRCA1
      a tumor suppressor protein
      genomic stability
    '''
    
    # Run the printing version of iter_nps on the second sentence: it prints
    # each token whose dependency label is in np_labels, with that label.
    iter_nps(doc)
    '''
      BRCA1 nsubj
      that nsubj
      stability dobj
    '''
    

提交回复
热议问题