Emergent Activity Analysis Demo

  • Set up path and load modules

[ ]:
import pandas as pd
import os, sys
import spacy
from spacy import displacy

# Append the ``src`` directory that sits one level above the current working
# directory to the import search path, ahead of the DACKAR imports below.
cwd = os.getcwd()
srcDir = os.path.join(cwd, os.pardir, 'src')
sys.path.append(srcDir)

# Load DACKAR module
from dackar.utils.tagKeywordListReader import entityLibrary
from dackar.utils.nlp.nlp_utils import generatePatternList, resetPipeline
from dackar.pipelines.GeneralEntity import GeneralEntity

# Load the spaCy language model "en_core_web_lg". The empty ``exclude`` list
# asks spaCy not to leave out any of the model's pipeline components.
nlp = spacy.load("en_core_web_lg", exclude=[])

  • Load the internal nuclear entity library

[ ]:
# Path to the Excel workbook holding the tag/keyword lists; it is relative,
# so it is resolved against the notebook's current working directory.
fileName = '../data/tag_keywords_lists.xlsx'
# Build the DACKAR reader for that workbook.
lib = entityLibrary(fileName)
# Retrieve the library object; later cells read it with ``library.keys()`` and
# ``library[key]``, passing each key on as the entity label.
library = lib.getLibrary()
  • Generate the pattern list that can be used by the NLP NER module

[ ]:
# Build one flat list of NER patterns covering every label in the entity
# library. Each key of ``library`` is used as the entity label, and the value
# stored under it is handed to ``generatePatternList`` for conversion.
patterns = []
# The loop variable is deliberately NOT named ``lib``: that name is bound to
# the ``entityLibrary`` reader created when the library was loaded, and a
# ``for`` target stays bound after the loop, which would replace the reader
# with a plain label string.
for label in library.keys():
    patternsEnts = generatePatternList(library[label], label=label, id='entity', nlp=nlp, attr="LEMMA")
    # Grow the list in place instead of rebuilding it on every iteration.
    patterns.extend(patternsEnts)
  • Example for nuclear NER

[ ]:
# Reset the pipeline through the DACKAR helper before running the example.
# NOTE(review): the empty list is presumably "no extra pipes" — confirm
# against resetPipeline's signature.
resetPipeline(nlp, [])

# Sample free-text record used as input for the entity-recognition demo.
rawText = "CCW P1 shut down due to water in upper bearing (per SRE - WR # 921999), CD1E isolated and 45000-V43 closed, installed TCR # 23501 to raise vibration alarm setpoints"
doc = nlp(rawText)

# Build the entity pipeline from the generated patterns and apply it to the
# already-parsed document.
generalEntity = GeneralEntity(nlp, patterns)
processedDoc = generalEntity(doc)

# Render the entity and dependency views only when at least one entity was
# recognised, then list every entity together with its alias.
foundEntities = processedDoc.ents
if foundEntities:
    for renderStyle in ('ent', 'dep'):
        displacy.render(processedDoc, style=renderStyle, jupyter=True)

for ent in foundEntities:
    print("Entity: ", ent.text, "Alias: ", ent._.alias)