{ "cells": [ { "cell_type": "markdown", "id": "bd41e798", "metadata": {}, "source": [ "## Emergent Activity Analysis Demo" ] }, { "cell_type": "markdown", "id": "40e5fe7a", "metadata": {}, "source": [ "- Set up the path and load modules" ] }, { "cell_type": "code", "execution_count": 1, "id": "2891ade9-9dc5-4f4e-9b04-76610a68def7", "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import os, sys\n", "import spacy\n", "from spacy import displacy\n", "\n", "# Make the `src` directory one level above the working directory importable\n", "cwd = os.getcwd()\n", "sys.path.append(os.path.join(cwd, os.pardir, 'src'))\n", "\n", "# Load DACKAR module\n", "from dackar.utils.tagKeywordListReader import entityLibrary\n", "from dackar.utils.nlp.nlp_utils import generatePatternList, resetPipeline\n", "from dackar.pipelines.GeneralEntity import GeneralEntity\n", "\n", "# Load language model\n", "nlp = spacy.load(\"en_core_web_lg\", exclude=[])\n" ] }, { "cell_type": "markdown", "id": "f4fbe46a", "metadata": {}, "source": [ "- Load the internal nuclear entity library" ] }, { "cell_type": "code", "execution_count": 2, "id": "0aa6cbe2", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Number of listed keywords: 16\n" ] } ], "source": [ "fileName = '../data/tag_keywords_lists.xlsx'\n", "lib = entityLibrary(fileName)\n", "library = lib.getLibrary()" ] }, { "cell_type": "markdown", "id": "9ff4faf5", "metadata": {}, "source": [ "- Generate a pattern list that can be used by the NLP NER module" ] }, { "cell_type": "code", "execution_count": 3, "id": "6c267ccb", "metadata": {}, "outputs": [], "source": [ "patterns = []\n", "# Each key of `library` is used as the entity label; its value is the keyword collection passed to generatePatternList.\n", "# The loop uses its own names so the `lib` reader object created in the previous cell is not overwritten.\n", "for label, keywords in library.items():\n", "    patternsEnts = generatePatternList(keywords, label=label, id='entity', nlp=nlp, attr=\"LEMMA\")\n", "    patterns.extend(patternsEnts)" ] }, { "cell_type": "markdown", "id": "6800b46b", "metadata": {}, "source": [ "- Example of nuclear NER" ] }, { "cell_type": "code", "execution_count": 4, "id": "a8087128", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
CCW P1 \n", "\n", " shut down\n", " fail_type_n\n", "\n", " due to \n", "\n", " water\n", " opd_hyd_pne\n", "\n", " in upper \n", "\n", " bearing\n", " comp_mech_rot\n", "\n", " (per \n", "\n", " SRE - WR\n", " ORG\n", "\n", " # \n", "\n", " 921999\n", " MONEY\n", "\n", "), CD1E \n", "\n", " isolated\n", " mnt_ops\n", "\n", " and \n", "\n", " 45000-V43\n", " MONEY\n", "\n", " closed, \n", "\n", " installed\n", " mnt_ops\n", "\n", " TCR # \n", "\n", " 23501\n", " MONEY\n", "\n", " to raise \n", "\n", " vibration\n", " deg_mech\n", "\n", " \n", "\n", " alarm\n", " ast_I&C\n", "\n", " setpoints
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "\n", "\n", " CCW\n", " PROPN\n", "\n", "\n", "\n", " P1\n", " PROPN\n", "\n", "\n", "\n", " shut\n", " VERB\n", "\n", "\n", "\n", " down\n", " ADP\n", "\n", "\n", "\n", " due\n", " ADP\n", "\n", "\n", "\n", " to\n", " ADP\n", "\n", "\n", "\n", " water\n", " NOUN\n", "\n", "\n", "\n", " in\n", " ADP\n", "\n", "\n", "\n", " upper\n", " ADJ\n", "\n", "\n", "\n", " bearing (\n", " NOUN\n", "\n", "\n", "\n", " per\n", " ADP\n", "\n", "\n", "\n", " SRE -\n", " PROPN\n", "\n", "\n", "\n", " WR #\n", " NOUN\n", "\n", "\n", "\n", " 921999),\n", " NUM\n", "\n", "\n", "\n", " CD1E\n", " NOUN\n", "\n", "\n", "\n", " isolated\n", " VERB\n", "\n", "\n", "\n", " and\n", " CCONJ\n", "\n", "\n", "\n", " 45000-\n", " NUM\n", "\n", "\n", "\n", " V43\n", " PROPN\n", "\n", "\n", "\n", " closed,\n", " VERB\n", "\n", "\n", "\n", " installed\n", " VERB\n", "\n", "\n", "\n", " TCR #\n", " NOUN\n", "\n", "\n", "\n", " 23501\n", " NUM\n", "\n", "\n", "\n", " to\n", " PART\n", "\n", "\n", "\n", " raise\n", " VERB\n", "\n", "\n", "\n", " vibration\n", " NOUN\n", "\n", "\n", "\n", " alarm\n", " NOUN\n", "\n", "\n", "\n", " setpoints\n", " NOUN\n", "\n", "\n", "\n", " \n", " \n", " compound\n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " nsubj\n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " prt\n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " prep\n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " pcomp\n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " pobj\n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " prep\n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " amod\n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " pobj\n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " prep\n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " nmod\n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " pobj\n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " nummod\n", " \n", " \n", "\n", 
"\n", "\n", " \n", " \n", " nsubj\n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " advcl\n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " cc\n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " nummod\n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " nsubj\n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " conj\n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " conj\n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " dobj\n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " nummod\n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " aux\n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " advcl\n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " compound\n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " compound\n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " dobj\n", " \n", " \n", "\n", "" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "Entity: shut down Alias: None\n", "Entity: water Alias: None\n", "Entity: bearing Alias: None\n", "Entity: SRE - WR Alias: None\n", "Entity: 921999 Alias: None\n", "Entity: isolated Alias: None\n", "Entity: 45000-V43 Alias: None\n", "Entity: installed Alias: None\n", "Entity: 23501 Alias: None\n", "Entity: vibration Alias: None\n", "Entity: alarm Alias: None\n" ] } ], "source": [ "# Reset the pipeline, then parse the sample text before building the entity matcher (order matters)\n", "resetPipeline(nlp, [])\n", "\n", "rawText = \"CCW P1 shut down due to water in upper bearing (per SRE - WR # 921999), CD1E isolated and 45000-V43 closed, installed TCR # 23501 to raise vibration alarm setpoints\"\n", "doc = nlp(rawText)\n", "generalEntity = GeneralEntity(nlp, patterns)\n", "processedDoc = generalEntity(doc)\n", "\n", "# Show the entity and dependency views only when at least one entity was found\n", "if processedDoc.ents:\n", "    for renderStyle in ('ent', 'dep'):\n", "        displacy.render(processedDoc, style=renderStyle, jupyter=True)\n", "\n", "# List every recognized entity together with its alias extension attribute\n", "for ent in processedDoc.ents:\n", "    print(\"Entity: \", ent.text, \"Alias: \", ent._.alias)" ] } ], "metadata": { "hide_input": false, "kernelspec": { "display_name": 
"Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.9" } }, "nbformat": 4, "nbformat_minor": 5 }