{ "cells": [ { "cell_type": "markdown", "id": "d6f6e3b8", "metadata": {}, "source": [ "## Work Order Processing Demo" ] }, { "cell_type": "markdown", "id": "e8e6cfa2", "metadata": {}, "source": [ "#### Set up path and load DACKAR modules" ] }, { "cell_type": "code", "execution_count": 1, "id": "f65246b3-e6f7-4726-bc90-cd92b5106722", "metadata": {}, "outputs": [], "source": [ "%reload_ext autoreload\n", "%autoreload 2\n", "\n", "# Import libraries\n", "import os, sys\n", "import logging\n", "import warnings\n", "import spacy\n", "from spacy import displacy\n", "warnings.filterwarnings(\"ignore\")\n", "\n", "cwd = os.getcwd()\n", "frameworkDir = os.path.abspath(os.path.join(cwd, os.pardir, 'src'))\n", "sys.path.append(frameworkDir)\n", "\n", "from dackar.workflows.WorkOrderProcessing import WorkOrderProcessing\n", "from dackar.utils.nlp.nlp_utils import generatePatternList\n", "\n", "logging.basicConfig(format='%(asctime)s %(name)-20s %(levelname)-8s %(message)s', datefmt='%d-%b-%y %H:%M:%S', level=logging.INFO)\n", "logging.getLogger().setLevel(logging.ERROR)" ] }, { "cell_type": "markdown", "id": "485dacf2", "metadata": {}, "source": [ "#### Generate entity patterns and process text using the WorkOrderProcessing class\n", "\n", "The following information will be identified:\n", "\n", "- Entities \n", "- Aliases associated with entities\n", "- Status associated with entities" ] }, { "cell_type": "code", "execution_count": 2, "id": "e31b504e-29da-4e48-a85c-80231436a200", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "28-May-25 09:57:58 dackar.workflows.WorkflowBase INFO Create instance of WorkOrderProcessing\n", "28-May-25 09:58:00 dackar.utils.nlp.nlp_utils INFO Model: core_web_lg, Language: en\n", "28-May-25 09:58:00 dackar.utils.nlp.nlp_utils INFO Available pipelines:pysbdSentenceBoundaries, tok2vec, tagger, parser, attribute_ruler, lemmatizer, mergePhrase, normEntities, initCoref, aliasResolver, anaphorCoref, anaphorEntCoref\n" ] 
}, { "data": { "text/html": [ "
\n", "\n", " 1-91120-P1\n", " cws_component\n", "\n", ", CLEAN PUMP AND MOTOR. \n", "\n", " 1-91120-PM1\n", " cws_component\n", "\n", " REQUIRES OIL. \n", "\n", " 91120\n", " cws_component\n", "\n", ", CLEAN TRASH SCREEN
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# User-defined label and ID attached to the entity patterns\n", "entLabel = \"cws_component\"\n", "entId = \"OPM\"\n", "\n", "# Component tags to recognize and the work-order text to analyze\n", "entIDList = ['1-91120-P1', '1-91120-PM1', '91120']\n", "text = \"1-91120-P1, CLEAN PUMP AND MOTOR. 1-91120-PM1 REQUIRES OIL. 91120, CLEAN TRASH SCREEN\"\n", "\n", "# Load the spaCy language model and build the DACKAR workflow around it\n", "nlp = spacy.load(\"en_core_web_lg\", exclude=[])\n", "matcher = WorkOrderProcessing(nlp, entID=entId)\n", "\n", "# Generate patterns for the tags and add them to the workflow\n", "patternsEnts = generatePatternList(\n", "  entIDList,\n", "  label=entLabel,\n", "  id=entId,\n", "  nlp=nlp,\n", "  attr=\"LEMMA\",\n", ")\n", "matcher.addEntityPattern('cws_entity_ruler', patternsEnts)\n", "\n", "# Parse the full text and render the recognized entities inline\n", "doc = nlp(text)\n", "displacy.render(doc, style='ent', jupyter=True)" ] }, { "cell_type": "markdown", "id": "fbc7e334", "metadata": {}, "source": [ "#### Processing work order accumulatively" ] }, { "cell_type": "code", "execution_count": 3, "id": "235673ea-9868-42ee-8e48-9f7d46e8c653", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "28-May-25 09:58:00 dackar.workflows.WorkOrderProcessing INFO Start to extract health status\n", "28-May-25 09:58:00 dackar.workflows.WorkOrderProcessing INFO End of health status extraction!\n", "28-May-25 09:58:00 dackar.workflows.WorkOrderProcessing INFO Start to extract causal relation using OPM model information\n", "28-May-25 09:58:00 dackar.workflows.WorkOrderProcessing INFO End of causal relation extraction!\n", "28-May-25 09:58:00 dackar.workflows.WorkOrderProcessing INFO Start to extract health status\n", "28-May-25 09:58:00 dackar.workflows.WorkOrderProcessing INFO End of health status extraction!\n", "28-May-25 09:58:00 dackar.workflows.WorkOrderProcessing INFO Start to extract causal relation using OPM model information\n", "28-May-25 09:58:00 dackar.workflows.WorkOrderProcessing INFO End of causal relation extraction!\n", "28-May-25 09:58:00 dackar.workflows.WorkOrderProcessing INFO Start to extract health 
status\n", "28-May-25 09:58:00 dackar.workflows.WorkOrderProcessing INFO End of health status extraction!\n", "28-May-25 09:58:00 dackar.workflows.WorkOrderProcessing INFO Start to extract causal relation using OPM model information\n", "28-May-25 09:58:00 dackar.workflows.WorkOrderProcessing INFO End of causal relation extraction!\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
entityaliasentity_textstatusconjecturenegationnegation_text
01-91120-P1unit 1 pumpunit 1 pumpCLEAN PUMP AND MOTORFalseFalse
11-91120-PM1unit 1 pump motorunit 1 pump motorOILFalseFalse
291120pumppumpCLEAN TRASH SCREENFalseFalse
\n", "
" ], "text/plain": [ " entity alias entity_text status \\\n", "0 1-91120-P1 unit 1 pump unit 1 pump CLEAN PUMP AND MOTOR \n", "1 1-91120-PM1 unit 1 pump motor unit 1 pump motor OIL \n", "2 91120 pump pump CLEAN TRASH SCREEN \n", "\n", " conjecture negation negation_text \n", "0 False False \n", "1 False False \n", "2 False False " ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Presumably clears results accumulated by earlier calls, so re-running this cell starts clean\n", "matcher.reset()\n", "# Naive split on '.'; blank fragments (e.g. after a trailing period) are skipped,\n", "# and every other fragment is passed to the workflow unchanged\n", "sents = [sent for sent in text.split('.') if sent.strip()]\n", "for sent in sents:\n", "  matcher(sent)\n", "# NOTE(review): _entStatus is a private attribute; switch to a public accessor if DACKAR provides one\n", "matcher._entStatus" ] }, { "cell_type": "markdown", "id": "b563795f", "metadata": {}, "source": [ "#### Accessing attributes of entities" ] }, { "cell_type": "code", "execution_count": 4, "id": "a8e7de6c-ce1a-4600-a598-b2124797a7d2", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "1-91120-P1 unit 1 pump OPM cws_component\n", "1-91120-PM1 unit 1 pump motor OPM cws_component\n", "91120 pump OPM cws_component\n" ] } ], "source": [ "# `doc` is the full-text parse created in the entity-pattern cell, not the per-sentence runs above\n", "for ent in doc.ents:\n", "  print(ent.text, ent._.alias, ent.ent_id_, ent.label_)" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.9" } }, "nbformat": 4, "nbformat_minor": 5 }