Skip to content

Commit 44c7ef4

Browse files
authored
#309 Add parsers for provider ATT (#313)
* Add dateutil and pandas[optional] as dependencies. * Add openpyxl as optional dependency - required for open format spreadsheets in pandas. * Add generic Xlsx Parser. * Add parsers for ATT service provider. * Skip 'Xlsx' tests if optional packages are not installed. * Resolve issue with naive datetime. * Remove trailing space requirement in regex. * Update test emails. * Add end to end tests for provider ATT. * Catch exceptions in tests caused by missing, optional imports. * Add coverage as a developer dependency. * Specify exception as ProviderError rather than generic Exception. * Raise 'ParserError' if fail to match record. * Check for circuit ID key name when normalizing circuit IDs. * Combine lines as requested. * Raise 'ParserError' if spreadsheet is empty. * Use poetry-install-options '--all-extras' to install optional packages in CI.
1 parent 1bd1ffe commit 44c7ef4

23 files changed

+1112
-8
lines changed

.github/workflows/ci.yml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
---
22
name: "CI"
3-
concurrency: # Cancel any existing runs of this workflow for this same PR
3+
concurrency: # Cancel any existing runs of this workflow for this same PR
44
group: "${{ github.workflow }}-${{ github.ref }}"
55
cancel-in-progress: true
6-
on: # yamllint disable
6+
on: # yamllint disable
77
push:
88
branches:
99
- "main"
@@ -129,7 +129,7 @@ jobs:
129129
uses: "networktocode/gh-action-setup-poetry-environment@v6"
130130
with:
131131
python-version: "${{ matrix.python-version }}"
132-
poetry-install-options: "--with dev"
132+
poetry-install-options: "--all-extras"
133133
- name: "Run poetry Install"
134134
run: "poetry install"
135135
- name: "Run poetry Install"

README.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,7 @@ By default, there is a `GenericProvider` that supports a `SimpleProcessor` using
6969
#### Supported providers based on other parsers
7070

7171
- Apple
72+
- ATT
7273
- AWS
7374
- AquaComms
7475
- BSO
@@ -134,6 +135,12 @@ The library is available as a Python package in pypi and can be installed with p
134135

135136
`pip install circuit-maintenance-parser[openai]`
136137

138+
#### Xlsx Spreadsheets
139+
140+
Some providers may attach a spreadsheet to their circuit maintenance notifications. Parsing these attachments requires installing the package with the optional `xlsx` extra.
141+
142+
`pip install circuit-maintenance-parser[xlsx]`
143+
137144
## How to use it?
138145

139146
The library requires two things:

circuit_maintenance_parser/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
AquaComms,
1515
Apple,
1616
Arelion,
17+
ATT,
1718
Cogent,
1819
Colt,
1920
CrownCastle,
@@ -44,6 +45,7 @@
4445
Apple,
4546
AquaComms,
4647
Arelion,
48+
ATT,
4749
AWS,
4850
BSO,
4951
Cogent,

circuit_maintenance_parser/parser.py

Lines changed: 42 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
11
"""Definition of Mainentance Notification base classes."""
2+
3+
import io
24
import logging
35
import os
46
import base64
@@ -12,10 +14,17 @@
1214

1315
import bs4 # type: ignore
1416
from bs4.element import ResultSet # type: ignore
15-
1617
from pydantic import BaseModel, PrivateAttr
1718
from icalendar import Calendar # type: ignore
1819

20+
try:
21+
from pandas import read_excel
22+
23+
READ_EXCEL_PRESENT = True
24+
except ImportError:
25+
READ_EXCEL_PRESENT = False
26+
27+
1928
from circuit_maintenance_parser.errors import ParserError
2029
from circuit_maintenance_parser.output import Status, Impact, CircuitImpact
2130
from circuit_maintenance_parser.constants import EMAIL_HEADER_SUBJECT, EMAIL_HEADER_DATE
@@ -497,3 +506,35 @@ def parse_content(self, content):
497506
)
498507

499508
return [data]
509+
510+
511+
class Xlsx(Parser):
    """Parser for xlsx spreadsheet attachments.

    Subclasses implement `parse_xlsx` to turn the spreadsheet rows into parsed data.
    """

    _data_types = PrivateAttr(
        [
            "application/octet-stream",
            "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
        ]
    )

    def parser_hook(self, raw: bytes, content_type: str):
        """Read the spreadsheet held in `raw` and hand its rows to `parse_xlsx`.

        Args:
            raw: Content of the attachment.
            content_type: MIME type of the attachment (not used by this method).

        Returns:
            A list built from whatever `parse_xlsx` returns.

        Raises:
            RuntimeError: If `pandas.read_excel` could not be imported.
            ParserError: If the attachment cannot be read as a spreadsheet, or it has no rows.
        """
        if not READ_EXCEL_PRESENT:
            raise RuntimeError(
                "Missing import 'pandas' required to read xlsx files. Install main package with option '[xlsx]'"
            )

        try:
            sheet = read_excel(io.BytesIO(raw))
        except ImportError:
            # A missing spreadsheet engine is an installation problem, not a parsing failure.
            raise
        except Exception as exc:  # pylint: disable=broad-except
            # "application/octet-stream" is a generic type, so the attachment may not be a
            # spreadsheet at all; report that as a parsing failure instead of a library error.
            raise ParserError(f"Unable to read attached spreadsheet: {exc}") from exc

        # Identical rows are collapsed before being converted to one dict per row.
        records = sheet.drop_duplicates().to_dict(orient="records")
        if not records:
            raise ParserError("No rows found in attached spreadsheet.")
        return list(self.parse_xlsx(records))

    @staticmethod
    def parse_xlsx(records: List[Dict]) -> List[Dict]:
        """Convert spreadsheet rows to parsed data; subclasses must override this."""
        raise NotImplementedError
Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,113 @@
1+
"""ATT Parser."""
2+
3+
import logging
4+
import re
5+
import string
6+
from typing import Dict, List
7+
8+
import dateutil
9+
from bs4.element import ResultSet # type: ignore
10+
from circuit_maintenance_parser.errors import ParserError
11+
from circuit_maintenance_parser.parser import CircuitImpact, Html, Impact, Status, Xlsx
12+
13+
logger = logging.getLogger(__name__)
14+
15+
# Matches the event details: the "Event ID", "Customer Impact Description", "Summary",
# "Description" and "Business Risk" fields, in that order, each captured as its own group.
# Consecutive fields are separated by a single space or newline.
RE_EVENT = re.compile(
16+
r"Event ID: (.*)[ \n]"
17+
r"Customer Impact Description: (.*)[ \n]"
18+
r"Summary: (.*)[ \n]"
19+
r"Description: (.*)[ \n]"
20+
r"Business Risk: (.*)"
21+
)
22+
# Matches a maintenance window whose start and end times both end in " GMT".
RE_MAINTENANCE_WINDOW_GMT = re.compile(r"Start Time: (.* GMT).*End Time: (.* GMT)")
23+
# Matches a maintenance window without requiring a timezone; the start and end times are
# separated by a single space or newline.
RE_MAINTENANCE_WINDOW_NO_TIMEZONE = re.compile(r"Start Time: (.*)[ \n]End Time: (.*)")
24+
25+
26+
class XlsxParserATT1(Xlsx):
    """Xlsx Parser for ATT file attachments."""

    @staticmethod
    def parse_xlsx(records: List[Dict]) -> List[Dict]:
        """Parse the rows of an ATT xlsx attachment.

        Args:
            records: One dict per spreadsheet row.

        Returns:
            A single-item list holding the account name and the impacted circuits.

        Raises:
            ParserError: If there are no rows, or the expected columns are missing.
        """
        if not records:
            raise ParserError("No rows found in attached spreadsheet.")

        account_name, circuit_id_key = get_account_and_circuit_id_key(records[0])
        try:
            circuit_ids = [record[circuit_id_key] for record in records]
        except KeyError as exc:
            raise ParserError(f"Could not find column '{circuit_id_key}' in attached spreadsheet.") from exc

        # Normalize based on the column the IDs were actually read from, not on whether a
        # "Circuit/Asset" column merely happens to be present in the sheet.
        if circuit_id_key == "Circuit/Asset":
            circuit_ids = [normalize_lec_circuit_id(cid) for cid in circuit_ids]

        circuits = [CircuitImpact(impact=Impact.OUTAGE, circuit_id=cid) for cid in circuit_ids]
        return [
            {
                "account": account_name,
                "circuits": circuits,
            }
        ]
45+
46+
47+
class HtmlParserATT1(Html):
    """Notifications Parser for ATT notifications."""

    def parse_html(self, soup):
        """Parse ATT HTML notification.

        Returns:
            A single-item list with the parsed data; "start" and "end" are converted with
            `dt2ts` and the status is always set to confirmed.

        Raises:
            ParserError: If no maintenance window was found in the notification.
        """
        logger.debug("Parsing ATT HTML notification.")
        data = self.parse_p_tags(soup)

        # Fail with a parser error rather than a bare KeyError when no window was matched.
        missing = [key for key in ("start", "end") if key not in data]
        if missing:
            raise ParserError(f"Could not find maintenance window ({', '.join(missing)}) in notification.")

        data["start"] = self.dt2ts(data["start"])
        data["end"] = self.dt2ts(data["end"])
        data["status"] = Status.CONFIRMED
        return [data]

    @staticmethod
    def parse_p_tags(soup: ResultSet) -> Dict:
        """Parse <p> tags in HTML.

        Each paragraph is matched against the event pattern first and the maintenance window
        patterns second; values found in later paragraphs overwrite earlier ones.

        Returns:
            A dict that may contain "maintenance_id", "summary", "start" and "end".
        """
        # Import the submodule explicitly: a bare `import dateutil` does not guarantee that
        # `dateutil.parser` has been loaded.
        from dateutil import parser as date_parser  # pylint: disable=import-outside-toplevel

        data = {}
        for tag in soup.find_all("p"):
            text = remove_unprintable(tag.text.strip())

            if match := RE_EVENT.search(text):
                event_id, impact, summary, description, _ = match.groups()
                data["maintenance_id"] = event_id
                data["summary"] = f"{summary}: {impact} {description}"

            elif match := RE_MAINTENANCE_WINDOW_GMT.search(text):
                start_time_text, end_time_text = match.groups()
                data["start"] = date_parser.parse(start_time_text)
                data["end"] = date_parser.parse(end_time_text)

            elif match := RE_MAINTENANCE_WINDOW_NO_TIMEZONE.search(text):
                # No timezone was matched in the text, so the times are read as GMT.
                start_time_text, end_time_text = match.groups()
                data["start"] = date_parser.parse(start_time_text + " GMT")
                data["end"] = date_parser.parse(end_time_text + " GMT")

        return data
84+
85+
86+
def get_account_and_circuit_id_key(record: Dict) -> tuple[str, str]:
    """Return the account name and the key used to retrieve circuit IDs.

    The key names may vary depending on the ATT business unit that initiated the notice.

    Args:
        record: A single spreadsheet row.

    Returns:
        The account name (converted to a string) and the name of the key holding the circuit ID.

    Raises:
        ParserError: If no known pair of account and circuit ID keys is present.
    """
    known_columns = (
        ("Customer", "Circuit/Asset"),
        ("Customer Name", "Circuit ID"),
        ("Customer Names", "Customer Circuit ID"),
    )
    for account_key, circuit_id_key in known_columns:
        account = record.get(account_key)
        # Require the matching circuit ID key as well, so callers never index a missing key.
        if account and circuit_id_key in record:
            return str(account), circuit_id_key

    raise ParserError("Could not parse 'Customer Name' and 'Circuit ID'.")
101+
102+
103+
def normalize_lec_circuit_id(circuit_id: str) -> str:
    """Standardize a circuit ID.

    Only the first whitespace-separated token is kept. Leading zeros are removed from it and
    a trailing run of zeros is replaced with "ATI".

    Raises:
        ParserError: If `circuit_id` is empty or contains only whitespace.
    """
    tokens = circuit_id.split()
    if not tokens:
        raise ParserError("Could not parse circuit ID from an empty value.")

    normalized = tokens[0].lstrip("0")  # Remove leading zeros.
    return re.sub(r"0+$", "ATI", normalized)  # Replace trailing zeros with "ATI".
109+
110+
111+
# Built once so that each membership test is a set lookup instead of a scan of the string.
_PRINTABLE_CHARACTERS = frozenset(string.printable)


def remove_unprintable(text: str) -> str:
    """Remove every character that is not in `string.printable` from text.

    `string.printable` only contains ASCII characters, so non-ASCII characters are removed
    as well. Whitespace such as spaces, tabs and newlines is kept.
    """
    return "".join(char for char in text if char in _PRINTABLE_CHARACTERS)

circuit_maintenance_parser/provider.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717

1818
from circuit_maintenance_parser.parsers.apple import SubjectParserApple, TextParserApple
1919
from circuit_maintenance_parser.parsers.aquacomms import HtmlParserAquaComms1, SubjectParserAquaComms1
20+
from circuit_maintenance_parser.parsers.att import HtmlParserATT1, XlsxParserATT1
2021
from circuit_maintenance_parser.parsers.aws import SubjectParserAWS1, TextParserAWS1
2122
from circuit_maintenance_parser.parsers.bso import HtmlParserBSO1
2223
from circuit_maintenance_parser.parsers.cogent import HtmlParserCogent1, SubjectParserCogent1, TextParserCogent1
@@ -235,6 +236,17 @@ class Arelion(GenericProvider):
235236
_default_organizer = PrivateAttr("support@arelion.com")
236237

237238

239+
class ATT(GenericProvider):
240+
"""ATT provider custom class."""
241+
242+
# A single combined processor that runs the email date, ATT HTML and ATT xlsx parsers.
_processors: List[GenericProcessor] = PrivateAttr(
243+
[
244+
CombinedProcessor(data_parsers=[EmailDateParser, HtmlParserATT1, XlsxParserATT1]),
245+
]
246+
)
247+
# Default organizer address for this provider.
_default_organizer = PrivateAttr("g31654@att.com")
248+
249+
238250
class AWS(GenericProvider):
239251
"""AWS provider custom class."""
240252

0 commit comments

Comments
 (0)