Skip to content

Commit 897fed8

Browse files
[Outlook] Feature: Office365 multi-user support
Introduced BaseOffice365User abstract base class to standardize Office 365 user handling. Added MultiOffice365Users to manage multiple emails from config. Added client_emails (comma-separated) in OutlookDataSource config. Resolved issue with fetching too many users causing SMTP server not found error.
1 parent ce5a4c1 commit 897fed8

File tree

2 files changed

+189
-29
lines changed

2 files changed

+189
-29
lines changed

connectors/sources/outlook.py

Lines changed: 108 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,11 @@
77

88
import asyncio
99
import os
10+
from abc import ABC, abstractmethod
1011
from copy import copy
1112
from datetime import date
1213
from functools import cached_property, partial
14+
from typing import List
1315

1416
import aiofiles
1517
import aiohttp
@@ -348,13 +350,13 @@ async def get_user_accounts(self):
348350
yield user_account
349351

350352

351-
class Office365Users:
352-
"""Fetch users from Office365 Active Directory"""
353+
class BaseOffice365User(ABC):
354+
"""Abstract base class for Office 365 user management"""
353355

354356
def __init__(self, client_id, client_secret, tenant_id):
355-
self.tenant_id = tenant_id
356357
self.client_id = client_id
357358
self.client_secret = client_secret
359+
self.tenant_id = tenant_id
358360

359361
@cached_property
360362
def _get_session(self):
@@ -403,6 +405,21 @@ async def _fetch_token(self):
403405
except Exception as exception:
404406
self._check_errors(response=exception)
405407

408+
@abstractmethod
409+
async def get_users(self):
410+
pass
411+
412+
@abstractmethod
413+
async def get_user_accounts(self):
414+
pass
415+
416+
417+
class Office365Users(BaseOffice365User):
418+
"""Fetch users from Office365 Active Directory"""
419+
420+
def __init__(self, client_id, client_secret, tenant_id):
421+
super().__init__(client_id, client_secret, tenant_id)
422+
406423
@retryable(
407424
retries=RETRIES,
408425
interval=RETRY_INTERVAL,
@@ -456,6 +473,57 @@ async def get_user_accounts(self):
456473
yield user_account
457474

458475

476+
class MultiOffice365Users(BaseOffice365User):
477+
"""Fetch multiple Office365 users based on a list of email addresses."""
478+
479+
def __init__(self, client_id, client_secret, tenant_id, client_emails: List[str]):
480+
super().__init__(client_id, client_secret, tenant_id)
481+
self.client_emails = client_emails
482+
483+
async def get_users(self):
484+
access_token = await self._fetch_token()
485+
for email in self.client_emails:
486+
url = f"https://graph.microsoft.com/v1.0/users/{email}"
487+
try:
488+
async with self._get_session.get(
489+
url=url,
490+
headers={
491+
"Authorization": f"Bearer {access_token}",
492+
"Content-Type": "application/json",
493+
},
494+
) as response:
495+
json_response = await response.json()
496+
yield json_response
497+
except Exception:
498+
raise
499+
500+
async def get_user_accounts(self):
501+
async for user in self.get_users():
502+
mail = user.get("mail")
503+
if mail is None:
504+
continue
505+
506+
credentials = OAuth2Credentials(
507+
client_id=self.client_id,
508+
tenant_id=self.tenant_id,
509+
client_secret=self.client_secret,
510+
identity=Identity(primary_smtp_address=mail),
511+
)
512+
configuration = Configuration(
513+
credentials=credentials,
514+
auth_type=OAUTH2,
515+
service_endpoint=EWS_ENDPOINT,
516+
retry_policy=FaultTolerance(max_wait=120),
517+
)
518+
user_account = Account(
519+
primary_smtp_address=mail,
520+
config=configuration,
521+
autodiscover=False,
522+
access_type=IMPERSONATION,
523+
)
524+
yield user_account
525+
526+
459527
class OutlookDocFormatter:
460528
"""Format Outlook object documents to Elasticsearch document"""
461529

@@ -583,6 +651,27 @@ def attachment_doc_formatter(self, attachment, attachment_type, timezone):
583651
}
584652

585653

654+
class UserFactory:
655+
"""Factory class for creating Office365 user instances"""
656+
657+
@staticmethod
658+
def create_user(configuration: dict) -> BaseOffice365User:
659+
if configuration.get("client_emails"):
660+
client_emails = [email.strip() for email in configuration["client_emails"].split(",")]
661+
return MultiOffice365Users(
662+
client_id=configuration["client_id"],
663+
client_secret=configuration["client_secret"],
664+
tenant_id=configuration["tenant_id"],
665+
client_emails=client_emails
666+
)
667+
else:
668+
return Office365Users(
669+
client_id=configuration["client_id"],
670+
client_secret=configuration["client_secret"],
671+
tenant_id=configuration["tenant_id"]
672+
)
673+
674+
586675
class OutlookClient:
587676
"""Outlook client to handle API calls made to Outlook"""
588677

@@ -605,11 +694,7 @@ def set_logger(self, logger_):
605694
@cached_property
606695
def _get_user_instance(self):
607696
if self.is_cloud:
608-
return Office365Users(
609-
client_id=self.configuration["client_id"],
610-
client_secret=self.configuration["client_secret"],
611-
tenant_id=self.configuration["tenant_id"],
612-
)
697+
return UserFactory.create_user(self.configuration)
613698

614699
return ExchangeUsers(
615700
ad_server=self.configuration["active_directory_server"],
@@ -666,9 +751,12 @@ async def get_tasks(self, account):
666751
yield task
667752

668753
async def get_contacts(self, account):
669-
folder = account.root / "Top of Information Store" / "Contacts"
670-
for contact in await asyncio.to_thread(folder.all().only, *CONTACT_FIELDS):
671-
yield contact
754+
try:
755+
folder = account.root / "Top of Information Store" / "Contacts"
756+
for contact in await asyncio.to_thread(folder.all().only, *CONTACT_FIELDS):
757+
yield contact
758+
except Exception:
759+
raise
672760

673761

674762
class OutlookDataSource(BaseDataSource):
@@ -735,6 +823,13 @@ def get_default_configuration(cls):
735823
"sensitive": True,
736824
"type": "str",
737825
},
826+
"client_emails": {
827+
"depends_on": [{"field": "data_source", "value": OUTLOOK_CLOUD}],
828+
"label": "Client Email Addresses (comma-separated)",
829+
"order": 5,
830+
"required": False,
831+
"type": "str",
832+
},
738833
"exchange_server": {
739834
"depends_on": [{"field": "data_source", "value": OUTLOOK_SERVER}],
740835
"label": "Exchange Server",
@@ -1072,9 +1167,11 @@ async def get_docs(self, filtering=None):
10721167
dictionary: dictionary containing meta-data of the files.
10731168
"""
10741169
async for account in self.client._get_user_instance.get_user_accounts():
1170+
self._logger.debug(f"Processing account: {account}")
10751171
timezone = account.default_timezone or DEFAULT_TIMEZONE
10761172

10771173
async for mail in self._fetch_mails(account=account, timezone=timezone):
1174+
self._logger.debug(f"Fetched mail: {mail}")
10781175
yield mail
10791176

10801177
async for contact in self._fetch_contacts(

tests/sources/test_outlook.py

Lines changed: 81 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -374,6 +374,7 @@ async def create_outlook_source(
374374
tenant_id="foo",
375375
client_id="bar",
376376
client_secret="faa",
377+
client_emails=None,
377378
exchange_server="127.0.0.1",
378379
active_directory_server="127.0.0.1",
379380
username="fee",
@@ -383,12 +384,16 @@ async def create_outlook_source(
383384
ssl_ca="",
384385
use_text_extraction_service=False,
385386
):
387+
if client_emails is None:
388+
client_emails = ""
389+
386390
async with create_source(
387391
OutlookDataSource,
388392
data_source=data_source,
389393
tenant_id=tenant_id,
390394
client_id=client_id,
391395
client_secret=client_secret,
396+
client_emails=client_emails,
392397
exchange_server=exchange_server,
393398
active_directory_server=active_directory_server,
394399
username=username,
@@ -415,26 +420,36 @@ def get_stream_reader():
415420
return async_mock
416421

417422

418-
def side_effect_function(url, headers):
423+
def side_effect_function(client_emails=None):
419424
"""Dynamically changing return values for API calls
420425
Args:
421426
url, ssl: Params required for get call
427+
client_emails: Optional string of comma-separated email addresses
422428
"""
423-
if url == "https://graph.microsoft.com/v1.0/users?$top=999":
424-
return get_json_mock(
425-
mock_response={
426-
"@odata.nextLink": "https://graph.microsoft.com/v1.0/users?$top=999&$skipToken=fake-skip-token",
427-
"value": [{"mail": "test.user@gmail.com"}],
428-
},
429-
status=200,
430-
)
431-
elif (
432-
url
433-
== "https://graph.microsoft.com/v1.0/users?$top=999&$skipToken=fake-skip-token"
434-
):
435-
return get_json_mock(
436-
mock_response={"value": [{"mail": "dummy.user@gmail.com"}]}, status=200
437-
)
429+
def inner(url, headers):
430+
if client_emails:
431+
emails = [email.strip() for email in client_emails.split(",")]
432+
for email in emails:
433+
if url == f"https://graph.microsoft.com/v1.0/users/{email}":
434+
users_response = {"value": [{"mail": email}]}
435+
return get_json_mock(mock_response=users_response, status=200)
436+
elif url == "https://graph.microsoft.com/v1.0/users?$top=999":
437+
return get_json_mock(
438+
mock_response={
439+
"@odata.nextLink": "https://graph.microsoft.com/v1.0/users?$top=999&$skipToken=fake-skip-token",
440+
"value": [{"mail": "test.user@gmail.com"}],
441+
},
442+
status=200,
443+
)
444+
elif (
445+
url
446+
== "https://graph.microsoft.com/v1.0/users?$top=999&$skipToken=fake-skip-token"
447+
):
448+
return get_json_mock(
449+
mock_response={"value": [{"mail": "dummy.user@gmail.com"}]}, status=200
450+
)
451+
452+
return inner
438453

439454

440455
@pytest.mark.asyncio
@@ -459,6 +474,7 @@ def side_effect_function(url, headers):
459474
"tenant_id": "foo",
460475
"client_id": "bar",
461476
"client_secret": "",
477+
"client_emails": None,
462478
}
463479
),
464480
],
@@ -497,6 +513,17 @@ async def test_validate_configuration_with_invalid_dependency_fields_raises_erro
497513
"tenant_id": "foo",
498514
"client_id": "bar",
499515
"client_secret": "foo.bar",
516+
"client_emails": None
517+
}
518+
),
519+
(
520+
# Outlook Cloud with non-blank dependent fields & client_emails provided
521+
{
522+
"data_source": OUTLOOK_CLOUD,
523+
"tenant_id": "foo",
524+
"client_id": "bar",
525+
"client_secret": "foo.bar",
526+
"client_emails": "test.user@gmail.com"
500527
}
501528
),
502529
],
@@ -552,7 +579,7 @@ async def test_ping_for_cloud():
552579
):
553580
with mock.patch(
554581
"aiohttp.ClientSession.get",
555-
side_effect=side_effect_function,
582+
side_effect=side_effect_function(),
556583
):
557584
await source.ping()
558585

@@ -597,13 +624,49 @@ async def test_get_users_for_cloud():
597624
):
598625
with mock.patch(
599626
"aiohttp.ClientSession.get",
600-
side_effect=side_effect_function,
627+
side_effect=side_effect_function(),
601628
):
602629
async for response in source.client._get_user_instance.get_users():
603630
user_mails = [user["mail"] for user in response["value"]]
604631
users.extend(user_mails)
605632
assert users == ["test.user@gmail.com", "dummy.user@gmail.com"]
606633

634+
client_emails = "one.user@gmail.com"
635+
async with create_outlook_source(client_emails=client_emails) as source:
636+
users = []
637+
with mock.patch(
638+
"aiohttp.ClientSession.post",
639+
return_value=get_json_mock(
640+
mock_response={"access_token": "fake-token"}, status=200
641+
),
642+
):
643+
with mock.patch(
644+
"aiohttp.ClientSession.get",
645+
side_effect=side_effect_function(client_emails),
646+
):
647+
async for response in source.client._get_user_instance.get_users():
648+
user_mails = [user["mail"] for user in response["value"]]
649+
users.extend(user_mails)
650+
assert users == ["one.user@gmail.com"]
651+
652+
client_emails = "first.user@gmail.com, second.user@gmail.com"
653+
async with create_outlook_source(client_emails=client_emails) as source:
654+
users = []
655+
with mock.patch(
656+
"aiohttp.ClientSession.post",
657+
return_value=get_json_mock(
658+
mock_response={"access_token": "fake-token"}, status=200
659+
),
660+
):
661+
with mock.patch(
662+
"aiohttp.ClientSession.get",
663+
side_effect=side_effect_function(client_emails),
664+
):
665+
async for response in source.client._get_user_instance.get_users():
666+
user_mails = [user["mail"] for user in response["value"]]
667+
users.extend(user_mails)
668+
assert set(users) == {"first.user@gmail.com", "second.user@gmail.com"}
669+
607670

608671
@pytest.mark.asyncio
609672
@patch("connectors.sources.outlook.Connection")

0 commit comments

Comments
 (0)