Skip to content

Commit 5b4fee7

Browse files
tw4l and ikreymer authored
Remove workflows from GET profile endpoint + add inUse flag instead (#2703)
Connected to #2661 - Removes crawl workflows from being returned as part of the profile response. - Frontend: removes display of workflows in profile details. - Adds 'inUse' flag to all profile responses to indicate profile is in use by at least one workflow - Adds 'profileid' as possible filter for workflows search in preparation for filtering by profile id (#2708) - Make 'profile_in_use' a proper error (returning 400) on profile delete. --------- Co-authored-by: Ilya Kreymer <ikreymer@gmail.com>
1 parent b915e73 commit 5b4fee7

File tree

7 files changed

+79
-201
lines changed

7 files changed

+79
-201
lines changed

backend/btrixcloud/crawlconfigs.py

Lines changed: 13 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,6 @@
2525
ConfigRevision,
2626
CrawlConfig,
2727
CrawlConfigOut,
28-
CrawlConfigProfileOut,
2928
CrawlOut,
3029
UpdateCrawlConfig,
3130
Organization,
@@ -597,6 +596,7 @@ async def get_crawl_configs(
597596
page: int = 1,
598597
created_by: Optional[UUID] = None,
599598
modified_by: Optional[UUID] = None,
599+
profileid: Optional[UUID] = None,
600600
first_seed: Optional[str] = None,
601601
name: Optional[str] = None,
602602
description: Optional[str] = None,
@@ -607,7 +607,7 @@ async def get_crawl_configs(
607607
sort_direction: int = -1,
608608
) -> tuple[list[CrawlConfigOut], int]:
609609
"""Get all crawl configs for an organization is a member of"""
610-
# pylint: disable=too-many-locals,too-many-branches
610+
# pylint: disable=too-many-locals,too-many-branches,too-many-statements
611611
# Zero-index page for query
612612
page = page - 1
613613
skip = page * page_size
@@ -623,6 +623,9 @@ async def get_crawl_configs(
623623
if modified_by:
624624
match_query["modifiedBy"] = modified_by
625625

626+
if profileid:
627+
match_query["profileid"] = profileid
628+
626629
if name:
627630
match_query["name"] = name
628631

@@ -708,25 +711,12 @@ async def get_crawl_configs(
708711

709712
return configs, total
710713

711-
async def get_crawl_config_info_for_profile(
712-
self, profileid: UUID, org: Organization
713-
) -> list[CrawlConfigProfileOut]:
714-
"""Return all crawl configs that are associated with a given profileid"""
715-
query = {"profileid": profileid, "inactive": {"$ne": True}}
716-
if org:
717-
query["oid"] = org.id
718-
719-
results = []
720-
721-
cursor = self.crawl_configs.find(query, projection=["_id"])
722-
workflows = await cursor.to_list(length=1000)
723-
for workflow_dict in workflows:
724-
workflow_out = await self.get_crawl_config_out(
725-
workflow_dict.get("_id"), org
726-
)
727-
results.append(CrawlConfigProfileOut.from_dict(workflow_out.to_dict()))
728-
729-
return results
714+
async def is_profile_in_use(self, profileid: UUID, org: Organization) -> bool:
715+
"""return true/false if any active workflows exist with given profile"""
716+
res = await self.crawl_configs.find_one(
717+
{"profileid": profileid, "inactive": {"$ne": True}, "oid": org.id}
718+
)
719+
return res is not None
730720

731721
async def get_running_crawl(self, cid: UUID) -> Optional[CrawlOut]:
732722
"""Return the id of currently running crawl for this config, if any"""
@@ -1371,6 +1361,7 @@ async def get_crawl_configs(
13711361
# createdBy, kept as userid for API compatibility
13721362
userid: Optional[UUID] = None,
13731363
modifiedBy: Optional[UUID] = None,
1364+
profileid: Optional[UUID] = None,
13741365
firstSeed: Optional[str] = None,
13751366
name: Optional[str] = None,
13761367
description: Optional[str] = None,
@@ -1394,6 +1385,7 @@ async def get_crawl_configs(
13941385
org,
13951386
created_by=userid,
13961387
modified_by=modifiedBy,
1388+
profileid=profileid,
13971389
first_seed=firstSeed,
13981390
name=name,
13991391
description=description,

backend/btrixcloud/models.py

Lines changed: 1 addition & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -514,15 +514,6 @@ class CrawlConfigOut(CrawlConfigCore, CrawlConfigAdditional):
514514
lastStartedByName: Optional[str] = None
515515

516516

517-
# ============================================================================
518-
class CrawlConfigProfileOut(BaseMongoModel):
519-
"""Crawl Config basic info for profiles"""
520-
521-
name: str
522-
firstSeed: str
523-
seedCount: int
524-
525-
526517
# ============================================================================
527518
class UpdateCrawlConfig(BaseModel):
528519
"""Update crawl config name, crawl schedule, or tags"""
@@ -2319,12 +2310,7 @@ class Profile(BaseMongoModel):
23192310
crawlerChannel: Optional[str] = None
23202311
proxyId: Optional[str] = None
23212312

2322-
2323-
# ============================================================================
2324-
class ProfileWithCrawlConfigs(Profile):
2325-
"""Profile with list of crawlconfigs using this profile"""
2326-
2327-
crawlconfigs: List[CrawlConfigProfileOut] = []
2313+
inUse: bool = False
23282314

23292315

23302316
# ============================================================================

backend/btrixcloud/profiles.py

Lines changed: 13 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@
1313
from .pagination import DEFAULT_PAGE_SIZE, paginated_format
1414
from .models import (
1515
Profile,
16-
ProfileWithCrawlConfigs,
1716
ProfileFile,
1817
UrlIn,
1918
ProfileLaunchBrowserIn,
@@ -31,7 +30,6 @@
3130
SuccessResponseStorageQuota,
3231
ProfilePingResponse,
3332
ProfileBrowserGetUrlResponse,
34-
CrawlConfigProfileOut,
3533
)
3634
from .utils import dt_now
3735

@@ -353,33 +351,20 @@ async def list_profiles(
353351
profiles = [Profile.from_dict(res) for res in items]
354352
return profiles, total
355353

356-
async def get_profile(
357-
self, profileid: UUID, org: Optional[Organization] = None
358-
) -> Profile:
354+
async def get_profile(self, profileid: UUID, org: Organization) -> Profile:
359355
"""get profile by id and org"""
360-
query: dict[str, object] = {"_id": profileid}
361-
if org:
362-
query["oid"] = org.id
356+
query: dict[str, object] = {"_id": profileid, "oid": org.id}
363357

364358
res = await self.profiles.find_one(query)
365359
if not res:
366360
raise HTTPException(status_code=404, detail="profile_not_found")
367361

368-
return Profile.from_dict(res)
369-
370-
async def get_profile_with_configs(
371-
self, profileid: UUID, org: Organization
372-
) -> ProfileWithCrawlConfigs:
373-
"""get profile for api output, with crawlconfigs"""
374-
375-
profile = await self.get_profile(profileid, org)
376-
377-
crawlconfigs = await self.get_crawl_configs_for_profile(profileid, org)
378-
379-
return ProfileWithCrawlConfigs(crawlconfigs=crawlconfigs, **profile.dict())
362+
profile = Profile.from_dict(res)
363+
profile.inUse = await self.crawlconfigs.is_profile_in_use(profileid, org)
364+
return profile
380365

381366
async def get_profile_storage_path_and_proxy(
382-
self, profileid: UUID, org: Optional[Organization] = None
367+
self, profileid: UUID, org: Organization
383368
) -> tuple[str, str]:
384369
"""return profile path filename (relative path) for given profile id and org"""
385370
try:
@@ -392,9 +377,7 @@ async def get_profile_storage_path_and_proxy(
392377

393378
return "", ""
394379

395-
async def get_profile_name(
396-
self, profileid: UUID, org: Optional[Organization] = None
397-
) -> str:
380+
async def get_profile_name(self, profileid: UUID, org: Organization) -> str:
398381
"""return profile for given profile id and org"""
399382
try:
400383
profile = await self.get_profile(profileid, org)
@@ -405,25 +388,14 @@ async def get_profile_name(
405388

406389
return ""
407390

408-
async def get_crawl_configs_for_profile(
409-
self, profileid: UUID, org: Organization
410-
) -> list[CrawlConfigProfileOut]:
411-
"""Get list of crawl configs with basic info for that use a particular profile"""
412-
413-
crawlconfig_info = await self.crawlconfigs.get_crawl_config_info_for_profile(
414-
profileid, org
415-
)
416-
417-
return crawlconfig_info
418-
419391
async def delete_profile(
420392
self, profileid: UUID, org: Organization
421393
) -> dict[str, Any]:
422394
"""delete profile, if not used in active crawlconfig"""
423-
profile = await self.get_profile_with_configs(profileid, org)
395+
profile = await self.get_profile(profileid, org)
424396

425-
if len(profile.crawlconfigs) > 0:
426-
return {"error": "in_use", "crawlconfigs": profile.crawlconfigs}
397+
if profile.inUse:
398+
raise HTTPException(status_code=400, detail="profile_in_use")
427399

428400
query: dict[str, object] = {"_id": profileid}
429401
if org:
@@ -571,7 +543,7 @@ async def commit_browser_to_existing(
571543

572544
else:
573545
metadata = await browser_get_metadata(browser_commit.browserid, org)
574-
profile = await ops.get_profile(profileid)
546+
profile = await ops.get_profile(profileid, org)
575547
await ops.commit_to_profile(
576548
browser_commit=ProfileCreate(
577549
browserid=browser_commit.browserid,
@@ -588,12 +560,12 @@ async def commit_browser_to_existing(
588560

589561
return {"updated": True}
590562

591-
@router.get("/{profileid}", response_model=ProfileWithCrawlConfigs)
563+
@router.get("/{profileid}", response_model=Profile)
592564
async def get_profile(
593565
profileid: UUID,
594566
org: Organization = Depends(org_crawl_dep),
595567
):
596-
return await ops.get_profile_with_configs(profileid, org)
568+
return await ops.get_profile(profileid, org)
597569

598570
@router.delete("/{profileid}", response_model=SuccessResponseStorageQuota)
599571
async def delete_profile(

backend/test/test_profiles.py

Lines changed: 3 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -144,8 +144,6 @@ def profile_config_id(admin_auth_headers, default_org_id, profile_id):
144144
assert resource["storage"]["name"]
145145
assert resource.get("replicas") or resource.get("replicas") == []
146146

147-
assert data.get("crawlconfigs") == []
148-
149147
# Use profile in a workflow
150148
r = requests.post(
151149
f"{API_PREFIX}/orgs/{default_org_id}/crawlconfigs/",
@@ -207,7 +205,7 @@ def test_commit_browser_to_new_profile(admin_auth_headers, default_org_id, profi
207205
def test_get_profile(admin_auth_headers, default_org_id, profile_id, profile_config_id):
208206
start_time = time.monotonic()
209207
time_limit = 10
210-
# Check get endpoint again and check that crawlconfigs is updated
208+
# Check get endpoint again and check that inUse is updated
211209
while True:
212210
try:
213211
r = requests.get(
@@ -239,13 +237,8 @@ def test_get_profile(admin_auth_headers, default_org_id, profile_id, profile_con
239237
assert resource["storage"]["name"]
240238
assert resource.get("replicas") or resource.get("replicas") == []
241239

242-
crawl_configs = data.get("crawlconfigs")
243-
assert crawl_configs
244-
assert len(crawl_configs) == 1
245-
assert crawl_configs[0]["id"] == profile_config_id
246-
assert crawl_configs[0]["name"] == "Profile Test Crawl"
247-
assert crawl_configs[0]["firstSeed"] == "https://webrecorder.net/"
248-
assert crawl_configs[0]["seedCount"] == 1
240+
assert "crawlconfigs" not in data
241+
assert data["inUse"] == True
249242
break
250243
except:
251244
if time.monotonic() - start_time > time_limit:
@@ -260,7 +253,6 @@ def test_commit_second_profile(profile_2_id):
260253
def test_list_profiles(admin_auth_headers, default_org_id, profile_id, profile_2_id):
261254
start_time = time.monotonic()
262255
time_limit = 10
263-
# Check get endpoint again and check that crawlconfigs is updated
264256
while True:
265257
try:
266258
r = requests.get(

0 commit comments

Comments (0)