Skip to content

Remove workflows from GET profile endpoint + Add inUse flag #2703

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 11 commits into from
Jul 2, 2025
34 changes: 13 additions & 21 deletions backend/btrixcloud/crawlconfigs.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@
ConfigRevision,
CrawlConfig,
CrawlConfigOut,
CrawlConfigProfileOut,
CrawlOut,
UpdateCrawlConfig,
Organization,
Expand Down Expand Up @@ -597,6 +596,7 @@ async def get_crawl_configs(
page: int = 1,
created_by: Optional[UUID] = None,
modified_by: Optional[UUID] = None,
profileid: Optional[UUID] = None,
first_seed: Optional[str] = None,
name: Optional[str] = None,
description: Optional[str] = None,
Expand All @@ -607,7 +607,7 @@ async def get_crawl_configs(
sort_direction: int = -1,
) -> tuple[list[CrawlConfigOut], int]:
"""Get all crawl configs for an organization the user is a member of"""
# pylint: disable=too-many-locals,too-many-branches
# pylint: disable=too-many-locals,too-many-branches,too-many-statements
# Zero-index page for query
page = page - 1
skip = page * page_size
Expand All @@ -623,6 +623,9 @@ async def get_crawl_configs(
if modified_by:
match_query["modifiedBy"] = modified_by

if profileid:
match_query["profileid"] = profileid

if name:
match_query["name"] = name

Expand Down Expand Up @@ -708,25 +711,12 @@ async def get_crawl_configs(

return configs, total

async def get_crawl_config_info_for_profile(
self, profileid: UUID, org: Organization
) -> list[CrawlConfigProfileOut]:
"""Return all crawl configs that are associated with a given profileid"""
query = {"profileid": profileid, "inactive": {"$ne": True}}
if org:
query["oid"] = org.id

results = []

cursor = self.crawl_configs.find(query, projection=["_id"])
workflows = await cursor.to_list(length=1000)
for workflow_dict in workflows:
workflow_out = await self.get_crawl_config_out(
workflow_dict.get("_id"), org
)
results.append(CrawlConfigProfileOut.from_dict(workflow_out.to_dict()))

return results
async def is_profile_in_use(self, profileid: UUID, org: Organization) -> bool:
    """Return True if any active workflow in the given org references this profile.

    A workflow counts as active when its ``inactive`` field is absent or not True.
    Only existence is checked; the matching document itself is discarded.
    """
    match_query = {
        "profileid": profileid,
        "inactive": {"$ne": True},
        "oid": org.id,
    }
    found = await self.crawl_configs.find_one(match_query)
    return found is not None

async def get_running_crawl(self, cid: UUID) -> Optional[CrawlOut]:
"""Return the id of currently running crawl for this config, if any"""
Expand Down Expand Up @@ -1371,6 +1361,7 @@ async def get_crawl_configs(
# createdBy, kept as userid for API compatibility
userid: Optional[UUID] = None,
modifiedBy: Optional[UUID] = None,
profileid: Optional[UUID] = None,
firstSeed: Optional[str] = None,
name: Optional[str] = None,
description: Optional[str] = None,
Expand All @@ -1394,6 +1385,7 @@ async def get_crawl_configs(
org,
created_by=userid,
modified_by=modifiedBy,
profileid=profileid,
first_seed=firstSeed,
name=name,
description=description,
Expand Down
16 changes: 1 addition & 15 deletions backend/btrixcloud/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -514,15 +514,6 @@ class CrawlConfigOut(CrawlConfigCore, CrawlConfigAdditional):
lastStartedByName: Optional[str] = None


# ============================================================================
class CrawlConfigProfileOut(BaseMongoModel):
"""Crawl Config basic info for profiles"""

name: str
firstSeed: str
seedCount: int


# ============================================================================
class UpdateCrawlConfig(BaseModel):
"""Update crawl config name, crawl schedule, or tags"""
Expand Down Expand Up @@ -2319,12 +2310,7 @@ class Profile(BaseMongoModel):
crawlerChannel: Optional[str] = None
proxyId: Optional[str] = None


# ============================================================================
class ProfileWithCrawlConfigs(Profile):
"""Profile with list of crawlconfigs using this profile"""

crawlconfigs: List[CrawlConfigProfileOut] = []
inUse: bool = False


# ============================================================================
Expand Down
54 changes: 13 additions & 41 deletions backend/btrixcloud/profiles.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
from .pagination import DEFAULT_PAGE_SIZE, paginated_format
from .models import (
Profile,
ProfileWithCrawlConfigs,
ProfileFile,
UrlIn,
ProfileLaunchBrowserIn,
Expand All @@ -31,7 +30,6 @@
SuccessResponseStorageQuota,
ProfilePingResponse,
ProfileBrowserGetUrlResponse,
CrawlConfigProfileOut,
)
from .utils import dt_now

Expand Down Expand Up @@ -353,33 +351,20 @@ async def list_profiles(
profiles = [Profile.from_dict(res) for res in items]
return profiles, total

async def get_profile(
self, profileid: UUID, org: Optional[Organization] = None
) -> Profile:
async def get_profile(self, profileid: UUID, org: Organization) -> Profile:
"""get profile by id and org"""
query: dict[str, object] = {"_id": profileid}
if org:
query["oid"] = org.id
query: dict[str, object] = {"_id": profileid, "oid": org.id}

res = await self.profiles.find_one(query)
if not res:
raise HTTPException(status_code=404, detail="profile_not_found")

return Profile.from_dict(res)

async def get_profile_with_configs(
self, profileid: UUID, org: Organization
) -> ProfileWithCrawlConfigs:
"""get profile for api output, with crawlconfigs"""

profile = await self.get_profile(profileid, org)

crawlconfigs = await self.get_crawl_configs_for_profile(profileid, org)

return ProfileWithCrawlConfigs(crawlconfigs=crawlconfigs, **profile.dict())
profile = Profile.from_dict(res)
profile.inUse = await self.crawlconfigs.is_profile_in_use(profileid, org)
return profile

async def get_profile_storage_path_and_proxy(
self, profileid: UUID, org: Optional[Organization] = None
self, profileid: UUID, org: Organization
) -> tuple[str, str]:
"""return profile path filename (relative path) for given profile id and org"""
try:
Expand All @@ -392,9 +377,7 @@ async def get_profile_storage_path_and_proxy(

return "", ""

async def get_profile_name(
self, profileid: UUID, org: Optional[Organization] = None
) -> str:
async def get_profile_name(self, profileid: UUID, org: Organization) -> str:
"""return profile for given profile id and org"""
try:
profile = await self.get_profile(profileid, org)
Expand All @@ -405,25 +388,14 @@ async def get_profile_name(

return ""

async def get_crawl_configs_for_profile(
self, profileid: UUID, org: Organization
) -> list[CrawlConfigProfileOut]:
"""Get list of crawl configs with basic info for that use a particular profile"""

crawlconfig_info = await self.crawlconfigs.get_crawl_config_info_for_profile(
profileid, org
)

return crawlconfig_info

async def delete_profile(
self, profileid: UUID, org: Organization
) -> dict[str, Any]:
"""delete profile, if not used in active crawlconfig"""
profile = await self.get_profile_with_configs(profileid, org)
profile = await self.get_profile(profileid, org)

if len(profile.crawlconfigs) > 0:
return {"error": "in_use", "crawlconfigs": profile.crawlconfigs}
if profile.inUse:
raise HTTPException(status_code=400, detail="profile_in_use")

query: dict[str, object] = {"_id": profileid}
if org:
Expand Down Expand Up @@ -571,7 +543,7 @@ async def commit_browser_to_existing(

else:
metadata = await browser_get_metadata(browser_commit.browserid, org)
profile = await ops.get_profile(profileid)
profile = await ops.get_profile(profileid, org)
await ops.commit_to_profile(
browser_commit=ProfileCreate(
browserid=browser_commit.browserid,
Expand All @@ -588,12 +560,12 @@ async def commit_browser_to_existing(

return {"updated": True}

@router.get("/{profileid}", response_model=ProfileWithCrawlConfigs)
@router.get("/{profileid}", response_model=Profile)
async def get_profile(
profileid: UUID,
org: Organization = Depends(org_crawl_dep),
):
return await ops.get_profile_with_configs(profileid, org)
return await ops.get_profile(profileid, org)

@router.delete("/{profileid}", response_model=SuccessResponseStorageQuota)
async def delete_profile(
Expand Down
14 changes: 3 additions & 11 deletions backend/test/test_profiles.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,8 +144,6 @@ def profile_config_id(admin_auth_headers, default_org_id, profile_id):
assert resource["storage"]["name"]
assert resource.get("replicas") or resource.get("replicas") == []

assert data.get("crawlconfigs") == []

# Use profile in a workflow
r = requests.post(
f"{API_PREFIX}/orgs/{default_org_id}/crawlconfigs/",
Expand Down Expand Up @@ -207,7 +205,7 @@ def test_commit_browser_to_new_profile(admin_auth_headers, default_org_id, profi
def test_get_profile(admin_auth_headers, default_org_id, profile_id, profile_config_id):
start_time = time.monotonic()
time_limit = 10
# Check get endpoint again and check that crawlconfigs is updated
# Check get endpoint again and check that inUse is updated
while True:
try:
r = requests.get(
Expand Down Expand Up @@ -239,13 +237,8 @@ def test_get_profile(admin_auth_headers, default_org_id, profile_id, profile_con
assert resource["storage"]["name"]
assert resource.get("replicas") or resource.get("replicas") == []

crawl_configs = data.get("crawlconfigs")
assert crawl_configs
assert len(crawl_configs) == 1
assert crawl_configs[0]["id"] == profile_config_id
assert crawl_configs[0]["name"] == "Profile Test Crawl"
assert crawl_configs[0]["firstSeed"] == "https://webrecorder.net/"
assert crawl_configs[0]["seedCount"] == 1
assert "crawlconfigs" not in data
assert data["inUse"] == True
break
except:
if time.monotonic() - start_time > time_limit:
Expand All @@ -260,7 +253,6 @@ def test_commit_second_profile(profile_2_id):
def test_list_profiles(admin_auth_headers, default_org_id, profile_id, profile_2_id):
start_time = time.monotonic()
time_limit = 10
# Check get endpoint again and check that crawlconfigs is updated
while True:
try:
r = requests.get(
Expand Down
Loading
Loading