|
2 | 2 | import tempfile
|
3 | 3 | import urllib.parse
|
4 | 4 | from collections.abc import Generator
|
| 5 | +from datetime import datetime |
| 6 | +from datetime import timezone |
5 | 7 | from typing import Any
|
| 8 | +from typing import Dict |
6 | 9 | from typing import List
|
7 | 10 | from typing import Tuple
|
8 |
| -from typing import Dict, Union |
9 |
| -from datetime import datetime, timezone |
| 11 | +from typing import Union |
10 | 12 |
|
11 | 13 | from zulip import Client
|
12 | 14 |
|
@@ -42,30 +44,30 @@ def __init__(
|
42 | 44 |
|
43 | 45 | # Clean and normalize the URL
|
44 | 46 | realm_url = realm_url.strip().lower()
|
45 |
| - |
| 47 | + |
46 | 48 | # Remove any trailing slashes
|
47 |
| - realm_url = realm_url.rstrip('/') |
48 |
| - |
| 49 | + realm_url = realm_url.rstrip("/") |
| 50 | + |
49 | 51 | # Ensure the URL has a scheme
|
50 |
| - if not realm_url.startswith(('http://', 'https://')): |
51 |
| - realm_url = f'https://{realm_url}' |
52 |
| - |
| 52 | + if not realm_url.startswith(("http://", "https://")): |
| 53 | + realm_url = f"https://{realm_url}" |
| 54 | + |
53 | 55 | try:
|
54 | 56 | parsed = urllib.parse.urlparse(realm_url)
|
55 |
| - |
| 57 | + |
56 | 58 | # Extract the base domain without any paths or ports
|
57 |
| - netloc = parsed.netloc.split(':')[0] # Remove port if present |
58 |
| - |
| 59 | + netloc = parsed.netloc.split(":")[0] # Remove port if present |
| 60 | + |
59 | 61 | if not netloc:
|
60 | 62 | raise ValueError(
|
61 | 63 | f"Invalid realm URL format: {realm_url}. "
|
62 | 64 | f"URL must include a valid domain name."
|
63 | 65 | )
|
64 |
| - |
| 66 | + |
65 | 67 | # Always use HTTPS for security
|
66 | 68 | self.base_url = f"https://{netloc}"
|
67 | 69 | self.client: Client | None = None
|
68 |
| - |
| 70 | + |
69 | 71 | except Exception as e:
|
70 | 72 | raise ValueError(
|
71 | 73 | f"Failed to parse Zulip realm URL: {realm_url}. "
|
@@ -151,7 +153,7 @@ def _message_to_doc(self, message: Message) -> Document:
|
151 | 153 | "has_reactions": str(len(message.reactions) > 0),
|
152 | 154 | "content_type": str(message.content_type or "text"),
|
153 | 155 | }
|
154 |
| - |
| 156 | + |
155 | 157 | # Always include edit timestamp in metadata when available
|
156 | 158 | if edit_time is not None:
|
157 | 159 | metadata["edit_timestamp"] = str(message.last_edit_timestamp)
|
|
0 commit comments