|
1 | 1 | from os import environ |
2 | | - |
| 2 | +import os |
| 3 | +from pathlib import Path |
| 4 | +from dotenv import load_dotenv |
| 5 | +from utils import logger |
| 6 | + |
# Per-process memo of configuration lookups. It is filled by the to_* helpers
# in this module and emptied by reload_config().
config_cache = {}
| 8 | + |
def load_env_files():
    """Load variables from a ``.env`` file in the current working directory.

    If the file exists, its entries are loaded with ``override=True``, so they
    take precedence over same-named variables already in the process
    environment. If it does not exist, only an informational message is
    logged and the process environment is left as it is.
    """
    dotenv_path = Path.cwd() / '.env'

    # Guard clause: nothing to load, so just report it and stop.
    if not dotenv_path.exists():
        logger.info("No .env file found, using environment variables only")
        return

    logger.info(f"Loading environment variables from {dotenv_path}")
    load_dotenv(dotenv_path=str(dotenv_path), override=True)
| 19 | + |
# Load the .env file at import time, before any configuration value is read.
load_env_files()
| 21 | + |
def reload_config():
    """Discard all cached lookups and recompute every configuration value.

    Meant to be called after ``os.environ`` has been modified (for example
    when command-line parameters override environment variables) so that the
    module-level settings reflect the new values.

    NOTE: this does not re-read the ``.env`` file; it only re-evaluates the
    current process environment. Code that copied a setting by value
    (``from <this module> import NAME``) keeps its old copy.
    """
    # Clear in place instead of rebinding the global: any reference to the
    # cache dict held elsewhere keeps pointing at the live cache.
    config_cache.clear()
    init_config()
    logger.info("Configuration reloaded")
| 28 | + |
def get_env(key: str, default=None):
    """Return the value of ``key`` from the process environment.

    Only ``os.environ`` is consulted. Values from the ``.env`` file take
    priority because they were already copied into the environment (with
    override enabled) when this module loaded it.

    Args:
        key: Name of the environment variable.
        default: Value returned when the variable is not set.
    """
    return os.getenv(key, default)
3 | 35 |
|
def to_str(key: str, default: str = "") -> str:
    """Return the environment variable ``key`` as a stripped string.

    Args:
        key: Name of the environment variable.
        default: Fallback used when the variable is unset or empty. ``None``
            is treated like an empty string.

    Returns:
        The variable's value with surrounding whitespace removed, or the
        stripped ``default`` when the variable is unset or empty.
    """
    # Cache only the raw environment lookup. Caching the final result under
    # the bare key would make a later call with a different ``default``
    # return the answer computed for the first caller's default.
    if key not in config_cache:
        config_cache[key] = get_env(key)
    raw = config_cache[key]

    value = default if raw is None else raw
    if value:
        return value.strip()

    # Unset/empty variable: fall back to the default without crashing on None.
    return default.strip() if default else ""
9 | 46 |
|
10 | 47 |
|
def to_none_str(key: str, default: str = None) -> str:
    """Return the environment variable ``key`` stripped, or ``None`` if empty.

    Args:
        key: Name of the environment variable.
        default: Value used in place of the variable when it is not set.

    Returns:
        The stripped value, or ``None`` when the effective value (the
        variable, or ``default`` if the variable is unset) is empty or
        ``None``.
    """
    cache_key = f"none_str:{key}"

    # Cache the raw lookup only, so the caller's ``default`` is still honoured
    # on a cache hit instead of replaying an earlier caller's default.
    if cache_key not in config_cache:
        config_cache[cache_key] = get_env(key)
    raw = config_cache[cache_key]

    value = default if raw is None else raw
    return value.strip() if value else None
16 | 58 |
|
17 | 59 |
|
def to_endpoint(key: str, default: str = "") -> str:
    """Return the string setting ``key`` with trailing ``/`` characters removed.

    Args:
        key: Name of the environment variable.
        default: Fallback passed through to ``to_str``.

    Returns:
        The value produced by ``to_str(key, default)`` without trailing
        slashes, so callers can append paths such as ``"/api"`` safely.
    """
    # No extra cache layer here: to_str already caches the lookup, and a
    # second cache keyed only by name would ignore ``default`` on later calls.
    return to_str(key, default).rstrip("/")
21 | 69 |
|
22 | 70 |
|
def to_list(key: str, default: list) -> list:
    """Parse the comma-separated environment variable ``key`` into a list.

    Whitespace around each item is removed and empty items are dropped, so
    ``"a, b,,c"`` yields ``["a", "b", "c"]``.

    Args:
        key: Name of the environment variable.
        default: List returned (as a copy) when the variable is unset or
            empty.

    Returns:
        A new list on every call; mutating it affects neither the cache nor
        the caller's ``default``.
    """
    cache_key = f"list:{key}"

    # Cache the parsed value, or None for "unset", never the default: that
    # way a later call with a different default still gets its own default.
    if cache_key not in config_cache:
        raw = to_str(key, "")
        if raw:
            stripped = (part.strip() for part in raw.split(","))
            config_cache[cache_key] = [item for item in stripped if item]
        else:
            config_cache[cache_key] = None

    parsed = config_cache[cache_key]
    if parsed is None:
        # Copy so callers cannot mutate the list object they passed in
        # through the returned value.
        return default if default is None else list(default)
    return list(parsed)
30 | 85 |
|
31 | 86 |
|
def to_bool(key: str, default: bool) -> bool:
    """Interpret the environment variable ``key`` as a boolean.

    Args:
        key: Name of the environment variable.
        default: Value returned when the variable is unset or empty.

    Returns:
        ``True`` when the value is ``"true"`` (case-insensitive) or ``"1"``,
        ``False`` for any other non-empty value, ``default`` otherwise.
    """
    cache_key = f"bool:{key}"

    # Cache the parsed flag, or None for "unset", so the default supplied by
    # each caller is applied at return time rather than frozen in the cache.
    if cache_key not in config_cache:
        raw = to_str(key, "")
        config_cache[cache_key] = (
            (raw.lower() == "true" or raw == "1") if raw else None
        )

    parsed = config_cache[cache_key]
    return default if parsed is None else parsed
39 | 101 |
|
40 | 102 |
|
def to_float(key: str, default: float) -> float:
    """Parse the environment variable ``key`` as a float.

    Args:
        key: Name of the environment variable.
        default: Value returned when the variable is unset, empty, or not a
            valid float literal.

    Returns:
        The parsed float, or ``default``. An invalid value is reported once
        with a warning instead of raising.
    """
    cache_key = f"float:{key}"

    # Cache the parse outcome (None = unset or invalid) rather than the
    # default, so every caller gets the default it asked for and the warning
    # is emitted only on the first lookup.
    if cache_key not in config_cache:
        value = to_str(key, "")
        parsed = None
        if value:
            try:
                parsed = float(value)
            except ValueError:
                logger.warning(f"Could not convert {key}={value} to float, using default {default}")
        config_cache[cache_key] = parsed

    parsed = config_cache[cache_key]
    return default if parsed is None else parsed
48 | 121 |
|
49 | 122 |
|
def to_int(key: str, default: int) -> int:
    """Parse the environment variable ``key`` as an integer.

    Args:
        key: Name of the environment variable.
        default: Value returned when the variable is unset, empty, or not a
            valid integer literal.

    Returns:
        The parsed integer, or ``default``. An invalid value is reported once
        with a warning instead of raising.
    """
    cache_key = f"int:{key}"

    # Cache the parse outcome (None = unset or invalid) rather than the
    # default, so every caller gets the default it asked for and the warning
    # is emitted only on the first lookup.
    if cache_key not in config_cache:
        value = to_str(key, "")
        parsed = None
        if value:
            try:
                parsed = int(value)
            except ValueError:
                logger.warning(f"Could not convert {key}={value} to int, using default {default}")
        config_cache[cache_key] = parsed

    parsed = config_cache[cache_key]
    return default if parsed is None else parsed
57 | | - |
58 | | - |
def init_config():
    """(Re)compute every module-level configuration value.

    Each setting is read through the ``to_*`` helpers and assigned to a
    module global. Derived values (``ENABLE_AZURE_SPEECH``, ``S3_API``,
    ``S3_SPACE``, ``TG_API``) are built from the settings read just before
    them, so the assignment order below matters.
    """
    global CORS_ALLOW_ORIGINS, MAX_FILE_SIZE, PDF_MAX_IMAGES
    global AZURE_SPEECH_KEY, AZURE_SPEECH_REGION, ENABLE_AZURE_SPEECH
    global STORAGE_TYPE, LOCAL_STORAGE_DOMAIN
    global S3_BUCKET, S3_ACCESS_KEY, S3_SECRET_KEY, S3_REGION
    global S3_DOMAIN, S3_DIRECT_URL_DOMAIN, S3_SIGN_VERSION
    global S3_API, S3_SPACE
    global TG_ENDPOINT, TG_PASSWORD, TG_API
    global OCR_ENDPOINT, OCR_SKIP_MODELS, OCR_SPEC_MODELS
    global LOG_LEVEL
    global MARKITDOWN_ENABLE, MARKITDOWN_ENABLE_PLUGINS, MARKITDOWN_USE_DOCINTEL
    global MARKITDOWN_DOCINTEL_ENDPOINT, MARKITDOWN_DOCINTEL_KEY, MARKITDOWN_USE_LLM, MARKITDOWN_LLM_MODEL
    global MARKITDOWN_LLM_ENDPOINT, MARKITDOWN_LLM_API_KEY

    # General Config
    CORS_ALLOW_ORIGINS = to_list("CORS_ALLOW_ORIGINS", ["*"])  # CORS Allow Origins
    MAX_FILE_SIZE = to_float("MAX_FILE_SIZE", -1)  # Max File Size
    PDF_MAX_IMAGES = to_int("PDF_MAX_IMAGES", 10)  # PDF Max Images
    AZURE_SPEECH_KEY = to_str("AZURE_SPEECH_KEY")  # Azure Speech Key
    AZURE_SPEECH_REGION = to_str("AZURE_SPEECH_REGION")  # Azure Speech Region
    # bool(...) so the flag is a real boolean; a bare `and` would evaluate to
    # one of the two strings instead.
    ENABLE_AZURE_SPEECH = bool(AZURE_SPEECH_KEY and AZURE_SPEECH_REGION)  # Enable Azure Speech

    # Storage Config
    STORAGE_TYPE = to_str("STORAGE_TYPE", "common")  # Storage Type
    LOCAL_STORAGE_DOMAIN = to_endpoint("LOCAL_STORAGE_DOMAIN", "")  # Local Storage Domain
    S3_BUCKET = to_str("S3_BUCKET", "")  # S3 Bucket
    S3_ACCESS_KEY = to_str("S3_ACCESS_KEY", "")  # S3 Access Key
    S3_SECRET_KEY = to_str("S3_SECRET_KEY", "")  # S3 Secret Key
    S3_REGION = to_str("S3_REGION", "")  # S3 Region
    S3_DOMAIN = to_endpoint("S3_DOMAIN", "")  # S3 Domain (Optional)
    S3_DIRECT_URL_DOMAIN = to_endpoint("S3_DIRECT_URL_DOMAIN", "")  # S3 Direct/Proxy URL Domain (Optional)
    S3_SIGN_VERSION = to_none_str("S3_SIGN_VERSION")  # S3 Sign Version
    # Fall back to the bucket/region-derived URL when no custom domain is set.
    S3_API = S3_DOMAIN or f"https://{S3_BUCKET}.s3.{S3_REGION}.amazonaws.com"  # S3 API
    S3_SPACE = S3_DIRECT_URL_DOMAIN or S3_API  # S3 Image URL Domain
    TG_ENDPOINT = to_endpoint("TG_ENDPOINT", "")  # Telegram Endpoint
    TG_PASSWORD = to_str("TG_PASSWORD", "")  # Telegram Password
    # The password query parameter is appended only when a password is set.
    TG_API = TG_ENDPOINT + "/api" + (f"?pass={TG_PASSWORD}" if TG_PASSWORD else "")  # Telegram API

    # OCR Config
    OCR_ENDPOINT = to_endpoint("OCR_ENDPOINT", "")  # OCR Endpoint
    OCR_SKIP_MODELS = to_list("OCR_SKIP_MODELS", [])  # OCR Skip Models
    OCR_SPEC_MODELS = to_list("OCR_SPEC_MODELS", [])  # OCR Specific Models

    # MarkItDown Config
    MARKITDOWN_ENABLE = to_bool("MARKITDOWN_ENABLE", False)  # Enable MarkItDown
    MARKITDOWN_ENABLE_PLUGINS = to_bool("MARKITDOWN_ENABLE_PLUGINS", False)  # Enable MarkItDown Plugins
    MARKITDOWN_USE_DOCINTEL = to_bool("MARKITDOWN_USE_DOCINTEL", False)  # Use Document Intelligence
    MARKITDOWN_DOCINTEL_ENDPOINT = to_str("MARKITDOWN_DOCINTEL_ENDPOINT", "")  # Document Intelligence Endpoint
    MARKITDOWN_DOCINTEL_KEY = to_str("MARKITDOWN_DOCINTEL_KEY", "")  # Document Intelligence API Key
    MARKITDOWN_USE_LLM = to_bool("MARKITDOWN_USE_LLM", False)  # Use LLM for image descriptions
    MARKITDOWN_LLM_MODEL = to_str("MARKITDOWN_LLM_MODEL", "gpt-4o")  # LLM Model for image descriptions
    MARKITDOWN_LLM_ENDPOINT = to_str("MARKITDOWN_LLM_ENDPOINT", "")  # LLM Endpoint
    MARKITDOWN_LLM_API_KEY = to_str("MARKITDOWN_LLM_API_KEY", "")  # LLM API Key

    # Normalised to upper case.
    LOG_LEVEL = to_str("LOG_LEVEL", "INFO").upper()  # log level
| 198 | + |
# Populate the module-level configuration values at import time.
init_config()
0 commit comments