Skip to content

Commit 92f03b6

Browse files
committed
Merge branch 'master' into explore-page-fix
2 parents 0f3263d + ae608f7 commit 92f03b6

File tree

16 files changed

+203
-148
lines changed

16 files changed

+203
-148
lines changed

bots/admin.py

Lines changed: 6 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -445,11 +445,12 @@ class SavedRunAdmin(admin.ModelAdmin):
445445

446446
readonly_fields = [
447447
"open_in_gooey",
448-
"parent",
448+
"view_parent_published_run",
449449
"view_bots",
450450
"price",
451451
"view_usage_cost",
452452
"transaction",
453+
"parent",
453454
"created_at",
454455
"updated_at",
455456
"run_time",
@@ -485,7 +486,10 @@ def view_user(self, saved_run: SavedRun):
485486
view_user.short_description = "View User"
486487

487488
def view_bots(self, saved_run: SavedRun):
488-
return list_related_html_url(saved_run.botintegrations)
489+
pr = saved_run.parent_published_run()
490+
if not pr:
491+
raise SavedRun.DoesNotExist
492+
return list_related_html_url(pr.botintegrations)
489493

490494
view_bots.short_description = "View Bots"
491495

bots/models/saved_run.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -56,6 +56,7 @@ class SavedRun(models.Model):
5656
null=True,
5757
blank=True,
5858
related_name="children",
59+
verbose_name="Parent Run",
5960
)
6061
parent_version = models.ForeignKey(
6162
"bots.PublishedRunVersion",

daras_ai_v2/asr.py

Lines changed: 56 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -121,6 +121,17 @@
121121
"srp", "swe", "swh", "tam", "tel", "tgk", "tgl", "tha", "tur", "ukr", "urd", "uzn", "vie", "yor", "yue", "zul",
122122
} # fmt: skip
123123

124+
# Eleven Labs Scribe v1 - supports 99 languages with 3-letter ISO codes
125+
ELEVENLABS_SUPPORTED = {
126+
"afr", "amh", "ara", "hye", "asm", "ast", "aze", "bel", "ben", "bos", "bul", "mya", "yue", "cat", "ceb", "nya",
127+
"hrv", "ces", "dan", "nld", "eng", "est", "fil", "fin", "fra", "ful", "glg", "lug", "kat", "deu", "ell", "guj",
128+
"hau", "heb", "hin", "hun", "isl", "ibo", "ind", "gle", "ita", "jpn", "jav", "kea", "kan", "kaz", "khm", "kor",
129+
"kur", "kir", "lao", "lav", "lin", "lit", "luo", "ltz", "mkd", "msa", "mal", "mlt", "zho", "mri", "mar", "mon",
130+
"nep", "nso", "nor", "oci", "ori", "pus", "fas", "pol", "por", "pan", "ron", "rus", "srp", "sna", "snd", "slk",
131+
"slv", "som", "spa", "swa", "swe", "tam", "tgk", "tel", "tha", "tur", "ukr", "umb", "urd", "uzb", "vie", "cym",
132+
"wol", "xho", "zul",
133+
} # fmt: skip
134+
124135
AZURE_SUPPORTED = {
125136
"af-ZA", "am-ET", "ar-AE", "ar-BH", "ar-DZ", "ar-EG", "ar-IL", "ar-IQ", "ar-JO", "ar-KW", "ar-LB", "ar-LY", "ar-MA",
126137
"ar-OM", "ar-PS", "ar-QA", "ar-SA", "ar-SY", "ar-TN", "ar-YE", "az-AZ", "bg-BG", "bn-IN", "bs-BA", "ca-ES", "cs-CZ",
@@ -260,6 +271,7 @@ class AsrModels(Enum):
260271
usm = "Chirp / USM (Google V2)"
261272
deepgram = "Deepgram"
262273
azure = "Azure Speech"
274+
elevenlabs = "ElevenLabs Scribe v1"
263275
seamless_m4t_v2 = "Seamless M4T v2 (Facebook Research)"
264276
mms_1b_all = "Massively Multilingual Speech (MMS) (Facebook Research)"
265277

@@ -329,6 +341,7 @@ def supports_input_prompt(self) -> bool:
329341
AsrModels.seamless_m4t_v2: "facebook/seamless-m4t-v2-large",
330342
AsrModels.mms_1b_all: "facebook/mms-1b-all",
331343
AsrModels.lelapa: "lelapa-vulavula",
344+
AsrModels.elevenlabs: "elevenlabs-scribe-v1",
332345
}
333346

334347
forced_asr_languages = {
@@ -354,6 +367,7 @@ def supports_input_prompt(self) -> bool:
354367
AsrModels.gcp_v1: GCP_V1_SUPPORTED,
355368
AsrModels.usm: CHIRP_SUPPORTED,
356369
AsrModels.deepgram: DEEPGRAM_SUPPORTED,
370+
AsrModels.elevenlabs: ELEVENLABS_SUPPORTED,
357371
AsrModels.seamless_m4t_v2: SEAMLESS_v2_ASR_SUPPORTED,
358372
AsrModels.azure: AZURE_SUPPORTED,
359373
AsrModels.mms_1b_all: MMS_SUPPORTED,
@@ -971,6 +985,33 @@ def get_google_auth_session(*scopes: str) -> tuple[AuthorizedSession, str]:
971985
return AuthorizedSession(credentials=creds), project
972986

973987

988+
def elevenlabs_asr(audio_url: str, language: str = None) -> dict:
989+
"""
990+
Call ElevenLabs Speech-to-Text API
991+
"""
992+
audio_r = requests.get(audio_url)
993+
raise_for_status(audio_r, is_user_url=True)
994+
995+
# Set up the files and form data for the multipart request
996+
files = {"file": audio_r.content}
997+
data = {"model_id": "scribe_v1"}
998+
headers = {"xi-api-key": settings.ELEVEN_LABS_API_KEY}
999+
1000+
# Language parameter is sent in the form data
1001+
if language:
1002+
data["language_code"] = language
1003+
1004+
response = requests.post(
1005+
"https://api.elevenlabs.io/v1/speech-to-text",
1006+
files=files,
1007+
headers=headers,
1008+
data=data,
1009+
)
1010+
raise_for_status(response)
1011+
1012+
return response.json()
1013+
1014+
9741015
def run_asr(
9751016
audio_url: str,
9761017
selected_model: str,
@@ -1017,6 +1058,21 @@ def run_asr(
10171058

10181059
if selected_model == AsrModels.azure:
10191060
return azure_asr(audio_url, language)
1061+
elif selected_model == AsrModels.elevenlabs:
1062+
result = elevenlabs_asr(audio_url, language)
1063+
chunks = []
1064+
for word_data in result.get("words", []):
1065+
if word_data.get("type") == "word":
1066+
speaker = word_data.get("speaker_id", 0)
1067+
else:
1068+
speaker = None
1069+
chunk = {
1070+
"timestamp": (word_data["start"], word_data["end"]),
1071+
"text": word_data["text"],
1072+
"speaker": speaker,
1073+
}
1074+
chunks.append(chunk)
1075+
data = {"text": result["text"], "chunks": chunks}
10201076
elif selected_model == AsrModels.whisper_large_v3:
10211077
import replicate
10221078

daras_ai_v2/language_model.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -122,6 +122,8 @@ class LargeLanguageModels(Enum):
122122
price=1,
123123
is_vision_model=True,
124124
supports_json=True,
125+
is_deprecated=True,
126+
redirect_to="gpt_4_o",
125127
)
126128

127129
# https://platform.openai.com/docs/models/o4-mini

daras_ai_v2/stable_diffusion.py

Lines changed: 6 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -117,6 +117,7 @@ def _deprecated(cls):
117117
cls.openjourney,
118118
cls.analog_diffusion,
119119
cls.protogen_5_3,
120+
cls.dall_e,
120121
}
121122

122123

@@ -444,18 +445,6 @@ def _get_gpt_image_1_img_size(width: int, height: int) -> tuple[int, int]:
444445
return 1024, 1536
445446

446447

447-
def prepare_init_image(
448-
init_image_bytes: bytes, width: int, height: int
449-
) -> tuple[bytes, bytes]:
450-
image = resize_img_pad(init_image_bytes, (width, height))
451-
image = rgb_img_to_rgba(image)
452-
mask = io.BytesIO()
453-
Image.new("RGBA", (width, height), (0, 0, 0, 0)).save(mask, format="PNG")
454-
mask = mask.getvalue()
455-
456-
return image, mask
457-
458-
459448
def img2img(
460449
*,
461450
selected_model: str,
@@ -468,12 +457,13 @@ def img2img(
468457
negative_prompt: str = None,
469458
guidance_scale: float,
470459
seed: int = 42,
460+
gpt_image_1_quality: typing.Literal["low", "medium", "high"] | None = None,
471461
):
472462
prompt_strength = prompt_strength or 0.7
473463
assert 0 <= prompt_strength <= 0.9, "Prompt Strength must be in range [0, 0.9]"
474464

475465
match selected_model:
476-
case Img2ImgModels.dall_e.name | Img2ImgModels.gpt_image_1.name:
466+
case Img2ImgModels.gpt_image_1.name:
477467
from openai import NOT_GIVEN, OpenAI
478468

479469
init_height, init_width, _ = bytes_to_cv2_img(init_image_bytes).shape
@@ -487,20 +477,19 @@ def img2img(
487477
width, height = _get_gpt_image_1_img_size(init_width, init_height)
488478
response_format = NOT_GIVEN
489479

490-
image, mask = prepare_init_image(
491-
init_image_bytes, width=width, height=height
492-
)
480+
image = resize_img_pad(init_image_bytes, (width, height))
481+
image = rgb_img_to_rgba(image)
493482

494483
client = OpenAI()
495484
with capture_openai_content_policy_violation():
496485
response = client.images.edit(
497486
model=img2img_model_ids[Img2ImgModels[selected_model]],
498487
prompt=prompt,
499488
image=("image.png", image),
500-
mask=("mask.png", mask),
501489
n=num_outputs,
502490
size=f"{width}x{height}",
503491
response_format=response_format,
492+
quality=gpt_image_1_quality,
504493
)
505494

506495
# Record usage costs if usage data is available

poetry.lock

Lines changed: 4 additions & 5 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -82,7 +82,7 @@ anthropic = "^0.34.1"
8282
azure-cognitiveservices-speech = "^1.37.0"
8383
twilio = "^9.2.3"
8484
sentry-sdk = {version = "1.45.0", extras = ["loguru"]}
85-
gooey-gui = "0.5.5"
85+
gooey-gui = "0.6.0"
8686
django-safedelete = "^1.4.0"
8787
numexpr = "^2.10.1"
8888
django-csp = "^3.8"

recipes/BulkEval.py

Lines changed: 1 addition & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -102,13 +102,7 @@ def _render_results(results: list[AggFunctionResult]):
102102
+ [
103103
[
104104
columns[i],
105-
dict(
106-
kind="number",
107-
readonly=True,
108-
displayData=str(values[i]),
109-
data=values[i],
110-
themeOverride=dict(bgCell=colors[i]),
111-
),
105+
dict(value=values[i], style=dict(backgroundColor=colors[i])),
112106
g[i].get("count", 1),
113107
]
114108
for i in range(len(g))

0 commit comments

Comments
 (0)