ValueError: [dequantize] The matrix should be given as a uint32
ahmetkca opened this issue · comments
Hugging Face repo: mlx-community/whisper-large-v3-mlx-4bit
Code that calls `whisper.transcribe`:
import whisper
whisper_model_hf_repos = [
"mlx-community/whisper-large-v3-mlx-4bit",
]
with tempfile.NamedTemporaryFile(suffix=".webm", dir="./speech_files", delete=True) as temp:
temp.write(current_speech.read())
temp.seek(0)
text = whisper.transcribe(temp.name, verbose=True, path_or_hf_repo=whisper_model_hf_repos[0])
elapsed = time.time() - start
print(f"Transcribed in {elapsed} seconds: {text}")
Last commit in my checkout (HEAD of main):
commit 574ad7f6fe0b41ab0f9923d9b737dca865f86cdd (HEAD -> main, origin/main, origin/HEAD)
Author: Awni Hannun <awni@apple.com>
Date: Fri Apr 19 10:46:59 2024 -0700
fix dequantization (#693)
I am getting the following error:
Detecting language using up to the first 30 seconds. Use the `language` decoding option to specify the language
ERROR: Exception in ASGI application
Traceback (most recent call last):
File "/Users/ahmetkca/Projects/real-time-voice-streaming/.venv/lib/python3.12/site-packages/uvicorn/protocols/websockets/websockets_impl.py", line 240, in run_asgi
result = await self.app(self.scope, self.asgi_receive, self.asgi_send)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/Users/ahmetkca/Projects/real-time-voice-streaming/.venv/lib/python3.12/site-packages/uvicorn/middleware/proxy_headers.py", line 69, in __call__
return await self.app(scope, receive, send)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/Users/ahmetkca/Projects/real-time-voice-streaming/.venv/lib/python3.12/site-packages/fastapi/applications.py", line 1054, in __call__
await super().__call__(scope, receive, send)
File "/Users/ahmetkca/Projects/real-time-voice-streaming/.venv/lib/python3.12/site-packages/starlette/applications.py", line 123, in __call__
await self.middleware_stack(scope, receive, send)
File "/Users/ahmetkca/Projects/real-time-voice-streaming/.venv/lib/python3.12/site-packages/starlette/middleware/errors.py", line 151, in __call__
await self.app(scope, receive, send)
File "/Users/ahmetkca/Projects/real-time-voice-streaming/.venv/lib/python3.12/site-packages/starlette/middleware/exceptions.py", line 65, in __call__
await wrap_app_handling_exceptions(self.app, conn)(scope, receive, send)
File "/Users/ahmetkca/Projects/real-time-voice-streaming/.venv/lib/python3.12/site-packages/starlette/_exception_handler.py", line 64, in wrapped_app
raise exc
File "/Users/ahmetkca/Projects/real-time-voice-streaming/.venv/lib/python3.12/site-packages/starlette/_exception_handler.py", line 53, in wrapped_app
await app(scope, receive, sender)
File "/Users/ahmetkca/Projects/real-time-voice-streaming/.venv/lib/python3.12/site-packages/starlette/routing.py", line 756, in __call__
await self.middleware_stack(scope, receive, send)
File "/Users/ahmetkca/Projects/real-time-voice-streaming/.venv/lib/python3.12/site-packages/starlette/routing.py", line 776, in app
await route.handle(scope, receive, send)
File "/Users/ahmetkca/Projects/real-time-voice-streaming/.venv/lib/python3.12/site-packages/starlette/routing.py", line 373, in handle
await self.app(scope, receive, send)
File "/Users/ahmetkca/Projects/real-time-voice-streaming/.venv/lib/python3.12/site-packages/starlette/routing.py", line 96, in app
await wrap_app_handling_exceptions(app, session)(scope, receive, send)
File "/Users/ahmetkca/Projects/real-time-voice-streaming/.venv/lib/python3.12/site-packages/starlette/_exception_handler.py", line 64, in wrapped_app
raise exc
File "/Users/ahmetkca/Projects/real-time-voice-streaming/.venv/lib/python3.12/site-packages/starlette/_exception_handler.py", line 53, in wrapped_app
await app(scope, receive, sender)
File "/Users/ahmetkca/Projects/real-time-voice-streaming/.venv/lib/python3.12/site-packages/starlette/routing.py", line 94, in app
await func(session)
File "/Users/ahmetkca/Projects/real-time-voice-streaming/.venv/lib/python3.12/site-packages/fastapi/routing.py", line 348, in app
await dependant.call(**values)
File "/Users/ahmetkca/Projects/real-time-voice-streaming/main.py", line 74, in websocket_endpoint
text = whisper.transcribe(temp.name, verbose=True, path_or_hf_repo=whisper_model_hf_repos[0])
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/Users/ahmetkca/Projects/real-time-voice-streaming/whisper/transcribe.py", line 173, in transcribe
_, probs = model.detect_language(mel_segment)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/Users/ahmetkca/Projects/real-time-voice-streaming/whisper/decoding.py", line 58, in detect_language
logits = model.logits(x, mel)[:, 0]
^^^^^^^^^^^^^^^^^^^^
File "/Users/ahmetkca/Projects/real-time-voice-streaming/whisper/whisper.py", line 249, in logits
return self.decoder(tokens, audio_features)[0]
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/Users/ahmetkca/Projects/real-time-voice-streaming/whisper/whisper.py", line 186, in __call__
self.token_embedding(x)
File "/Users/ahmetkca/Projects/real-time-voice-streaming/.venv/lib/python3.12/site-packages/mlx/nn/layers/quantized.py", line 98, in __call__
out = mx.dequantize(
^^^^^^^^^^^^^^
ValueError: [dequantize] The matrix should be given as a uint32
A fix is in #698.