
Commit 2258be9

feat: add vulkan support
1 parent c610a88 commit 2258be9

5 files changed: +13 −27 lines changed


README.md

Lines changed: 11 additions & 2 deletions
@@ -113,6 +113,16 @@ python -m build --wheel # in this repository to build the wheel. Assumes you hav
pip install dist/<generated>.whl
```

+### Vulkan support
+
+Thanks to [@thewh1teagle](https://github.com/thewh1teagle), using Vulkan is now supported:
+
+```shell
+export CMAKE_ARGS="-DGGML_VULKAN=ON"
+python -m build --wheel # in this repository to build the wheel. Assumes you have installed build with pip install build
+pip install dist/<generated>.whl
+```
+
Then download and convert the appropriate model using the original `whisper.cpp` repository, producing a `<model>.mlmodelc` directory.

You can now verify if everything's working:

@@ -180,7 +190,7 @@ usage: pwcpp [-h] [-m MODEL] [--version] [--processors PROCESSORS] [-otxt] [-ovt
             [--translate TRANSLATE] [--no_context NO_CONTEXT] [--single_segment SINGLE_SEGMENT] [--print_special PRINT_SPECIAL]
             [--print_progress PRINT_PROGRESS] [--print_realtime PRINT_REALTIME] [--print_timestamps PRINT_TIMESTAMPS]
             [--token_timestamps TOKEN_TIMESTAMPS] [--thold_pt THOLD_PT] [--thold_ptsum THOLD_PTSUM] [--max_len MAX_LEN]
-            [--split_on_word SPLIT_ON_WORD] [--max_tokens MAX_TOKENS] [--speed_up SPEED_UP] [--audio_ctx AUDIO_CTX]
+            [--split_on_word SPLIT_ON_WORD] [--max_tokens MAX_TOKENS] [--audio_ctx AUDIO_CTX]
             [--prompt_tokens PROMPT_TOKENS] [--prompt_n_tokens PROMPT_N_TOKENS] [--language LANGUAGE] [--suppress_blank SUPPRESS_BLANK]
             [--suppress_non_speech_tokens SUPPRESS_NON_SPEECH_TOKENS] [--temperature TEMPERATURE] [--max_initial_ts MAX_INITIAL_TS]
             [--length_penalty LENGTH_PENALTY] [--temperature_inc TEMPERATURE_INC] [--entropy_thold ENTROPY_THOLD]

@@ -234,7 +244,6 @@ options:
                        split on word rather than on token (when used with max_len)
  --max_tokens MAX_TOKENS
                        max tokens per segment (0 = no limit)
-  --speed_up SPEED_UP   speed-up the audio by 2x using Phase Vocoder
  --audio_ctx AUDIO_CTX
                        overwrite the audio context size (0 = use default)
  --prompt_tokens PROMPT_TOKENS
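
Not part of the diff, but a quick way to check that a wheel built with `GGML_VULKAN=ON` still works is to run the library's own docstring example; `base.en` and `file.mp3` are placeholders, and since the Vulkan backend is selected at build time, the Python call itself is unchanged:

```python
# Minimal sanity check after installing the Vulkan-enabled wheel (a sketch, not part of this commit).
# 'base.en' and 'file.mp3' are placeholders for any converted model and audio file.
from pywhispercpp.model import Model

model = Model('base.en', n_threads=6)
for segment in model.transcribe('file.mp3'):
    print(segment.text)
```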

pywhispercpp/constants.py

Lines changed: 0 additions & 8 deletions
@@ -154,14 +154,6 @@
        'options': None,
        'default': 0
    },
-    # [EXPERIMENTAL] speed-up techniques
-    # note: these can significantly reduce the quality of the output
-    'speed_up': {
-        'type': bool,
-        'description': "speed-up the audio by 2x using Phase Vocoder",
-        'options': None,
-        'default': False
-    },
    'audio_ctx': {
        'type': int,
        'description': "overwrite the audio context size (0 = use default)",
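
For context, every entry in this hunk follows the same schema shape (type, description, options, default). A minimal sketch of that shape, reproducing only the `audio_ctx` entry visible above and using `PARAMS_SCHEMA` as a stand-in name that this diff does not show:

```python
# Sketch of the parameter-schema shape used in pywhispercpp/constants.py.
# Only the 'audio_ctx' entry shown in this hunk is reproduced; the real module
# defines many more entries, and 'PARAMS_SCHEMA' is a hypothetical stand-in name.
PARAMS_SCHEMA = {
    'audio_ctx': {
        'type': int,
        'description': "overwrite the audio context size (0 = use default)",
        'options': None,
        'default': 0,
    },
}

# After this commit, 'speed_up' no longer appears in the schema, so it is not a
# recognized transcription parameter anymore.
for name, spec in PARAMS_SCHEMA.items():
    print(f"{name} ({spec['type'].__name__}): {spec['description']} [default: {spec['default']}]")
```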

pywhispercpp/model.py

Lines changed: 1 addition & 1 deletion
@@ -52,7 +52,7 @@ class Model:
    Example usage.
    ```python
    model = Model('base.en', n_threads=6)
-    segments = model.transcribe('file.mp3', speed_up=True)
+    segments = model.transcribe('file.mp3')
    for segment in segments:
        print(segment.text)
    ```
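
The docstring edit above doubles as the migration path: callers that passed `speed_up=True` simply drop the argument. A short before/after sketch, using the docstring's placeholder model and file names:

```python
from pywhispercpp.model import Model

model = Model('base.en', n_threads=6)

# Before this commit:
#   segments = model.transcribe('file.mp3', speed_up=True)
# The Phase Vocoder path and its 'speed_up' flag are gone, so the call becomes:
segments = model.transcribe('file.mp3')

for segment in segments:
    print(segment.text)
```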

src/main.cpp

Lines changed: 0 additions & 15 deletions
@@ -90,17 +90,6 @@ int whisper_pcm_to_mel_wrapper(
    return whisper_pcm_to_mel(ctx->ptr, samples_ptr, n_samples, n_threads);
};

-int whisper_pcm_to_mel_phase_vocoder_wrapper(
-        struct whisper_context_wrapper* ctx,
-        py::array_t<float> samples,
-        int n_samples,
-        int n_threads){
-    py::buffer_info buf = samples.request();
-    float *samples_ptr = static_cast<float *>(buf.ptr);
-    return whisper_pcm_to_mel_phase_vocoder(ctx->ptr, samples_ptr, n_samples, n_threads);
-
-};
-
int whisper_set_mel_wrapper(
    struct whisper_context_wrapper * ctx,
    py::array_t<float> data,

@@ -388,9 +377,6 @@ PYBIND11_MODULE(_pywhispercpp, m) {
    m.def("whisper_pcm_to_mel", &whisper_pcm_to_mel_wrapper, "Convert RAW PCM audio to log mel spectrogram.\n"
          "The resulting spectrogram is stored inside the provided whisper context.\n"
          "Returns 0 on success");
-    m.def("whisper_pcm_to_mel_phase_vocoder", &whisper_pcm_to_mel_phase_vocoder_wrapper, "Convert RAW PCM audio to log mel spectrogram but applies a Phase Vocoder to speed up the audio x2. \n"
-          "The resulting spectrogram is stored inside the provided whisper context.\n"
-          "Returns 0 on success");

    m.def("whisper_set_mel", &whisper_set_mel_wrapper, " This can be used to set a custom log mel spectrogram inside the provided whisper context.\n"
          "Use this instead of whisper_pcm_to_mel() if you want to provide your own log mel spectrogram.\n"

@@ -490,7 +476,6 @@ PYBIND11_MODULE(_pywhispercpp, m) {
        .def_readwrite("max_len", &whisper_full_params::max_len)
        .def_readwrite("split_on_word", &whisper_full_params::split_on_word)
        .def_readwrite("max_tokens", &whisper_full_params::max_tokens)
-        .def_readwrite("speed_up", &whisper_full_params::speed_up)
        .def_readwrite("audio_ctx", &whisper_full_params::audio_ctx)
        .def_readwrite("initial_prompt", &whisper_full_params::initial_prompt)
        .def_readwrite("prompt_tokens", &whisper_full_params::prompt_tokens)

whisper.cpp
