import{useWhisper}from'@octoml/use-whisper'constApp=()=>{/** * you have more control like this * do whatever you want with the recorded speech * send it to your own custom server * and return the response back to useWhisper */constonTranscribe=(blob: Blob)=>{constbase64=awaitnewPromise<string|ArrayBuffer|null>((resolve)=>{constreader=newFileReader()reader.onloadend=()=>resolve(reader.result)reader.readAsDataURL(blob)})constbody=JSON.stringify({file: base64,model: 'whisper-1'})constheaders={'Content-Type': 'application/json'}const{default: axios}=awaitimport('axios')constresponse=awaitaxios.post('/api/whisper',body,{
headers,})const{ text }=awaitresponse.data// you must return result from your server in Transcript formatreturn{
blob,
text,}}const{ transcript }=useWhisper({// callback to handle transcription with custom server
onTranscribe,})return(<div><p>{transcript.text}</p></div>)}
Remove silence before sending to Whisper to save cost
import{useWhisper}from'@octoml/use-whisper'constApp=()=>{const{ transcript }=useWhisper({apiKey: process.env.WHISPER_API_TOKEN,apiUrl: process.env.WHISPER_API_URL// use ffmpeg-wasp to remove silence from recorded speechremoveSilence: true,})return(<div><p>{transcript.text}</p></div>)}
Auto start recording on component mounted
import{useWhisper}from'@octoml/use-whisper'constApp=()=>{const{ transcript }=useWhisper({apiKey: process.env.WHISPER_API_TOKEN,apiUrl: process.env.WHISPER_API_URL// will auto start recording speech upon component mountedautoStart: true,})return(<div><p>{transcript.text}</p></div>)}
Keep recording as long as the user is speaking
import{useWhisper}from'@octoml/use-whisper'constApp=()=>{const{ transcript }=useWhisper({apiKey: process.env.WHISPER_API_TOKEN,apiUrl: process.env.WHISPER_API_URLnonStop: true,// keep recording as long as the user is speakingstopTimeout: 5000,// auto stop after 5 seconds})return(<div><p>{transcript.text}</p></div>)}
Customize Whisper API config when autoTranscribe is true
import{useWhisper}from'@octoml/use-whisper'constApp=()=>{const{ transcript }=useWhisper({apiKey: process.env.WHISPER_API_TOKEN,apiUrl: process.env.WHISPER_API_URLautoTranscribe: true,whisperConfig: {prompt: 'previous conversation',// you can pass previous conversation for contextresponse_format: 'text',// output text instead of jsontemperature: 0.8,// random outputlanguage: 'es',// Spanish},})return(<div><p>{transcript.text}</p></div>)}
Callback function to handle transcription on your own custom server
WhisperApiConfig
Name
Type
Default Value
Description
prompt
string
undefined
An optional text to guide the model's style or continue a previous audio segment. The prompt should match the audio language.
response_format
string
json
The format of the transcript output, in one of these options: json, text, srt, verbose_json, or vtt.
temperature
number
0
The sampling temperature, between 0 and 1. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. If set to 0, the model will use log probability to automatically increase the temperature until certain thresholds are hit.
language
string
en
The language of the input audio. Supplying the input language in ISO-639-1 format will improve accuracy and latency.
Return Object
Name
Type
Description
recording
boolean
speech recording state
speaking
boolean
true while the user is detected to be speaking
transcribing
boolean
true while removing silence from the speech and sending the request to the OpenAI Whisper API