argmaxinc / WhisperKit

How do I use a parameter like initial_prompt in Python's Whisper?

The best way to do this is to use the promptTokens parameter in DecodingOptions. This adds whatever tokens you pass to the <|startofprev|> section of the prompt that is passed to the decoder, and it can help with spelling and punctuation style. Keep in mind that this is not like an LLM prompt: it should be used purely as an example of the style and spelling of the output you're looking for. Check out these test cases to see how you might implement it in your code:

WhisperKit/Tests/WhisperKitTests/UnitTests.swift

Lines 730 to 759 in 3bab206

    
           func testPromptTokens() async throws { 
        
               let whisperKit = try await WhisperKit(modelFolder: tinyModelPath(), verbose: true, logLevel: .debug) 
        
               let promptText = " prompt to encourage output without any punctuation and without capitalizing americans as if it was already normalized" 
        
               let tokenizer = try XCTUnwrap(whisperKit.tokenizer) 
        
               let promptTokens = tokenizer.encode(text: promptText).filter { $0 < tokenizer.specialTokens.specialTokenBegin } 
        
               let options = DecodingOptions(skipSpecialTokens: true, promptTokens: promptTokens) 
        
               let result = try await XCTUnwrapAsync( 
        
                   await transcribe(with: .tiny, options: options), 
        
                   "Failed to transcribe" 
        
               ) 
        
               XCTAssertEqual(result.segments.first?.text, " and so my fellow americans ask not what your country can do for you ask what you can do for your country.") 
        
           } 
        
           func testPrefixTokens() async throws { 
        
               let whisperKit = try await WhisperKit(modelFolder: tinyModelPath(), verbose: true, logLevel: .debug) 
        
               // Prefix to encourage output without any punctuation and without capitalizing americans as if it was already normalized 
        
               let prefixText = " and so my fellow americans" 
        
               let tokenizer = try XCTUnwrap(whisperKit.tokenizer) 
        
               let prefixTokens = tokenizer.encode(text: prefixText).filter { $0 < tokenizer.specialTokens.specialTokenBegin } 
        
               let options = DecodingOptions(skipSpecialTokens: true, prefixTokens: prefixTokens) 
        
               let result = try await XCTUnwrapAsync( 
        
                   await transcribe(with: .tiny, options: options), 
        
                   "Failed to transcribe" 
        
               ) 
        
               XCTAssertEqual(result.segments.first?.text, " and so my fellow americans ask not what your country can do for you ask what you can do for your country.") 
        
           }

	func testPromptTokens() async throws {
	let whisperKit = try await WhisperKit(modelFolder: tinyModelPath(), verbose: true, logLevel: .debug)
	let promptText = " prompt to encourage output without any punctuation and without capitalizing americans as if it was already normalized"
	let tokenizer = try XCTUnwrap(whisperKit.tokenizer)
	let promptTokens = tokenizer.encode(text: promptText).filter { $0 < tokenizer.specialTokens.specialTokenBegin }
	let options = DecodingOptions(skipSpecialTokens: true, promptTokens: promptTokens)

	let result = try await XCTUnwrapAsync(
	await transcribe(with: .tiny, options: options),
	"Failed to transcribe"
	)

	XCTAssertEqual(result.segments.first?.text, " and so my fellow americans ask not what your country can do for you ask what you can do for your country.")
	}

	func testPrefixTokens() async throws {
	let whisperKit = try await WhisperKit(modelFolder: tinyModelPath(), verbose: true, logLevel: .debug)
	// Prefix to encourage output without any punctuation and without capitalizing americans as if it was already normalized
	let prefixText = " and so my fellow americans"
	let tokenizer = try XCTUnwrap(whisperKit.tokenizer)
	let prefixTokens = tokenizer.encode(text: prefixText).filter { $0 < tokenizer.specialTokens.specialTokenBegin }
	let options = DecodingOptions(skipSpecialTokens: true, prefixTokens: prefixTokens)

	let result = try await XCTUnwrapAsync(
	await transcribe(with: .tiny, options: options),
	"Failed to transcribe"
	)

	XCTAssertEqual(result.segments.first?.text, " and so my fellow americans ask not what your country can do for you ask what you can do for your country.")
	}