StephDietz / watch-this

Home Page:https://what-to-watch-roan.vercel.app

Geek Repo:Geek Repo

Github PK Tool:Github PK Tool

suggestion: update to support the gpt-3.5-turbo model

rgaisandbox opened this issue · comments

The newer gpt-3.5-turbo model is cheaper to run and should be more powerful; however, it has a different API response format.
I suggest that you update the API to support this. I tried modifying it myself but ran into some issues. If I do fix this, I will submit a pull request.

commented

I also ran into some issues. I thought it should work, but it doesn't.

import { createParser } from 'eventsource-parser';
// OpenAI API key, injected at build time from the Vite environment
// (VITE_-prefixed vars are exposed to the bundle — keep this server-side only).
const key = import.meta.env.VITE_OPENAI_API_KEY;

/**
 * Request body for OpenAI's `/v1/chat/completions` endpoint.
 * Mirrors the fields this app sends; see the OpenAI chat API reference
 * for the semantics of each sampling parameter.
 */
interface OpenAIStreamPayload {
// Model identifier, e.g. 'gpt-3.5-turbo'.
model: string;
// Conversation history; each entry is a { role, content } pair.
messages: Array<{ role: string; content: string }>;
temperature: number;
top_p: number;
frequency_penalty: number;
presence_penalty: number;
// Upper bound on generated tokens for the completion.
max_tokens: number;
// When true the API responds with a server-sent-event (SSE) stream.
stream: boolean;
// Number of completions to generate.
n: number;
}

/**
 * POSTs `payload` to OpenAI's chat-completions endpoint and returns a
 * ReadableStream of the generated text, decoded from the SSE response.
 *
 * @param payload - chat-completions request body (must have `stream: true`).
 * @returns a byte stream of UTF-8 encoded completion text.
 * @throws Error when the API responds with a non-2xx status.
 */
async function OpenAIStream(payload: OpenAIStreamPayload) {
	const encoder = new TextEncoder();
	const decoder = new TextDecoder();

	// Count of content chunks emitted so far — used to drop the leading
	// newline prefix the model sometimes produces.
	let counter = 0;

	const res = await fetch('https://api.openai.com/v1/chat/completions', {
		headers: {
			'Content-Type': 'application/json',
			Authorization: `Bearer ${key}`
		},
		method: 'POST',
		body: JSON.stringify(payload)
	});

	// A non-2xx response carries a JSON error body, not an SSE stream —
	// fail fast instead of feeding it to the SSE parser.
	if (!res.ok) {
		throw new Error(`OpenAI API error: ${res.status} ${res.statusText}`);
	}

	const stream = new ReadableStream({
		async start(controller) {
			function onParse(event: { type: string; data?: string }) {
				if (event.type !== 'event') return;
				const data = event.data ?? '';
				// https://platform.openai.com/docs/api-reference/chat/create
				if (data === '[DONE]') {
					controller.close();
					return;
				}
				try {
					const json = JSON.parse(data);
					// The chat API streams deltas: the first delta carries only the
					// role and the last one is empty, so `content` may be absent.
					// Defaulting to '' avoids the TypeError that previously aborted
					// the stream via controller.error.
					const text: string = json.choices[0]?.delta?.content ?? '';

					if (counter < 2 && (text.match(/\n/) || []).length) {
						// prefix newline(s) (e.g. "\n\n") — skip them
						return;
					}
					controller.enqueue(encoder.encode(text));
					counter++;
				} catch (e) {
					controller.error(e);
				}
			}

			// The SSE response may arrive fragmented across many chunks; the
			// parser reassembles them and fires onParse once per complete event.
			const parser = createParser(onParse);
			// https://web.dev/streams/#asynchronous-iteration
			for await (const chunk of res.body as any) {
				parser.feed(decoder.decode(chunk));
			}
		}
	});

	return stream;
}

export async function POST({ request }: { request: any }) {
const { searched } = await request.json();
const payload = {
model: 'gpt-3.5-turbo',
messages: [{ role: 'user', content: searched }],
temperature: 0.7,
max_tokens: 2048,
top_p: 1.0,
frequency_penalty: 0.0,
stream: true,
presence_penalty: 0.0,
n: 1,
};
const stream = await OpenAIStream(payload);
return new Response(stream);
}