Token based and model conditional limits (#36)
* use tiktoken for api limit * model conditional char limits on frontend * adjust for completion tokens --------- Co-authored-by: Alan Pogrebinschi <alanpog@gmail.com>
This commit is contained in:
+16
-5
@@ -1,5 +1,9 @@
|
||||
import { Message, OpenAIModel } from "@/types";
|
||||
import { Message, OpenAIModel, OpenAIModelID } from "@/types";
|
||||
import { OpenAIStream } from "@/utils/server";
|
||||
import tiktokenModel from "@dqbd/tiktoken/encoders/cl100k_base.json";
|
||||
import { init, Tiktoken } from "@dqbd/tiktoken/lite/init";
|
||||
// @ts-expect-error
|
||||
import wasm from "../../node_modules/@dqbd/tiktoken/lite/tiktoken_bg.wasm?module";
|
||||
|
||||
export const config = {
|
||||
runtime: "edge"
|
||||
@@ -13,19 +17,26 @@ const handler = async (req: Request): Promise<Response> => {
|
||||
key: string;
|
||||
};
|
||||
|
||||
const charLimit = 12000;
|
||||
let charCount = 0;
|
||||
await init((imports) => WebAssembly.instantiate(wasm, imports));
|
||||
const encoding = new Tiktoken(tiktokenModel.bpe_ranks, tiktokenModel.special_tokens, tiktokenModel.pat_str);
|
||||
|
||||
const tokenLimit = model.id === OpenAIModelID.GPT_4 ? 6000 : 3000;
|
||||
let tokenCount = 0;
|
||||
let messagesToSend: Message[] = [];
|
||||
|
||||
for (let i = messages.length - 1; i >= 0; i--) {
|
||||
const message = messages[i];
|
||||
if (charCount + message.content.length > charLimit) {
|
||||
const tokens = encoding.encode(message.content);
|
||||
|
||||
if (tokenCount + tokens.length > tokenLimit) {
|
||||
break;
|
||||
}
|
||||
charCount += message.content.length;
|
||||
tokenCount += tokens.length;
|
||||
messagesToSend = [message, ...messagesToSend];
|
||||
}
|
||||
|
||||
encoding.free();
|
||||
|
||||
const stream = await OpenAIStream(model, key, messagesToSend);
|
||||
|
||||
return new Response(stream);
|
||||
|
||||
Reference in New Issue
Block a user