Token based and model conditional limits (#36)

* use tiktoken for api limit

* model conditional char limits on frontend

* adjust for completion tokens

---------

Co-authored-by: Alan Pogrebinschi <alanpog@gmail.com>
This commit is contained in:
Mckay Wrigley
2023-03-20 22:02:24 -06:00
committed by GitHub
parent 4c425eb441
commit 537957d5f5
6 changed files with 79 additions and 20 deletions
+16 -5
View File
@@ -1,5 +1,9 @@
import { Message, OpenAIModel } from "@/types";
import { Message, OpenAIModel, OpenAIModelID } from "@/types";
import { OpenAIStream } from "@/utils/server";
import tiktokenModel from "@dqbd/tiktoken/encoders/cl100k_base.json";
import { init, Tiktoken } from "@dqbd/tiktoken/lite/init";
// @ts-expect-error
import wasm from "../../node_modules/@dqbd/tiktoken/lite/tiktoken_bg.wasm?module";
export const config = {
runtime: "edge"
@@ -13,19 +17,26 @@ const handler = async (req: Request): Promise<Response> => {
key: string;
};
const charLimit = 12000;
let charCount = 0;
await init((imports) => WebAssembly.instantiate(wasm, imports));
const encoding = new Tiktoken(tiktokenModel.bpe_ranks, tiktokenModel.special_tokens, tiktokenModel.pat_str);
const tokenLimit = model.id === OpenAIModelID.GPT_4 ? 6000 : 3000;
let tokenCount = 0;
let messagesToSend: Message[] = [];
for (let i = messages.length - 1; i >= 0; i--) {
const message = messages[i];
if (charCount + message.content.length > charLimit) {
const tokens = encoding.encode(message.content);
if (tokenCount + tokens.length > tokenLimit) {
break;
}
charCount += message.content.length;
tokenCount += tokens.length;
messagesToSend = [message, ...messagesToSend];
}
encoding.free();
const stream = await OpenAIStream(model, key, messagesToSend);
return new Response(stream);