Thomas G. Lopes committed · Commit e8b5344 · Parent(s): 25c63d0

wip session as store
Files changed:
- src/lib/components/InferencePlayground/InferencePlayground.svelte +43 -71
- src/lib/components/InferencePlayground/InferencePlaygroundCodeSnippets.svelte +4 -4
- src/lib/components/InferencePlayground/InferencePlaygroundConversationHeader.svelte +1 -1
- src/lib/components/InferencePlayground/InferencePlaygroundGenerationConfig.svelte +1 -59
- src/lib/components/InferencePlayground/InferencePlaygroundModelSelector.svelte +12 -7
- src/lib/components/InferencePlayground/inferencePlaygroundUtils.ts +58 -1
- src/lib/stores/models.ts +5 -2
- src/lib/stores/session.ts +60 -0
- src/lib/utils/store.ts +9 -0
- src/routes/{+page.server.ts → +layout.server.ts} +2 -3
- tsconfig.json +2 -1
src/lib/components/InferencePlayground/InferencePlayground.svelte
CHANGED
@@ -1,10 +1,8 @@
 <script lang="ts">
-	import type { Conversation, ConversationMessage, ModelEntryWithTokenizer, Session } from "./types";
+	import type { Conversation, ConversationMessage, ModelEntryWithTokenizer } from "./types";
 
 	import { page } from "$app/stores";
-	import { defaultGenerationConfig } from "./generationConfigSettings";
 	import {
-		FEATURED_MODELS_IDS,
 		handleNonStreamingResponse,
 		handleStreamingResponse,
 		isSystemPromptSupported,
@@ -12,6 +10,7 @@
 
 	import { goto } from "$app/navigation";
 	import { models } from "$lib/stores/models";
+	import { session } from "$lib/stores/session";
 	import { token } from "$lib/stores/token";
 	import { isMac } from "$lib/utils/platform";
 	import { HfInference } from "@huggingface/inference";
@@ -23,44 +22,12 @@
 	import IconThrashcan from "../Icons/IconThrashcan.svelte";
 	import PlaygroundConversation from "./InferencePlaygroundConversation.svelte";
 	import PlaygroundConversationHeader from "./InferencePlaygroundConversationHeader.svelte";
-	import GenerationConfig, { defaultSystemMessage } from "./InferencePlaygroundGenerationConfig.svelte";
+	import GenerationConfig from "./InferencePlaygroundGenerationConfig.svelte";
 	import HFTokenModal from "./InferencePlaygroundHFTokenModal.svelte";
 	import ModelSelector from "./InferencePlaygroundModelSelector.svelte";
 	import ModelSelectorModal from "./InferencePlaygroundModelSelectorModal.svelte";
 
 	const startMessageUser: ConversationMessage = { role: "user", content: "" };
-	const modelIdsFromQueryParam = $page.url.searchParams.get("modelId")?.split(",");
-	const modelsFromQueryParam = modelIdsFromQueryParam?.map(id => $models.find(model => model.id === id));
-	const systemMessage: ConversationMessage = {
-		role: "system",
-		content: modelIdsFromQueryParam ? (defaultSystemMessage?.[modelIdsFromQueryParam[0]] ?? "") : "",
-	};
-
-	let session: Session = {
-		conversations: [
-			{
-				model: $models.find(m => FEATURED_MODELS_IDS.includes(m.id)) ?? $models[0],
-				config: { ...defaultGenerationConfig },
-				messages: [{ ...startMessageUser }],
-				systemMessage,
-				streaming: true,
-			},
-		],
-	};
-
-	if (modelsFromQueryParam?.length) {
-		const conversations = modelsFromQueryParam.map(model => {
-			return {
-				model,
-				config: { ...defaultGenerationConfig },
-				messages: [{ ...startMessageUser }],
-				systemMessage,
-				streaming: true,
-			};
-		}) as [Conversation] | [Conversation, Conversation];
-		session.conversations = conversations;
-		session = session;
-	}
 
 	let viewCode = false;
 	let viewSettings = false;
@@ -73,36 +40,38 @@
 		latency: number;
 		generatedTokensCount: number;
 	}
-	let generationStats = session.conversations.map(_ => ({ latency: 0, generatedTokensCount: 0 })) as
+	let generationStats = $session.conversations.map(_ => ({ latency: 0, generatedTokensCount: 0 })) as
 		| [GenerationStatistics]
 		| [GenerationStatistics, GenerationStatistics];
 
-	$: systemPromptSupported = session.conversations.some(conversation => isSystemPromptSupported(conversation.model));
-	$: compareActive = session.conversations.length === 2;
+	$: systemPromptSupported = $session.conversations.some(conversation => isSystemPromptSupported(conversation.model));
+	$: compareActive = $session.conversations.length === 2;
 
 	function addMessage(conversationIdx: number) {
-		const conversation = session.conversations[conversationIdx];
+		const conversation = $session.conversations[conversationIdx];
+		if (!conversation) return;
+		const msgs = conversation.messages.slice();
 		conversation.messages = [
-			...conversation.messages,
+			...msgs,
 			{
-				role: conversation.messages.at(-1)?.role === "user" ? "assistant" : "user",
+				role: msgs.at(-1)?.role === "user" ? "assistant" : "user",
 				content: "",
 			},
 		];
-		session = session;
+		$session = $session;
 	}
 
 	function deleteMessage(conversationIdx: number, idx: number) {
-		session.conversations[conversationIdx].messages.splice(idx, 1)[0];
-		session = session;
+		$session.conversations[conversationIdx]?.messages.splice(idx, 1)[0];
+		$session = $session;
 	}
 
 	function reset() {
-		session.conversations.map(conversation => {
+		$session.conversations.map(conversation => {
 			conversation.systemMessage.content = "";
 			conversation.messages = [{ ...startMessageUser }];
 		});
-		session = session;
+		// session = session;
 	}
 
 	function abort() {
@@ -136,8 +105,9 @@
 					conversation.messages = [...conversation.messages, streamingMessage];
 					addStreamingMessage = false;
 				}
-				session = session;
-				generationStats[conversationIdx].generatedTokensCount += 1;
+				$session = $session;
+				const c = generationStats[conversationIdx];
+				if (c) c.generatedTokensCount += 1;
 			}
 		},
 		abortController
@@ -151,12 +121,14 @@
 			// check if the user did not abort the request
 			if (waitForNonStreaming) {
 				conversation.messages = [...conversation.messages, newMessage];
-				generationStats[conversationIdx].generatedTokensCount += newTokensCount;
+				const c = generationStats[conversationIdx];
+				if (c) c.generatedTokensCount += newTokensCount;
 			}
 		}
 
 		const endTime = performance.now();
-		generationStats[conversationIdx].latency = Math.round(endTime - startTime);
+		const c = generationStats[conversationIdx];
+		if (c) c.latency = Math.round(endTime - startTime);
 	}
 
 	async function submit() {
@@ -165,10 +137,10 @@
 			return;
 		}
 
-		for (const [idx, conversation] of session.conversations.entries()) {
+		for (const [idx, conversation] of $session.conversations.entries()) {
 			if (conversation.messages.at(-1)?.role === "assistant") {
 				let prefix = "";
-				if (session.conversations.length === 2) {
+				if ($session.conversations.length === 2) {
 					prefix = `Error on ${idx === 0 ? "left" : "right"} conversation. `;
 				}
 				return alert(`${prefix}Messages must alternate between user/assistant roles.`);
@@ -179,15 +151,15 @@
 		loading = true;
 
 		try {
-			const promises = session.conversations.map((conversation, idx) => runInference(conversation, idx));
+			const promises = $session.conversations.map((conversation, idx) => runInference(conversation, idx));
 			await Promise.all(promises);
 		} catch (error) {
-			for (const conversation of session.conversations) {
+			for (const conversation of $session.conversations) {
 				if (conversation.messages.at(-1)?.role === "assistant" && !conversation.messages.at(-1)?.content?.trim()) {
 					conversation.messages.pop();
 					conversation.messages = [...conversation.messages];
 				}
-				session = session;
+				$session = $session;
 			}
 			if (error instanceof Error) {
 				if (error.message.includes("token seems invalid")) {
@@ -226,16 +198,16 @@
 
 	function addCompareModel(modelId: ModelEntryWithTokenizer["id"]) {
 		const model = $models.find(m => m.id === modelId);
-		if (!model || session.conversations.length === 2) {
+		if (!model || $session.conversations.length === 2) {
 			return;
 		}
-		const newConversation = { ...JSON.parse(JSON.stringify(session.conversations[0])), model };
-		session.conversations = [...session.conversations, newConversation];
+		const newConversation = { ...JSON.parse(JSON.stringify($session.conversations[0])), model };
+		$session.conversations = [...$session.conversations, newConversation];
 		generationStats = [generationStats[0], { latency: 0, generatedTokensCount: 0 }];
 
 		// update query param
 		const url = new URL($page.url);
-		const queryParamValue = `${session.conversations[0].model.id},${modelId}`;
+		const queryParamValue = `${$session.conversations[0].model.id},${modelId}`;
 		url.searchParams.set("modelId", queryParamValue);
 
 		const parentOrigin = "https://huggingface.co";
@@ -244,8 +216,8 @@
 	}
 
 	function removeCompareModal(conversationIdx: number) {
-		session.conversations.splice(conversationIdx, 1)[0];
-		session = session;
+		$session.conversations.splice(conversationIdx, 1)[0];
+		$session = $session;
 		generationStats.splice(conversationIdx, 1)[0];
 		generationStats = generationStats;
 
@@ -297,12 +269,12 @@
 				placeholder={systemPromptSupported
 					? "Enter a custom prompt"
 					: "System prompt is not supported with the chosen model."}
-				value={systemPromptSupported ? session.conversations[0].systemMessage.content : ""}
+				value={systemPromptSupported ? $session.conversations[0].systemMessage.content : ""}
 				on:input={e => {
-					for (const conversation of session.conversations) {
+					for (const conversation of $session.conversations) {
 						conversation.systemMessage.content = e.currentTarget.value;
 					}
-					session = session;
+					$session = $session;
 				}}
 				class="absolute inset-x-0 bottom-0 h-full resize-none bg-transparent px-3 pt-10 text-sm outline-hidden"
 			></textarea>
@@ -312,7 +284,7 @@
 	<div
 		class="flex h-[calc(100dvh-5rem-120px)] divide-x divide-gray-200 overflow-x-auto overflow-y-hidden *:w-full max-sm:w-dvw md:h-[calc(100dvh-5rem)] md:pt-3 dark:divide-gray-800"
 	>
-		{#each session.conversations as conversation, conversationIdx}
+		{#each $session.conversations as conversation, conversationIdx}
 			<div class="max-sm:min-w-full">
 				{#if compareActive}
 					<PlaygroundConversationHeader
@@ -382,7 +354,7 @@
 			{#if loading}
 				<div class="flex flex-none items-center gap-[3px]">
 					<span class="mr-2">
-						{#if session.conversations[0].streaming || session.conversations[1]?.streaming}
+						{#if $session.conversations[0].streaming || $session.conversations[1]?.streaming}
 							Stop
 						{:else}
 							Cancel
@@ -417,7 +389,7 @@
 		class="flex flex-1 flex-col gap-6 overflow-y-hidden rounded-xl border border-gray-200/80 bg-white bg-linear-to-b from-white via-white p-3 shadow-xs dark:border-white/5 dark:bg-gray-900 dark:from-gray-800/40 dark:via-gray-800/40"
 	>
 		<div class="flex flex-col gap-2">
-			<ModelSelector bind:conversation={session.conversations[0]} />
+			<ModelSelector bind:conversation={$session.conversations[0]} />
 			<div class="flex items-center gap-2 self-end px-2 text-xs whitespace-nowrap">
 				<button
 					class="flex items-center gap-0.5 text-gray-500 hover:text-gray-700 dark:text-gray-400 dark:hover:text-gray-300"
@@ -427,7 +399,7 @@
 					Compare
 				</button>
 				<a
-					href="https://huggingface.co/{session.conversations[0].model.id}"
+					href="https://huggingface.co/{$session.conversations[0].model.id}"
 					target="_blank"
 					class="flex items-center gap-0.5 text-gray-500 hover:text-gray-700 dark:text-gray-400 dark:hover:text-gray-300"
 				>
@@ -439,7 +411,7 @@
 				</div>
 			</div>
 
-			<GenerationConfig bind:conversation={session.conversations[0]} />
+			<GenerationConfig bind:conversation={$session.conversations[0]} />
 			{#if $token.value}
 				<button
 					on:click={token.reset}
@@ -496,7 +468,7 @@
 
 	{#if selectCompareModelOpen}
 		<ModelSelectorModal
-			conversation={session.conversations[0]}
+			conversation={$session.conversations[0]}
 			on:modelSelected={e => addCompareModel(e.detail)}
 			on:close={() => (selectCompareModelOpen = false)}
 		/>
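Two patterns recur throughout this component after the change: the `$session = $session` self-assignment, which is Svelte's idiom for telling the compiler that a store value was mutated in place, and the `const c = generationStats[conversationIdx]; if (c) …` guards that `noUncheckedIndexedAccess` (enabled in tsconfig.json below) now requires before using an indexed read. A minimal standalone sketch of both, using an illustrative store shape rather than the app's real Session type:

import { get, writable } from "svelte/store";

// Illustrative shape only; the app's real Session type lives in ./types.
interface DemoSession {
	conversations: { messages: { role: string; content: string }[] }[];
}

const demo = writable<DemoSession>({ conversations: [{ messages: [] }] });

// Mutating a nested array does not notify subscribers on its own...
const value = get(demo);
value.conversations[0]?.messages.push({ role: "user", content: "hi" });

// ...so the value is re-set (written `$demo = $demo` inside a component)
// to force subscribers to re-run.
demo.set(value);

// With noUncheckedIndexedAccess, an indexed read is `T | undefined`,
// hence the narrowing guard before any member access.
const first = value.conversations[0];
if (first) console.log(first.messages.length);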
src/lib/components/InferencePlayground/InferencePlaygroundCodeSnippets.svelte
CHANGED
@@ -76,7 +76,7 @@
 	const placeholder = [{ role: "user", content: "Tell me a story" }];
 
 	let messages = [...conversation.messages];
-	if (messages.length === 1 && messages[0].role === "user" && !messages[0].content) {
+	if (messages.length === 1 && messages[0]?.role === "user" && !messages[0]?.content) {
 		messages = placeholder;
 	}
 
@@ -468,9 +468,9 @@ print(completion.choices[0].message)`,
 	</ul>
 </div>
 
-{#if clientSnippetsByLang[selectedLanguage].length > 1}
+{#if (clientSnippetsByLang[selectedLanguage]?.length ?? 0) > 1}
 	<div class="flex gap-x-2 px-2 pt-6">
-		{#each clientSnippetsByLang[selectedLanguage] as { name }, idx}
+		{#each clientSnippetsByLang[selectedLanguage] ?? [] as { name }, idx}
 			<button
 				class="rounded-md px-1.5 py-0.5 leading-tight {idx === selectedClientIdxByLang[selectedLanguage]
 					? 'bg-black text-gray-100 dark:bg-gray-600 dark:text-white'
@@ -481,7 +481,7 @@ print(completion.choices[0].message)`,
 	</div>
 {/if}
 
-{#each clientSnippetsByLang[selectedLanguage] as { snippets }, idx}
+{#each clientSnippetsByLang[selectedLanguage] ?? [] as { snippets }, idx}
 	{#if idx === selectedClientIdxByLang[selectedLanguage]}
 		{#each snippets as { label, code, language, needsToken }}
 			<div class="flex items-center justify-between px-2 pt-6 pb-4">
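One subtlety in the `{#if}` condition above: `??` binds more loosely than `>`, so the parentheses around `clientSnippetsByLang[selectedLanguage]?.length ?? 0` are load-bearing. Without them the comparison runs first and the whole expression falls back to a boolean. A standalone TypeScript illustration (the names are made up):

const snippetsByLang: Record<string, string[] | undefined> = { js: ["fetch"] };

// Without parentheses this parses as `length ?? (0 > 1)`, i.e. `length ?? false`:
const unparenthesized = snippetsByLang["py"]?.length ?? 0 > 1; // false (the boolean fallback)

// With parentheses the fallback happens first, then the comparison:
const parenthesized = (snippetsByLang["py"]?.length ?? 0) > 1; // false, via `0 > 1`

console.log(unparenthesized, parenthesized);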
src/lib/components/InferencePlayground/InferencePlaygroundConversationHeader.svelte
CHANGED
@@ -41,7 +41,7 @@
 		}
 	}
 
-	$: nameSpace = conversation.model.id.split("/")[0];
+	$: nameSpace = conversation.model.id.split("/")[0] ?? "";
 </script>
 
 {#if modelSelectorOpen}
src/lib/components/InferencePlayground/InferencePlaygroundGenerationConfig.svelte
CHANGED
@@ -1,70 +1,12 @@
-<script context="module" lang="ts">
-	export const defaultSystemMessage: { [key: string]: string } = {
-		"Qwen/QwQ-32B-Preview":
-			"You are a helpful and harmless assistant. You are Qwen developed by Alibaba. You should think step-by-step.",
-	} as const;
-</script>
-
 <script lang="ts">
 	import type { Conversation } from "$lib/components/InferencePlayground/types";
 
 	import { GENERATION_CONFIG_KEYS, GENERATION_CONFIG_SETTINGS } from "./generationConfigSettings";
+	import { customMaxTokens } from "./inferencePlaygroundUtils";
 
 	export let conversation: Conversation;
 	export let classNames = "";
 
-	const customMaxTokens: { [key: string]: number } = {
-		"01-ai/Yi-1.5-34B-Chat": 2048,
-		"HuggingFaceM4/idefics-9b-instruct": 2048,
-		"deepseek-ai/DeepSeek-Coder-V2-Instruct": 16384,
-		"bigcode/starcoder": 8192,
-		"bigcode/starcoderplus": 8192,
-		"HuggingFaceH4/starcoderbase-finetuned-oasst1": 8192,
-		"google/gemma-7b": 8192,
-		"google/gemma-1.1-7b-it": 8192,
-		"google/gemma-2b": 8192,
-		"google/gemma-1.1-2b-it": 8192,
-		"google/gemma-2-27b-it": 8192,
-		"google/gemma-2-9b-it": 4096,
-		"google/gemma-2-2b-it": 8192,
-		"tiiuae/falcon-7b": 8192,
-		"tiiuae/falcon-7b-instruct": 8192,
-		"timdettmers/guanaco-33b-merged": 2048,
-		"mistralai/Mixtral-8x7B-Instruct-v0.1": 32768,
-		"Qwen/Qwen2.5-72B-Instruct": 32768,
-		"Qwen/Qwen2.5-Coder-32B-Instruct": 32768,
-		"meta-llama/Meta-Llama-3-70B-Instruct": 8192,
-		"CohereForAI/c4ai-command-r-plus-08-2024": 32768,
-		"NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO": 32768,
-		"meta-llama/Llama-2-70b-chat-hf": 8192,
-		"HuggingFaceH4/zephyr-7b-alpha": 17432,
-		"HuggingFaceH4/zephyr-7b-beta": 32768,
-		"mistralai/Mistral-7B-Instruct-v0.1": 32768,
-		"mistralai/Mistral-7B-Instruct-v0.2": 32768,
-		"mistralai/Mistral-7B-Instruct-v0.3": 32768,
-		"mistralai/Mistral-Nemo-Instruct-2407": 32768,
-		"meta-llama/Meta-Llama-3-8B-Instruct": 8192,
-		"mistralai/Mistral-7B-v0.1": 32768,
-		"bigcode/starcoder2-3b": 16384,
-		"bigcode/starcoder2-15b": 16384,
-		"HuggingFaceH4/starchat2-15b-v0.1": 16384,
-		"codellama/CodeLlama-7b-hf": 8192,
-		"codellama/CodeLlama-13b-hf": 8192,
-		"codellama/CodeLlama-34b-Instruct-hf": 8192,
-		"meta-llama/Llama-2-7b-chat-hf": 8192,
-		"meta-llama/Llama-2-13b-chat-hf": 8192,
-		"OpenAssistant/oasst-sft-6-llama-30b": 2048,
-		"TheBloke/vicuna-7B-v1.5-GPTQ": 2048,
-		"HuggingFaceH4/starchat-beta": 8192,
-		"bigcode/octocoder": 8192,
-		"vwxyzjn/starcoderbase-triviaqa": 8192,
-		"lvwerra/starcoderbase-gsm8k": 8192,
-		"NousResearch/Hermes-3-Llama-3.1-8B": 16384,
-		"microsoft/Phi-3.5-mini-instruct": 32768,
-		"meta-llama/Llama-3.1-70B-Instruct": 32768,
-		"meta-llama/Llama-3.1-8B-Instruct": 8192,
-	} as const;
-
 	$: modelMaxLength = customMaxTokens[conversation.model.id] ?? conversation.model.tokenizerConfig.model_max_length;
 	$: maxTokens = Math.min(modelMaxLength ?? GENERATION_CONFIG_SETTINGS["max_tokens"].max, 64_000);
 </script>
src/lib/components/InferencePlayground/InferencePlaygroundModelSelector.svelte
CHANGED
@@ -4,12 +4,13 @@
 	import { goto } from "$app/navigation";
 	import { page } from "$app/stores";
 
+	import { fetchHuggingFaceModel, type InferenceProviderMapping } from "$lib/fetchers/providers";
+	import { models } from "$lib/stores/models";
+	import { token } from "$lib/stores/token";
+	import Avatar from "../Avatar.svelte";
 	import IconCaret from "../Icons/IconCaret.svelte";
 	import ModelSelectorModal from "./InferencePlaygroundModelSelectorModal.svelte";
-	import
-	import { defaultSystemMessage } from "./InferencePlaygroundGenerationConfig.svelte";
-	import { models } from "$lib/stores/models";
-	import { fetchHuggingFaceModel, type Provider } from "$lib/fetchers/providers";
+	import { defaultSystemMessage } from "./inferencePlaygroundUtils";
 
 	export let conversation: Conversation;
 
@@ -32,12 +33,16 @@
 		goto(url.toString(), { replaceState: true });
 	}
 
-	$: [nameSpace, modelName] = conversation.model.id.split("/");
+	$: nameSpace = conversation.model.id.split("/")[0] ?? "";
+	$: modelName = conversation.model.id.split("/")[1] ?? "";
 
 	async function loadProviders(modelId: string) {
-
+		providerMap = {};
+		const res = await fetchHuggingFaceModel(modelId, $token.value);
+		providerMap = res.inferenceProviderMapping;
 	}
-	let
+	let providerMap: InferenceProviderMapping = {};
+	// $: loadProviders(conversation.model.id);
 
 	const id = crypto.randomUUID();
 </script>
src/lib/components/InferencePlayground/inferencePlaygroundUtils.ts
CHANGED
@@ -55,7 +55,7 @@ export async function handleNonStreamingResponse(
 }
 
 export function isSystemPromptSupported(model: ModelEntryWithTokenizer) {
-	return model
+	return model?.tokenizerConfig?.chat_template?.includes("system");
 }
 
 export const FEATURED_MODELS_IDS = [
@@ -65,3 +65,60 @@ export const FEATURED_MODELS_IDS = [
 	"Qwen/Qwen2.5-72B-Instruct",
 	"Qwen/QwQ-32B-Preview",
 ];
+
+export const defaultSystemMessage: { [key: string]: string } = {
+	"Qwen/QwQ-32B-Preview":
+		"You are a helpful and harmless assistant. You are Qwen developed by Alibaba. You should think step-by-step.",
+} as const;
+
+export const customMaxTokens: { [key: string]: number } = {
+	"01-ai/Yi-1.5-34B-Chat": 2048,
+	"HuggingFaceM4/idefics-9b-instruct": 2048,
+	"deepseek-ai/DeepSeek-Coder-V2-Instruct": 16384,
+	"bigcode/starcoder": 8192,
+	"bigcode/starcoderplus": 8192,
+	"HuggingFaceH4/starcoderbase-finetuned-oasst1": 8192,
+	"google/gemma-7b": 8192,
+	"google/gemma-1.1-7b-it": 8192,
+	"google/gemma-2b": 8192,
+	"google/gemma-1.1-2b-it": 8192,
+	"google/gemma-2-27b-it": 8192,
+	"google/gemma-2-9b-it": 4096,
+	"google/gemma-2-2b-it": 8192,
+	"tiiuae/falcon-7b": 8192,
+	"tiiuae/falcon-7b-instruct": 8192,
+	"timdettmers/guanaco-33b-merged": 2048,
+	"mistralai/Mixtral-8x7B-Instruct-v0.1": 32768,
+	"Qwen/Qwen2.5-72B-Instruct": 32768,
+	"Qwen/Qwen2.5-Coder-32B-Instruct": 32768,
+	"meta-llama/Meta-Llama-3-70B-Instruct": 8192,
+	"CohereForAI/c4ai-command-r-plus-08-2024": 32768,
+	"NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO": 32768,
+	"meta-llama/Llama-2-70b-chat-hf": 8192,
+	"HuggingFaceH4/zephyr-7b-alpha": 17432,
+	"HuggingFaceH4/zephyr-7b-beta": 32768,
+	"mistralai/Mistral-7B-Instruct-v0.1": 32768,
+	"mistralai/Mistral-7B-Instruct-v0.2": 32768,
+	"mistralai/Mistral-7B-Instruct-v0.3": 32768,
+	"mistralai/Mistral-Nemo-Instruct-2407": 32768,
+	"meta-llama/Meta-Llama-3-8B-Instruct": 8192,
+	"mistralai/Mistral-7B-v0.1": 32768,
+	"bigcode/starcoder2-3b": 16384,
+	"bigcode/starcoder2-15b": 16384,
+	"HuggingFaceH4/starchat2-15b-v0.1": 16384,
+	"codellama/CodeLlama-7b-hf": 8192,
+	"codellama/CodeLlama-13b-hf": 8192,
+	"codellama/CodeLlama-34b-Instruct-hf": 8192,
+	"meta-llama/Llama-2-7b-chat-hf": 8192,
+	"meta-llama/Llama-2-13b-chat-hf": 8192,
+	"OpenAssistant/oasst-sft-6-llama-30b": 2048,
+	"TheBloke/vicuna-7B-v1.5-GPTQ": 2048,
+	"HuggingFaceH4/starchat-beta": 8192,
+	"bigcode/octocoder": 8192,
+	"vwxyzjn/starcoderbase-triviaqa": 8192,
+	"lvwerra/starcoderbase-gsm8k": 8192,
+	"NousResearch/Hermes-3-Llama-3.1-8B": 16384,
+	"microsoft/Phi-3.5-mini-instruct": 32768,
+	"meta-llama/Llama-3.1-70B-Instruct": 32768,
+	"meta-llama/Llama-3.1-8B-Instruct": 8192,
+} as const;
src/lib/stores/models.ts
CHANGED
@@ -1,5 +1,8 @@
-import { page } from "$app/stores";
 import type { ModelEntryWithTokenizer } from "$lib/components/InferencePlayground/types";
+import { safePage } from "$lib/utils/store";
 import { derived } from "svelte/store";
 
-export const models = derived(page, $page => $page.data.models as ModelEntryWithTokenizer[]);
+export const models = derived(safePage, $page => {
+	const res: ModelEntryWithTokenizer[] = $page?.data?.models ?? [];
+	return res;
+});
src/lib/stores/session.ts
ADDED
@@ -0,0 +1,60 @@
+import type { Conversation, Session } from "$lib/components/InferencePlayground/types";
+import { defaultGenerationConfig } from "$lib/components/InferencePlayground/generationConfigSettings";
+import {
+	defaultSystemMessage,
+	FEATURED_MODELS_IDS,
+} from "$lib/components/InferencePlayground/inferencePlaygroundUtils";
+
+import { models } from "$lib/stores/models";
+import { get, writable } from "svelte/store";
+import type { ChatCompletionInputMessage } from "@huggingface/tasks";
+import { partialSet, safePage } from "$lib/utils/store";
+
+export function createSessionStore() {
+	const startMessageUser: ChatCompletionInputMessage = { role: "user", content: "" };
+	const modelIdsFromQueryParam = get(safePage)?.url?.searchParams?.get("modelId")?.split(",");
+	const modelsFromQueryParam = modelIdsFromQueryParam?.map(id => get(models).find(model => model.id === id));
+	const systemMessage: ChatCompletionInputMessage = {
+		role: "system",
+		content: modelIdsFromQueryParam?.[0] ? (defaultSystemMessage?.[modelIdsFromQueryParam[0]] ?? "") : "",
+	};
+
+	const store = writable<Session>({
+		conversations: [
+			{
+				model: get(models).find(m => FEATURED_MODELS_IDS.includes(m.id)) ??
+					get(models)[0] ?? {
+						id: "",
+						downloads: 0,
+						gated: false,
+						likes: 0,
+						name: "",
+						private: false,
+						tokenizerConfig: {},
+						updatedAt: new Date(),
+					},
+				config: { ...defaultGenerationConfig },
+				messages: [{ ...startMessageUser }],
+				systemMessage,
+				streaming: true,
+			},
+		],
+	});
+
+	if (modelsFromQueryParam?.length) {
+		const conversations = modelsFromQueryParam.map(model => {
+			return {
+				model,
+				config: { ...defaultGenerationConfig },
+				messages: [{ ...startMessageUser }],
+				systemMessage,
+				streaming: true,
+			};
+		}) as [Conversation] | [Conversation, Conversation];
+		partialSet(store, { conversations });
+	}
+
+	return store;
+}
+
+export const session = createSessionStore();
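Outside of Svelte components, where the `$session` auto-subscription syntax is unavailable, the exported store would be used through the standard store API. A minimal usage sketch, assuming the exports above:

import { get } from "svelte/store";
import { session } from "$lib/stores/session";
import { partialSet } from "$lib/utils/store";

// Read the current value once, without keeping a subscription.
const current = get(session);
console.log(current.conversations.length);

// Merge-update a top-level field while leaving the rest of the session intact.
partialSet(session, { conversations: [...current.conversations] });

// Or subscribe explicitly for the lifetime of a non-component consumer.
const unsubscribe = session.subscribe(s => {
	console.log("conversations:", s.conversations.length);
});
unsubscribe();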
src/lib/utils/store.ts
ADDED
@@ -0,0 +1,9 @@
+import { browser } from "$app/environment";
+import { page } from "$app/stores";
+import { readable, type Writable } from "svelte/store";
+
+export function partialSet<T extends Record<string, unknown>>(store: Writable<T>, partial: Partial<T>) {
+	store.update(s => ({ ...s, ...partial }));
+}
+
+export const safePage = browser ? page : readable(undefined);
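`safePage` presumably exists because SvelteKit's `page` store can only be subscribed to in the browser (or during component initialization on the server), while `createSessionStore` reads it at module scope; the wrapper trades an SSR crash for an `undefined` read on the server. A small usage sketch, assuming these exports:

import { get } from "svelte/store";
import { safePage } from "$lib/utils/store";

// In the browser this is the current page store value; during SSR it is undefined.
const modelId = get(safePage)?.url.searchParams.get("modelId");
console.log(modelId ?? "no modelId param (or running on the server)");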
src/routes/{+page.server.ts → +layout.server.ts}
RENAMED
@@ -1,9 +1,9 @@
 import type { ModelEntryWithTokenizer } from "$lib/components/InferencePlayground/types";
 import type { ModelEntry } from "@huggingface/hub";
-import type { PageServerLoad } from "./$types";
+import type { LayoutServerLoad } from "./$types";
 import { env } from "$env/dynamic/private";
 
-export const load: PageServerLoad = async ({ fetch }) => {
+export const load: LayoutServerLoad = async ({ fetch }) => {
 	const apiUrl =
 		"https://huggingface.co/api/models?pipeline_tag=text-generation&inference_provider=hf-inference&filter=conversational";
 	const HF_TOKEN = env.HF_TOKEN;
@@ -18,7 +18,6 @@ export const load: PageServerLoad = async ({ fetch }) => {
 		return { models: [] };
 	}
 	const compatibleModels: ModelEntry[] = await res.json();
-	console.log(compatibleModels);
 	compatibleModels.sort((a, b) => a.id.toLowerCase().localeCompare(b.id.toLowerCase()));
 
 	const promises = compatibleModels.map(async model => {
tsconfig.json
CHANGED
@@ -9,7 +9,8 @@
 		"skipLibCheck": true,
 		"sourceMap": true,
 		"strict": true,
-		"target": "ES2018"
+		"target": "ES2018",
+		"noUncheckedIndexedAccess": true
 	},
 	"exclude": ["vite.config.ts"]
 	// Path aliases are handled by https://kit.svelte.dev/docs/configuration#alias
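`noUncheckedIndexedAccess` is the change that drives most of the edits above: with it enabled, every array or record index read is typed `T | undefined` until narrowed, which is why the diff adds `?.`, `?? ""`, and `if (c)` guards around lookups. A small self-contained example:

const ids: string[] = ["meta-llama/Llama-3.1-8B-Instruct"];

const first = ids[0]; // typed `string | undefined`, not `string`
// first.toUpperCase(); // compile error: 'first' is possibly 'undefined'

if (first) {
	console.log(first.toUpperCase()); // narrowed to `string`
}

// Record lookups behave the same way, so fallbacks become explicit:
const maxTokens: Record<string, number> = { "google/gemma-2b": 8192 };
const limit = maxTokens["unknown/model"] ?? 2048;
console.log(limit);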