Returns (
{ id: string } & (
| {
name: "Gemma 3n E2B";
repo: "unsloth/gemma-3n-E2B-it-GGUF";
file: "gemma-3n-E2B-it-Q4_K_M.gguf";
parameters: "5B→2B";
quantization: "Q4_K_M";
contextLength: 32768;
languages: readonly [
"en",
"de",
"fr",
"es",
"it",
"pt",
"nl",
"pl",
"ru",
"ja",
"ko",
"zh",
];
description: "Ultra-efficient edge model, ~2GB RAM";
requiresAuth: false;
benchmarks: { mmlu: 64; arena: 1250 };
}
| {
name: "Gemma 3n E4B";
repo: "unsloth/gemma-3n-E4B-it-GGUF";
file: "gemma-3n-E4B-it-Q4_K_M.gguf";
parameters: "8B→4B";
quantization: "Q4_K_M";
contextLength: 32768;
languages: readonly [
"en",
"de",
"fr",
"es",
"it",
"pt",
"nl",
"pl",
"ru",
"ja",
"ko",
"zh",
];
description: "Best edge model, ~3GB RAM";
requiresAuth: false;
benchmarks: { mmlu: 75; arena: 1300 };
}
| {
name: "Gemma 3 27B";
repo: "unsloth/gemma-3-27b-it-GGUF";
file: "gemma-3-27b-it-Q4_K_M.gguf";
parameters: "27B";
quantization: "Q4_K_M";
contextLength: 131072;
languages: readonly [
"en",
"de",
"fr",
"es",
"it",
"pt",
"nl",
"pl",
"ru",
"ja",
"ko",
"zh",
];
description: "Maximum quality, 128K context, ~18GB RAM";
benchmarks: { mmlu: 77; arena: 1338 };
}
| {
name: "GPT-OSS 20B";
repo: "unsloth/gpt-oss-20b-GGUF";
file: "gpt-oss-20b-Q4_K_M.gguf";
parameters: "21B (3.6B active)";
quantization: "Q4_K_M";
contextLength: 131072;
languages: readonly ["en"];
description: "OpenAI's open model, MoE, ~16GB RAM";
benchmarks: { mmlu: 82; arena: 1340 };
}
| {
name: "Phi-4 14B";
repo: "bartowski/phi-4-GGUF";
file: "phi-4-Q4_K_M.gguf";
parameters: "14B";
quantization: "Q4_K_M";
contextLength: 16384;
languages: readonly ["en"];
description: "Microsoft's reasoning-focused, excellent for STEM";
benchmarks: { mmlu: 84; arena: 1320 };
}
| {
name: "Qwen3 4B";
repo: "unsloth/Qwen3-4B-GGUF";
file: "Qwen3-4B-Q4_K_M.gguf";
parameters: "4B";
quantization: "Q4_K_M";
contextLength: 32768;
languages: readonly [
"en",
"zh",
"de",
"fr",
"es",
"pt",
"it",
"nl",
"pl",
"ru",
"ja",
"ko",
];
description: "Thinking mode, 100+ languages, ~3GB RAM";
thinkingMode: "qwen";
benchmarks: { mmlu: 76; arena: 1300 };
}
| {
name: "Qwen3 8B";
repo: "unsloth/Qwen3-8B-GGUF";
file: "Qwen3-8B-Q4_K_M.gguf";
parameters: "8B";
quantization: "Q4_K_M";
contextLength: 32768;
languages: readonly [
"en",
"zh",
"de",
"fr",
"es",
"pt",
"it",
"nl",
"pl",
"ru",
"ja",
"ko",
];
description: "Thinking mode, excellent multilingual, ~5GB RAM";
thinkingMode: "qwen";
benchmarks: { mmlu: 81; arena: 1350 };
}
| {
name: "Qwen3 14B";
repo: "unsloth/Qwen3-14B-GGUF";
file: "Qwen3-14B-Q4_K_M.gguf";
parameters: "14B";
quantization: "Q4_K_M";
contextLength: 32768;
languages: readonly [
"en",
"zh",
"de",
"fr",
"es",
"pt",
"it",
"nl",
"pl",
"ru",
"ja",
"ko",
];
description: "Thinking mode, top multilingual, ~9GB RAM";
thinkingMode: "qwen";
benchmarks: { mmlu: 84; arena: 1380 };
}
| {
name: "Qwen 2.5 Coder 7B";
repo: "bartowski/Qwen2.5-Coder-7B-Instruct-GGUF";
file: "Qwen2.5-Coder-7B-Instruct-Q4_K_M.gguf";
parameters: "7B";
quantization: "Q4_K_M";
contextLength: 131072;
languages: readonly ["en"];
description: "Optimized for code generation";
benchmarks: { mmlu: 66; arena: 1250 };
}
| {
name: "DeepSeek R1 Distill 7B";
repo: "bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF";
file: "DeepSeek-R1-Distill-Qwen-7B-Q4_K_M.gguf";
parameters: "7B";
quantization: "Q4_K_M";
contextLength: 131072;
languages: readonly ["en", "zh"];
description: "Strong reasoning with chain-of-thought";
thinkingMode: "deepseek";
benchmarks: { mmlu: 72; arena: 1300 };
}
| {
name: "DeepSeek R1 Distill 14B";
repo: "bartowski/DeepSeek-R1-Distill-Qwen-14B-GGUF";
file: "DeepSeek-R1-Distill-Qwen-14B-Q4_K_M.gguf";
parameters: "14B";
quantization: "Q4_K_M";
contextLength: 131072;
languages: readonly ["en", "zh"];
description: "Best reasoning model, shows thinking";
thinkingMode: "deepseek";
benchmarks: { mmlu: 79; arena: 1350 };
}
)
)[]
An array of model information objects, each describing an available GGUF model: its display name, Hugging Face repository and file, parameter count, quantization, context length, supported languages, a short description, and benchmark scores (MMLU and arena).
Native LLM Engine
Provides text generation using llama.cpp with Metal GPU acceleration.