native-llm - v0.2.0
    Preparing search index...

    Variable MODELS (Const)

    MODELS: {
        "gemma-3n-e2b": {
            name: "Gemma 3n E2B";
            repo: "unsloth/gemma-3n-E2B-it-GGUF";
            file: "gemma-3n-E2B-it-Q4_K_M.gguf";
            parameters: "5B→2B";
            quantization: "Q4_K_M";
            contextLength: 32768;
            languages: readonly [
                "en",
                "de",
                "fr",
                "es",
                "it",
                "pt",
                "nl",
                "pl",
                "ru",
                "ja",
                "ko",
                "zh",
            ];
            description: "Ultra-efficient edge model, ~2GB RAM";
            requiresAuth: false;
            benchmarks: { mmlu: 64; arena: 1250 };
        };
        "gemma-3n-e4b": {
            name: "Gemma 3n E4B";
            repo: "unsloth/gemma-3n-E4B-it-GGUF";
            file: "gemma-3n-E4B-it-Q4_K_M.gguf";
            parameters: "8B→4B";
            quantization: "Q4_K_M";
            contextLength: 32768;
            languages: readonly [
                "en",
                "de",
                "fr",
                "es",
                "it",
                "pt",
                "nl",
                "pl",
                "ru",
                "ja",
                "ko",
                "zh",
            ];
            description: "Best edge model, ~3GB RAM";
            requiresAuth: false;
            benchmarks: { mmlu: 75; arena: 1300 };
        };
        "gemma-3-27b": {
            name: "Gemma 3 27B";
            repo: "unsloth/gemma-3-27b-it-GGUF";
            file: "gemma-3-27b-it-Q4_K_M.gguf";
            parameters: "27B";
            quantization: "Q4_K_M";
            contextLength: 131072;
            languages: readonly [
                "en",
                "de",
                "fr",
                "es",
                "it",
                "pt",
                "nl",
                "pl",
                "ru",
                "ja",
                "ko",
                "zh",
            ];
            description: "Maximum quality, 128K context, ~18GB RAM";
            benchmarks: { mmlu: 77; arena: 1338 };
        };
        "gpt-oss-20b": {
            name: "GPT-OSS 20B";
            repo: "unsloth/gpt-oss-20b-GGUF";
            file: "gpt-oss-20b-Q4_K_M.gguf";
            parameters: "21B (3.6B active)";
            quantization: "Q4_K_M";
            contextLength: 131072;
            languages: readonly ["en"];
            description: "OpenAI's open model, MoE, ~16GB RAM";
            benchmarks: { mmlu: 82; arena: 1340 };
        };
        "phi-4": {
            name: "Phi-4 14B";
            repo: "bartowski/phi-4-GGUF";
            file: "phi-4-Q4_K_M.gguf";
            parameters: "14B";
            quantization: "Q4_K_M";
            contextLength: 16384;
            languages: readonly ["en"];
            description: "Microsoft's reasoning-focused, excellent for STEM";
            benchmarks: { mmlu: 84; arena: 1320 };
        };
        "qwen3-4b": {
            name: "Qwen3 4B";
            repo: "unsloth/Qwen3-4B-GGUF";
            file: "Qwen3-4B-Q4_K_M.gguf";
            parameters: "4B";
            quantization: "Q4_K_M";
            contextLength: 32768;
            languages: readonly [
                "en",
                "zh",
                "de",
                "fr",
                "es",
                "pt",
                "it",
                "nl",
                "pl",
                "ru",
                "ja",
                "ko",
            ];
            description: "Thinking mode, 100+ languages, ~3GB RAM";
            thinkingMode: "qwen";
            benchmarks: { mmlu: 76; arena: 1300 };
        };
        "qwen3-8b": {
            name: "Qwen3 8B";
            repo: "unsloth/Qwen3-8B-GGUF";
            file: "Qwen3-8B-Q4_K_M.gguf";
            parameters: "8B";
            quantization: "Q4_K_M";
            contextLength: 32768;
            languages: readonly [
                "en",
                "zh",
                "de",
                "fr",
                "es",
                "pt",
                "it",
                "nl",
                "pl",
                "ru",
                "ja",
                "ko",
            ];
            description: "Thinking mode, excellent multilingual, ~5GB RAM";
            thinkingMode: "qwen";
            benchmarks: { mmlu: 81; arena: 1350 };
        };
        "qwen3-14b": {
            name: "Qwen3 14B";
            repo: "unsloth/Qwen3-14B-GGUF";
            file: "Qwen3-14B-Q4_K_M.gguf";
            parameters: "14B";
            quantization: "Q4_K_M";
            contextLength: 32768;
            languages: readonly [
                "en",
                "zh",
                "de",
                "fr",
                "es",
                "pt",
                "it",
                "nl",
                "pl",
                "ru",
                "ja",
                "ko",
            ];
            description: "Thinking mode, top multilingual, ~9GB RAM";
            thinkingMode: "qwen";
            benchmarks: { mmlu: 84; arena: 1380 };
        };
        "qwen-2.5-coder-7b": {
            name: "Qwen 2.5 Coder 7B";
            repo: "bartowski/Qwen2.5-Coder-7B-Instruct-GGUF";
            file: "Qwen2.5-Coder-7B-Instruct-Q4_K_M.gguf";
            parameters: "7B";
            quantization: "Q4_K_M";
            contextLength: 131072;
            languages: readonly ["en"];
            description: "Optimized for code generation";
            benchmarks: { mmlu: 66; arena: 1250 };
        };
        "deepseek-r1-7b": {
            name: "DeepSeek R1 Distill 7B";
            repo: "bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF";
            file: "DeepSeek-R1-Distill-Qwen-7B-Q4_K_M.gguf";
            parameters: "7B";
            quantization: "Q4_K_M";
            contextLength: 131072;
            languages: readonly ["en", "zh"];
            description: "Strong reasoning with chain-of-thought";
            thinkingMode: "deepseek";
            benchmarks: { mmlu: 72; arena: 1300 };
        };
        "deepseek-r1-14b": {
            name: "DeepSeek R1 Distill 14B";
            repo: "bartowski/DeepSeek-R1-Distill-Qwen-14B-GGUF";
            file: "DeepSeek-R1-Distill-Qwen-14B-Q4_K_M.gguf";
            parameters: "14B";
            quantization: "Q4_K_M";
            contextLength: 131072;
            languages: readonly ["en", "zh"];
            description: "Best reasoning model, shows thinking";
            thinkingMode: "deepseek";
            benchmarks: { mmlu: 79; arena: 1350 };
        };
    } = ...

    Available GGUF models from HuggingFace

    All models use GGUF format optimized for llama.cpp inference. Q4_K_M quantization offers best quality/size tradeoff.

    Auth: Models marked with requiresAuth: true need HF_TOKEN environment variable.

    Quantization guide:

    • Q8_0: Highest quality, ~1 byte/param
    • Q6_K: Very high quality, ~0.75 byte/param
    • Q5_K_M: High quality, ~0.6 byte/param
    • Q4_K_M: Good quality, ~0.5 byte/param (recommended)
    • Q3_K_M: Lower quality, ~0.4 byte/param

    Type Declaration

    • Readonly gemma-3n-e2b: {
          name: "Gemma 3n E2B";
          repo: "unsloth/gemma-3n-E2B-it-GGUF";
          file: "gemma-3n-E2B-it-Q4_K_M.gguf";
          parameters: "5B→2B";
          quantization: "Q4_K_M";
          contextLength: 32768;
          languages: readonly [
              "en",
              "de",
              "fr",
              "es",
              "it",
              "pt",
              "nl",
              "pl",
              "ru",
              "ja",
              "ko",
              "zh",
          ];
          description: "Ultra-efficient edge model, ~2GB RAM";
          requiresAuth: false;
          benchmarks: { mmlu: 64; arena: 1250 };
      }
    • Readonly gemma-3n-e4b: {
          name: "Gemma 3n E4B";
          repo: "unsloth/gemma-3n-E4B-it-GGUF";
          file: "gemma-3n-E4B-it-Q4_K_M.gguf";
          parameters: "8B→4B";
          quantization: "Q4_K_M";
          contextLength: 32768;
          languages: readonly [
              "en",
              "de",
              "fr",
              "es",
              "it",
              "pt",
              "nl",
              "pl",
              "ru",
              "ja",
              "ko",
              "zh",
          ];
          description: "Best edge model, ~3GB RAM";
          requiresAuth: false;
          benchmarks: { mmlu: 75; arena: 1300 };
      }
    • Readonly gemma-3-27b: {
          name: "Gemma 3 27B";
          repo: "unsloth/gemma-3-27b-it-GGUF";
          file: "gemma-3-27b-it-Q4_K_M.gguf";
          parameters: "27B";
          quantization: "Q4_K_M";
          contextLength: 131072;
          languages: readonly [
              "en",
              "de",
              "fr",
              "es",
              "it",
              "pt",
              "nl",
              "pl",
              "ru",
              "ja",
              "ko",
              "zh",
          ];
          description: "Maximum quality, 128K context, ~18GB RAM";
          benchmarks: { mmlu: 77; arena: 1338 };
      }
    • Readonly gpt-oss-20b: {
          name: "GPT-OSS 20B";
          repo: "unsloth/gpt-oss-20b-GGUF";
          file: "gpt-oss-20b-Q4_K_M.gguf";
          parameters: "21B (3.6B active)";
          quantization: "Q4_K_M";
          contextLength: 131072;
          languages: readonly ["en"];
          description: "OpenAI's open model, MoE, ~16GB RAM";
          benchmarks: { mmlu: 82; arena: 1340 };
      }
    • Readonly phi-4: {
          name: "Phi-4 14B";
          repo: "bartowski/phi-4-GGUF";
          file: "phi-4-Q4_K_M.gguf";
          parameters: "14B";
          quantization: "Q4_K_M";
          contextLength: 16384;
          languages: readonly ["en"];
          description: "Microsoft's reasoning-focused, excellent for STEM";
          benchmarks: { mmlu: 84; arena: 1320 };
      }
    • Readonly qwen3-4b: {
          name: "Qwen3 4B";
          repo: "unsloth/Qwen3-4B-GGUF";
          file: "Qwen3-4B-Q4_K_M.gguf";
          parameters: "4B";
          quantization: "Q4_K_M";
          contextLength: 32768;
          languages: readonly [
              "en",
              "zh",
              "de",
              "fr",
              "es",
              "pt",
              "it",
              "nl",
              "pl",
              "ru",
              "ja",
              "ko",
          ];
          description: "Thinking mode, 100+ languages, ~3GB RAM";
          thinkingMode: "qwen";
          benchmarks: { mmlu: 76; arena: 1300 };
      }
    • Readonly qwen3-8b: {
          name: "Qwen3 8B";
          repo: "unsloth/Qwen3-8B-GGUF";
          file: "Qwen3-8B-Q4_K_M.gguf";
          parameters: "8B";
          quantization: "Q4_K_M";
          contextLength: 32768;
          languages: readonly [
              "en",
              "zh",
              "de",
              "fr",
              "es",
              "pt",
              "it",
              "nl",
              "pl",
              "ru",
              "ja",
              "ko",
          ];
          description: "Thinking mode, excellent multilingual, ~5GB RAM";
          thinkingMode: "qwen";
          benchmarks: { mmlu: 81; arena: 1350 };
      }
    • Readonly qwen3-14b: {
          name: "Qwen3 14B";
          repo: "unsloth/Qwen3-14B-GGUF";
          file: "Qwen3-14B-Q4_K_M.gguf";
          parameters: "14B";
          quantization: "Q4_K_M";
          contextLength: 32768;
          languages: readonly [
              "en",
              "zh",
              "de",
              "fr",
              "es",
              "pt",
              "it",
              "nl",
              "pl",
              "ru",
              "ja",
              "ko",
          ];
          description: "Thinking mode, top multilingual, ~9GB RAM";
          thinkingMode: "qwen";
          benchmarks: { mmlu: 84; arena: 1380 };
      }
    • Readonly qwen-2.5-coder-7b: {
          name: "Qwen 2.5 Coder 7B";
          repo: "bartowski/Qwen2.5-Coder-7B-Instruct-GGUF";
          file: "Qwen2.5-Coder-7B-Instruct-Q4_K_M.gguf";
          parameters: "7B";
          quantization: "Q4_K_M";
          contextLength: 131072;
          languages: readonly ["en"];
          description: "Optimized for code generation";
          benchmarks: { mmlu: 66; arena: 1250 };
      }
    • Readonly deepseek-r1-7b: {
          name: "DeepSeek R1 Distill 7B";
          repo: "bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF";
          file: "DeepSeek-R1-Distill-Qwen-7B-Q4_K_M.gguf";
          parameters: "7B";
          quantization: "Q4_K_M";
          contextLength: 131072;
          languages: readonly ["en", "zh"];
          description: "Strong reasoning with chain-of-thought";
          thinkingMode: "deepseek";
          benchmarks: { mmlu: 72; arena: 1300 };
      }
    • Readonly deepseek-r1-14b: {
          name: "DeepSeek R1 Distill 14B";
          repo: "bartowski/DeepSeek-R1-Distill-Qwen-14B-GGUF";
          file: "DeepSeek-R1-Distill-Qwen-14B-Q4_K_M.gguf";
          parameters: "14B";
          quantization: "Q4_K_M";
          contextLength: 131072;
          languages: readonly ["en", "zh"];
          description: "Best reasoning model, shows thinking";
          thinkingMode: "deepseek";
          benchmarks: { mmlu: 79; arena: 1350 };
      }