llama.cpp
.devops
nix
apps.nix devshells.nix docker.nix jetson-support.nix nixpkgs-instances.nix package-gguf-py.nix package.nix python-scripts.nix scope.nix sif.nix.github
ISSUE_TEMPLATE
010-bug-compilation.yml 011-bug-results.yml 019-bug-misc.yml 020-enhancement.yml 030-research.yml 040-refactor.yml config.ymlworkflows
bench.yml.disabled build-cache.yml build-cmake-pkg.yml build-linux-cross.yml build.yml check-vendor.yml close-issue.yml copilot-setup-steps.yml docker.yml editorconfig.yml gguf-publish.yml labeler.yml pre-tokenizer-hashes.yml python-check-requirements.yml python-lint.yml python-type-check.yml release.yml server-metal.yml server-webui.yml server.yml update-ops-docs.yml winget.ymlbenches
cmake
arm64-apple-clang.cmake arm64-windows-llvm.cmake build-info.cmake common.cmake download-models.cmake git-vars.cmake license.cmake llama-config.cmake.in llama.pc.in riscv64-spacemit-linux-gnu-gcc.cmake x64-windows-llvm.cmakecommon
jinja
README.md caps.cpp caps.h lexer.cpp lexer.h parser.cpp parser.h runtime.cpp runtime.h string.cpp string.h utils.h value.cpp value.hdocs
multimodal
MobileVLM.md gemma3.md glmedge.md granitevision.md llava.md minicpmo2.6.md minicpmo4.0.md minicpmv2.5.md minicpmv2.6.md minicpmv4.0.md minicpmv4.5.mdops
BLAS.csv CANN.csv CPU.csv CUDA.csv Metal.csv OpenCL.csv SYCL.csv Vulkan.csv WebGPU.csv ZenDNN.csv zDNN.csvexamples
llama.android
app
src
lib
.gitignore build.gradle.kts consumer-rules.pro proguard-rules.promodel-conversion
scripts
causal
compare-embeddings-logits.sh compare-logits.py convert-model.sh modelcard.template run-casual-gen-embeddings-org.py run-converted-model-embeddings-logits.sh run-converted-model.sh run-org-model.pyembedding
compare-embeddings-logits.sh convert-model.sh modelcard.template run-converted-model.sh run-original-model.pyutils
__init__.py check-nmse.py common.py compare_tokens.py create-collection-add-model.sh curl-embedding-server.sh hf-add-model-to-collection.py hf-create-collection.py hf-create-model.py hf-upload-gguf-model.py inspect-converted-model.sh inspect-org-model.py perplexity-gen.sh perplexity-run-simple.sh perplexity-run.sh quantize.sh run-embedding-server.sh semantic_check.py tensor-info.pysycl
CMakeLists.txt README.md build.sh ls-sycl-device.cpp run-llama2.sh test.sh win-build-sycl.bat win-run-llama2.bat win-test.batggml
include
ggml-alloc.h ggml-backend.h ggml-blas.h ggml-cann.h ggml-cpp.h ggml-cpu.h ggml-cuda.h ggml-hexagon.h ggml-metal.h ggml-opencl.h ggml-opt.h ggml-rpc.h ggml-sycl.h ggml-virtgpu.h ggml-vulkan.h ggml-webgpu.h ggml-zdnn.h ggml-zendnn.h ggml.h gguf.hsrc
ggml-cann
CMakeLists.txt acl_tensor.cpp acl_tensor.h aclnn_ops.cpp aclnn_ops.h common.h ggml-cann.cppggml-cpu
CMakeLists.txt arch-fallback.h binary-ops.cpp binary-ops.h common.h ggml-cpu-impl.h ggml-cpu.c ggml-cpu.cpp hbm.cpp hbm.h ops.cpp ops.h quants.c quants.h repack.cpp repack.h simd-mappings.h traits.cpp traits.h unary-ops.cpp unary-ops.h vec.cpp vec.hggml-cuda
template-instances
fattn-mma-f16-instance-ncols1_1-ncols2_16.cu fattn-mma-f16-instance-ncols1_1-ncols2_32.cu fattn-mma-f16-instance-ncols1_1-ncols2_8.cu fattn-mma-f16-instance-ncols1_16-ncols2_1.cu fattn-mma-f16-instance-ncols1_16-ncols2_2.cu fattn-mma-f16-instance-ncols1_16-ncols2_4.cu fattn-mma-f16-instance-ncols1_2-ncols2_16.cu fattn-mma-f16-instance-ncols1_2-ncols2_32.cu fattn-mma-f16-instance-ncols1_2-ncols2_4.cu fattn-mma-f16-instance-ncols1_2-ncols2_8.cu fattn-mma-f16-instance-ncols1_32-ncols2_1.cu fattn-mma-f16-instance-ncols1_32-ncols2_2.cu fattn-mma-f16-instance-ncols1_4-ncols2_16.cu fattn-mma-f16-instance-ncols1_4-ncols2_2.cu fattn-mma-f16-instance-ncols1_4-ncols2_4.cu fattn-mma-f16-instance-ncols1_4-ncols2_8.cu fattn-mma-f16-instance-ncols1_64-ncols2_1.cu fattn-mma-f16-instance-ncols1_8-ncols2_1.cu fattn-mma-f16-instance-ncols1_8-ncols2_2.cu fattn-mma-f16-instance-ncols1_8-ncols2_4.cu fattn-mma-f16-instance-ncols1_8-ncols2_8.cu fattn-tile-instance-dkq112-dv112.cu fattn-tile-instance-dkq128-dv128.cu fattn-tile-instance-dkq256-dv256.cu fattn-tile-instance-dkq40-dv40.cu fattn-tile-instance-dkq576-dv512.cu fattn-tile-instance-dkq64-dv64.cu fattn-tile-instance-dkq72-dv72.cu fattn-tile-instance-dkq80-dv80.cu fattn-tile-instance-dkq96-dv96.cu fattn-vec-instance-f16-f16.cu fattn-vec-instance-f16-q4_0.cu fattn-vec-instance-f16-q4_1.cu fattn-vec-instance-f16-q5_0.cu fattn-vec-instance-f16-q5_1.cu fattn-vec-instance-f16-q8_0.cu fattn-vec-instance-q4_0-f16.cu fattn-vec-instance-q4_0-q4_0.cu fattn-vec-instance-q4_0-q4_1.cu fattn-vec-instance-q4_0-q5_0.cu fattn-vec-instance-q4_0-q5_1.cu fattn-vec-instance-q4_0-q8_0.cu fattn-vec-instance-q4_1-f16.cu fattn-vec-instance-q4_1-q4_0.cu fattn-vec-instance-q4_1-q4_1.cu fattn-vec-instance-q4_1-q5_0.cu fattn-vec-instance-q4_1-q5_1.cu fattn-vec-instance-q4_1-q8_0.cu fattn-vec-instance-q5_0-f16.cu fattn-vec-instance-q5_0-q4_0.cu fattn-vec-instance-q5_0-q4_1.cu fattn-vec-instance-q5_0-q5_0.cu fattn-vec-instance-q5_0-q5_1.cu 
fattn-vec-instance-q5_0-q8_0.cu fattn-vec-instance-q5_1-f16.cu fattn-vec-instance-q5_1-q4_0.cu fattn-vec-instance-q5_1-q4_1.cu fattn-vec-instance-q5_1-q5_0.cu fattn-vec-instance-q5_1-q5_1.cu fattn-vec-instance-q5_1-q8_0.cu fattn-vec-instance-q8_0-f16.cu fattn-vec-instance-q8_0-q4_0.cu fattn-vec-instance-q8_0-q4_1.cu fattn-vec-instance-q8_0-q5_0.cu fattn-vec-instance-q8_0-q5_1.cu fattn-vec-instance-q8_0-q8_0.cu generate_cu_files.py mmf-instance-ncols_1.cu mmf-instance-ncols_10.cu mmf-instance-ncols_11.cu mmf-instance-ncols_12.cu mmf-instance-ncols_13.cu mmf-instance-ncols_14.cu mmf-instance-ncols_15.cu mmf-instance-ncols_16.cu mmf-instance-ncols_2.cu mmf-instance-ncols_3.cu mmf-instance-ncols_4.cu mmf-instance-ncols_5.cu mmf-instance-ncols_6.cu mmf-instance-ncols_7.cu mmf-instance-ncols_8.cu mmf-instance-ncols_9.cu mmq-instance-iq1_s.cu mmq-instance-iq2_s.cu mmq-instance-iq2_xs.cu mmq-instance-iq2_xxs.cu mmq-instance-iq3_s.cu mmq-instance-iq3_xxs.cu mmq-instance-iq4_nl.cu mmq-instance-iq4_xs.cu mmq-instance-mxfp4.cu mmq-instance-q2_k.cu mmq-instance-q3_k.cu mmq-instance-q4_0.cu mmq-instance-q4_1.cu mmq-instance-q4_k.cu mmq-instance-q5_0.cu mmq-instance-q5_1.cu mmq-instance-q5_k.cu mmq-instance-q6_k.cu mmq-instance-q8_0.cuggml-hexagon
htp
CMakeLists.txt act-ops.c argsort-ops.c binary-ops.c cmake-toolchain.cmake cpy-ops.c flash-attn-ops.c get-rows-ops.c hex-dma.c hex-dma.h hex-dump.h hex-fastdiv.h hex-utils.h htp-ctx.h htp-msg.h htp-ops.h htp_iface.idl hvx-arith.h hvx-base.h hvx-copy.h hvx-div.h hvx-dump.h hvx-exp.h hvx-floor.h hvx-inverse.h hvx-reduce.h hvx-scale.h hvx-sigmoid.h hvx-sqrt.h hvx-types.h hvx-utils.h main.c matmul-ops.c rope-ops.c set-rows-ops.c softmax-ops.c sum-rows-ops.c unary-ops.c worker-pool.c worker-pool.hggml-metal
CMakeLists.txt ggml-metal-common.cpp ggml-metal-common.h ggml-metal-context.h ggml-metal-context.m ggml-metal-device.cpp ggml-metal-device.h ggml-metal-device.m ggml-metal-impl.h ggml-metal-ops.cpp ggml-metal-ops.h ggml-metal.cpp ggml-metal.metalggml-opencl
kernels
add.cl add_id.cl argsort.cl clamp.cl concat.cl conv2d.cl conv2d_f16_f32.cl cpy.cl cvt.cl diag_mask_inf.cl div.cl embed_kernel.py expm1.cl fill.cl flash_attn_f16.cl flash_attn_f32.cl flash_attn_f32_f16.cl gelu.cl gemm_moe_mxfp4_f32.cl gemv_moe_mxfp4_f32.cl gemv_noshuffle.cl gemv_noshuffle_general.cl gemv_noshuffle_general_q8_0_f32.cl get_rows.cl glu.cl group_norm.cl im2col_f16.cl im2col_f32.cl mean.cl mul.cl mul_mat_Ab_Bi_8x4.cl mul_mat_f16_f32.cl mul_mm_f16_f32_kq_kqv.cl mul_mm_f16_f32_l4_lm.cl mul_mm_f32_f32_l4_lm.cl mul_mm_q6_k_f32_l4_lm.cl mul_mm_q8_0_f32_8x4.cl mul_mm_q8_0_f32_l4_lm.cl mul_mv_f16_f16.cl mul_mv_f16_f32.cl mul_mv_f16_f32_1row.cl mul_mv_f16_f32_l4.cl mul_mv_f32_f32.cl mul_mv_id_mxfp4_f32.cl mul_mv_id_mxfp4_f32_flat.cl mul_mv_id_q4_0_f32_8x_flat.cl mul_mv_id_q8_0_f32.cl mul_mv_id_q8_0_f32_flat.cl mul_mv_mxfp4_f32.cl mul_mv_mxfp4_f32_flat.cl mul_mv_q4_0_f32.cl mul_mv_q4_0_f32_1d_16x_flat.cl mul_mv_q4_0_f32_1d_8x_flat.cl mul_mv_q4_0_f32_8x_flat.cl mul_mv_q4_0_f32_v.cl mul_mv_q4_k_f32.cl mul_mv_q6_k_f32.cl mul_mv_q6_k_f32_flat.cl mul_mv_q8_0_f32.cl mul_mv_q8_0_f32_flat.cl norm.cl pad.cl relu.cl repeat.cl rms_norm.cl rope.cl scale.cl set_rows.cl sigmoid.cl silu.cl softmax_4_f16.cl softmax_4_f32.cl softmax_f16.cl softmax_f32.cl softplus.cl solve_tri.cl sqr.cl sqrt.cl ssm_conv.cl sub.cl sum_rows.cl tanh.cl transpose.cl tri.cl tsembd.cl upscale.clggml-sycl
CMakeLists.txt add-id.cpp add-id.hpp backend.hpp binbcast.cpp binbcast.hpp common.cpp common.hpp concat.cpp concat.hpp conv.cpp conv.hpp convert.cpp convert.hpp count-equal.cpp count-equal.hpp cpy.cpp cpy.hpp dequantize.hpp dmmv.cpp dmmv.hpp element_wise.cpp element_wise.hpp gemm.hpp getrows.cpp getrows.hpp ggml-sycl.cpp gla.cpp gla.hpp im2col.cpp im2col.hpp mmq.cpp mmq.hpp mmvq.cpp mmvq.hpp norm.cpp norm.hpp outprod.cpp outprod.hpp pad.cpp pad.hpp pad_reflect_1d.cpp pad_reflect_1d.hpp presets.hpp quantize.hpp quants.hpp repeat_back.cpp repeat_back.hpp roll.cpp roll.hpp rope.cpp rope.hpp set.cpp set.hpp set_rows.cpp set_rows.hpp softmax.cpp softmax.hpp ssm_conv.cpp ssm_conv.hpp sycl_hw.cpp sycl_hw.hpp tsembd.cpp tsembd.hpp vecdotq.hpp wkv.cpp wkv.hppggml-virtgpu
backend
CMakeLists.txt apir_cs_ggml-rpc-back.cpp backend-convert.h backend-dispatched-backend.cpp backend-dispatched-buffer-type.cpp backend-dispatched-buffer.cpp backend-dispatched-device.cpp backend-dispatched.cpp backend-dispatched.gen.h backend-dispatched.h backend-virgl-apir.h backend.cppggml-vulkan
vulkan-shaders
CMakeLists.txt abs.comp acc.comp add.comp add1.comp add_id.comp arange.comp argmax.comp argsort.comp argsort_large.comp ceil.comp clamp.comp concat.comp contig_copy.comp conv2d_dw.comp conv2d_mm.comp conv_transpose_1d.comp copy.comp copy_from_quant.comp copy_to_quant.comp copy_transpose.comp cos.comp count_equal.comp count_experts.comp cumsum.comp cumsum_multipass1.comp cumsum_multipass2.comp dequant_f32.comp dequant_funcs.glsl dequant_funcs_cm2.glsl dequant_head.glsl dequant_iq1_m.comp dequant_iq1_s.comp dequant_iq2_s.comp dequant_iq2_xs.comp dequant_iq2_xxs.comp dequant_iq3_s.comp dequant_iq3_xxs.comp dequant_iq4_nl.comp dequant_iq4_xs.comp dequant_mxfp4.comp dequant_q2_k.comp dequant_q3_k.comp dequant_q4_0.comp dequant_q4_1.comp dequant_q4_k.comp dequant_q5_0.comp dequant_q5_1.comp dequant_q5_k.comp dequant_q6_k.comp dequant_q8_0.comp diag.comp diag_mask_inf.comp div.comp exp.comp fill.comp flash_attn.comp flash_attn_base.glsl flash_attn_cm1.comp flash_attn_cm2.comp flash_attn_mask_opt.comp flash_attn_split_k_reduce.comp floor.comp geglu.comp geglu_erf.comp geglu_quick.comp gelu.comp gelu_erf.comp gelu_quick.comp generic_binary_head.glsl generic_head.glsl generic_unary_head.glsl get_rows.comp get_rows_quant.comp glu_head.glsl glu_main.glsl group_norm.comp hardsigmoid.comp hardswish.comp im2col.comp im2col_3d.comp l2_norm.comp leaky_relu.comp log.comp mul.comp mul_mat_split_k_reduce.comp mul_mat_vec.comp mul_mat_vec_base.glsl mul_mat_vec_iface.glsl mul_mat_vec_iq1_m.comp mul_mat_vec_iq1_s.comp mul_mat_vec_iq2_s.comp mul_mat_vec_iq2_xs.comp mul_mat_vec_iq2_xxs.comp mul_mat_vec_iq3_s.comp mul_mat_vec_iq3_xxs.comp mul_mat_vec_nc.comp mul_mat_vec_p021.comp mul_mat_vec_q2_k.comp mul_mat_vec_q3_k.comp mul_mat_vec_q4_k.comp mul_mat_vec_q5_k.comp mul_mat_vec_q6_k.comp mul_mat_vecq.comp mul_mat_vecq_funcs.glsl mul_mm.comp mul_mm_cm2.comp mul_mm_funcs.glsl mul_mm_id_funcs.glsl mul_mmq.comp mul_mmq_funcs.glsl mul_mmq_shmem_types.glsl multi_add.comp neg.comp norm.comp 
opt_step_adamw.comp opt_step_sgd.comp pad.comp pool2d.comp quantize_q8_1.comp reglu.comp relu.comp repeat.comp repeat_back.comp rms_norm.comp rms_norm_back.comp rms_norm_partials.comp roll.comp rope_funcs.glsl rope_head.glsl rope_multi.comp rope_neox.comp rope_norm.comp rope_params.glsl rope_vision.comp round.comp rte.glsl scale.comp sigmoid.comp silu.comp silu_back.comp sin.comp soft_max.comp soft_max_back.comp soft_max_large1.comp soft_max_large2.comp soft_max_large3.comp soft_max_large_common.glsl softplus.comp solve_tri.comp sqrt.comp square.comp ssm_conv.comp ssm_scan.comp step.comp sub.comp sum_rows.comp sum_rows.glsl swiglu.comp swiglu_oai.comp tanh.comp timestep_embedding.comp topk_argsort.comp topk_moe.comp topk_nary_search.comp tri.comp trunc.comp types.glsl upscale.comp utils.glsl vulkan-shaders-gen.cpp wkv6.comp wkv7.comp xielu.compggml-webgpu
wgsl-shaders
argmax.wgsl argsort.wgsl argsort_merge.wgsl binary.wgsl common_decls.tmpl cpy.tmpl.wgsl cumsum.wgsl embed_wgsl.py flash_attn.wgsl get_rows.tmpl.wgsl glu.tmpl.wgsl memset.wgsl mul_mat.tmpl.wgsl mul_mat_decls.tmpl mul_mat_reg_tile.tmpl.wgsl mul_mat_subgroup_matrix.tmpl.wgsl mul_mat_vec.tmpl.wgsl pad.wgsl rms_norm.wgsl rope.tmpl.wgsl scale.tmpl.wgsl set_rows.wgsl soft_max.tmpl.wgsl sum_rows.wgsl unary.wgslgguf-py
gguf
scripts
gguf_convert_endian.py gguf_dump.py gguf_editor_gui.py gguf_hash.py gguf_new_metadata.py gguf_set_metadata.pygrammars
README.md arithmetic.gbnf c.gbnf chess.gbnf english.gbnf japanese.gbnf json.gbnf json_arr.gbnf list.gbnfmedia
llama0-banner.png llama0-logo.png llama1-banner.png llama1-icon-transparent.png llama1-icon-transparent.svg llama1-icon.png llama1-icon.svg llama1-logo.png llama1-logo.svg matmul.png matmul.svgmodels
templates
Apertus-8B-Instruct.jinja ByteDance-Seed-OSS.jinja CohereForAI-c4ai-command-r-plus-tool_use.jinja CohereForAI-c4ai-command-r7b-12-2024-tool_use.jinja GLM-4.6.jinja Kimi-K2-Instruct.jinja Kimi-K2-Thinking.jinja MiMo-VL.jinja MiniMax-M2.jinja Mistral-Small-3.2-24B-Instruct-2506.jinja NVIDIA-Nemotron-3-Nano-30B-A3B-BF16.jinja NVIDIA-Nemotron-Nano-v2.jinja NousResearch-Hermes-2-Pro-Llama-3-8B-tool_use.jinja NousResearch-Hermes-3-Llama-3.1-8B-tool_use.jinja Qwen-QwQ-32B.jinja Qwen-Qwen2.5-7B-Instruct.jinja Qwen-Qwen3-0.6B.jinja Qwen3-Coder.jinja README.md deepseek-ai-DeepSeek-R1-Distill-Llama-8B.jinja deepseek-ai-DeepSeek-R1-Distill-Qwen-32B.jinja deepseek-ai-DeepSeek-V3.1.jinja fireworks-ai-llama-3-firefunction-v2.jinja google-gemma-2-2b-it.jinja ibm-granite-granite-3.3-2B-Instruct.jinja llama-cpp-deepseek-r1.jinja llama-cpp-lfm2.jinja llama-cpp-rwkv-world.jinja meetkai-functionary-medium-v3.1.jinja meetkai-functionary-medium-v3.2.jinja meta-llama-Llama-3.1-8B-Instruct.jinja meta-llama-Llama-3.2-3B-Instruct.jinja meta-llama-Llama-3.3-70B-Instruct.jinja microsoft-Phi-3.5-mini-instruct.jinja mistralai-Ministral-3-14B-Reasoning-2512.jinja mistralai-Mistral-Nemo-Instruct-2407.jinja moonshotai-Kimi-K2.jinja openai-gpt-oss-120b.jinja unsloth-Apriel-1.5.jinja unsloth-mistral-Devstral-Small-2507.jinja upstage-Solar-Open-100B.jinjarequirements
requirements-all.txt requirements-compare-llama-bench.txt requirements-convert_hf_to_gguf.txt requirements-convert_hf_to_gguf_update.txt requirements-convert_legacy_llama.txt requirements-convert_llama_ggml_to_gguf.txt requirements-convert_lora_to_gguf.txt requirements-gguf_editor_gui.txt requirements-pydantic.txt requirements-server-bench.txt requirements-test-tokenizer-random.txt requirements-tool_bench.txtscripts
bench-models.sh build-info.sh check-requirements.sh compare-commits.sh compare-llama-bench.py compare-logprobs.py create_ops_docs.py debug-test.sh fetch_server_test_models.py gen-authors.sh gen-unicode-data.py get-flags.mk get-hellaswag.sh get-pg.sh get-wikitext-103.sh get-wikitext-2.sh get-winogrande.sh get_chat_template.py hf.sh install-oneapi.bat pr2wt.sh serve-static.js server-bench.py sync-ggml-am.sh sync-ggml.last sync-ggml.sh sync_vendor.py tool_bench.py tool_bench.sh verify-checksum-models.py xxd.cmakesrc
models
afmoe.cpp apertus.cpp arcee.cpp arctic.cpp arwkv7.cpp baichuan.cpp bailingmoe.cpp bailingmoe2.cpp bert.cpp bitnet.cpp bloom.cpp chameleon.cpp chatglm.cpp codeshell.cpp cogvlm.cpp cohere2-iswa.cpp command-r.cpp dbrx.cpp deci.cpp deepseek.cpp deepseek2.cpp dots1.cpp dream.cpp ernie4-5-moe.cpp ernie4-5.cpp exaone-moe.cpp exaone.cpp exaone4.cpp falcon-h1.cpp falcon.cpp gemma-embedding.cpp gemma.cpp gemma2-iswa.cpp gemma3.cpp gemma3n-iswa.cpp glm4-moe.cpp glm4.cpp gpt2.cpp gptneox.cpp granite-hybrid.cpp granite.cpp graph-context-mamba.cpp grok.cpp grovemoe.cpp hunyuan-dense.cpp hunyuan-moe.cpp internlm2.cpp jais.cpp jamba.cpp kimi-linear.cpp lfm2.cpp llada-moe.cpp llada.cpp llama-iswa.cpp llama.cpp maincoder.cpp mamba.cpp mimo2-iswa.cpp minicpm3.cpp minimax-m2.cpp mistral3.cpp models.h modern-bert.cpp mpt.cpp nemotron-h.cpp nemotron.cpp neo-bert.cpp olmo.cpp olmo2.cpp olmoe.cpp openai-moe-iswa.cpp openelm.cpp orion.cpp pangu-embedded.cpp phi2.cpp phi3.cpp plamo.cpp plamo2.cpp plamo3.cpp plm.cpp qwen.cpp qwen2.cpp qwen2moe.cpp qwen2vl.cpp qwen3.cpp qwen35.cpp qwen35moe.cpp qwen3moe.cpp qwen3next.cpp qwen3vl-moe.cpp qwen3vl.cpp refact.cpp rnd1.cpp rwkv6-base.cpp rwkv6.cpp rwkv6qwen2.cpp rwkv7-base.cpp rwkv7.cpp seed-oss.cpp smallthinker.cpp smollm3.cpp stablelm.cpp starcoder.cpp starcoder2.cpp step35-iswa.cpp t5-dec.cpp t5-enc.cpp wavtokenizer-dec.cpp xverse.cpptests
peg-parser
simple-tokenize.cpp simple-tokenize.h test-basic.cpp test-gbnf-generation.cpp test-json-parser.cpp test-json-serialization.cpp test-unicode.cpp tests.htools
cvector-generator
CMakeLists.txt README.md completions.txt cvector-generator.cpp mean.hpp negative.txt pca.hpp positive.txtmtmd
legacy-models
convert_image_encoder_to_gguf.py glmedge-convert-image-encoder-to-gguf.py glmedge-surgery.py llava_surgery.py llava_surgery_v2.py minicpmv-convert-image-encoder-to-gguf.py minicpmv-surgery.pymodels
cogvlm.cpp conformer.cpp glm4v.cpp internvl.cpp kimik25.cpp kimivl.cpp llama4.cpp llava.cpp minicpmv.cpp mobilenetv5.cpp models.h pixtral.cpp qwen2vl.cpp qwen3vl.cpp siglip.cpp whisper-enc.cpp youtuvl.cppserver
public_legacy
colorthemes.css completion.js favicon.ico index-new.html index.html index.js json-schema-to-grammar.mjs loading.html prompt-formats.js style.css system-prompts.js theme-beeninorder.css theme-ketivah.css theme-mangotango.css theme-playground.css theme-polarnight.css theme-snowstorm.csspublic_simplechat
datautils.mjs index.html readme.md simplechat.css simplechat.js simplechat_screens.webp ui.mjstests
unit
test_basic.py test_chat_completion.py test_compat_anthropic.py test_compat_oai_responses.py test_completion.py test_ctx_shift.py test_embedding.py test_infill.py test_lora.py test_rerank.py test_router.py test_security.py test_sleep.py test_slot_save.py test_speculative.py test_template.py test_tokenize.py test_tool_call.py test_vision_api.pywebui
.storybook
ModeWatcherDecorator.svelte TooltipProviderDecorator.svelte main.ts preview.ts vitest.setup.tssrc
lib
components
app
chat
ChatAttachments
ChatAttachmentPreview.svelte ChatAttachmentThumbnailFile.svelte ChatAttachmentThumbnailImage.svelte ChatAttachmentsList.svelte ChatAttachmentsViewAll.svelteChatForm
ChatFormActions
ChatFormActionFileAttachments.svelte ChatFormActionRecord.svelte ChatFormActionSubmit.svelte ChatFormActions.svelteChatMessages
ChatMessage.svelte ChatMessageActions.svelte ChatMessageAssistant.svelte ChatMessageBranchingControls.svelte ChatMessageEditForm.svelte ChatMessageStatistics.svelte ChatMessageSystem.svelte ChatMessageThinkingBlock.svelte ChatMessageUser.svelte ChatMessages.svelteChatScreen
ChatScreen.svelte ChatScreenDragOverlay.svelte ChatScreenHeader.svelte ChatScreenProcessingInfo.sveltedialogs
DialogChatAttachmentPreview.svelte DialogChatAttachmentsViewAll.svelte DialogChatError.svelte DialogChatSettings.svelte DialogConfirmation.svelte DialogConversationSelection.svelte DialogConversationTitleUpdate.svelte DialogEmptyFileAlert.svelte DialogModelInformation.svelte DialogModelNotAvailable.sveltemisc
ActionButton.svelte ActionDropdown.svelte BadgeChatStatistic.svelte BadgeInfo.svelte BadgeModality.svelte CodePreviewDialog.svelte ConversationSelection.svelte CopyToClipboardIcon.svelte KeyboardShortcutInfo.svelte MarkdownContent.svelte RemoveButton.svelte SearchInput.svelte SyntaxHighlightedCode.svelteui
alert-dialog
alert-dialog-action.svelte alert-dialog-cancel.svelte alert-dialog-content.svelte alert-dialog-description.svelte alert-dialog-footer.svelte alert-dialog-header.svelte alert-dialog-overlay.svelte alert-dialog-title.svelte alert-dialog-trigger.svelte index.tscard
card-action.svelte card-content.svelte card-description.svelte card-footer.svelte card-header.svelte card-title.svelte card.svelte index.tsdialog
dialog-close.svelte dialog-content.svelte dialog-description.svelte dialog-footer.svelte dialog-header.svelte dialog-overlay.svelte dialog-title.svelte dialog-trigger.svelte index.tsdropdown-menu
dropdown-menu-checkbox-item.svelte dropdown-menu-content.svelte dropdown-menu-group-heading.svelte dropdown-menu-group.svelte dropdown-menu-item.svelte dropdown-menu-label.svelte dropdown-menu-radio-group.svelte dropdown-menu-radio-item.svelte dropdown-menu-separator.svelte dropdown-menu-shortcut.svelte dropdown-menu-sub-content.svelte dropdown-menu-sub-trigger.svelte dropdown-menu-trigger.svelte index.tspopover
index.ts popover-close.svelte popover-content.svelte popover-portal.svelte popover-trigger.svelte popover.svelteselect
index.ts select-content.svelte select-group-heading.svelte select-group.svelte select-item.svelte select-label.svelte select-scroll-down-button.svelte select-scroll-up-button.svelte select-separator.svelte select-trigger.sveltesheet
index.ts sheet-close.svelte sheet-content.svelte sheet-description.svelte sheet-footer.svelte sheet-header.svelte sheet-overlay.svelte sheet-title.svelte sheet-trigger.sveltesidebar
constants.ts context.svelte.ts index.ts sidebar-content.svelte sidebar-footer.svelte sidebar-group-action.svelte sidebar-group-content.svelte sidebar-group-label.svelte sidebar-group.svelte sidebar-header.svelte sidebar-input.svelte sidebar-inset.svelte sidebar-menu-action.svelte sidebar-menu-badge.svelte sidebar-menu-button.svelte sidebar-menu-item.svelte sidebar-menu-skeleton.svelte sidebar-menu-sub-button.svelte sidebar-menu-sub-item.svelte sidebar-menu-sub.svelte sidebar-menu.svelte sidebar-provider.svelte sidebar-rail.svelte sidebar-separator.svelte sidebar-trigger.svelte sidebar.sveltetable
index.ts table-body.svelte table-caption.svelte table-cell.svelte table-footer.svelte table-head.svelte table-header.svelte table-row.svelte table.svelteconstants
auto-scroll.ts binary-detection.ts default-context.ts floating-ui-constraints.ts icons.ts input-classes.ts latex-protection.ts literal-html.ts localstorage-keys.ts max-bundle-size.ts precision.ts processing-info.ts settings-config.ts supported-file-types.ts table-html-restorer.ts tooltip-config.ts viewport.tsstores
chat.svelte.ts conversations.svelte.ts models.svelte.ts persisted.svelte.ts server.svelte.ts settings.svelte.tsutils
api-headers.ts api-key-validation.ts attachment-display.ts attachment-type.ts audio-recording.ts autoresize-textarea.ts branching.ts browser-only.ts clipboard.ts config-helpers.ts conversation-utils.ts convert-files-to-extra.ts file-preview.ts file-type.ts formatters.ts index.ts is-ime-composing.ts latex-protection.ts modality-file-validation.ts model-names.ts pdf-processing.ts portal-to-body.ts precision.ts process-uploaded-files.ts svg-to-png.ts syntax-highlight-language.ts text-files.ts text.ts webp-to-png.tstests
llama.cpp/tools/server/themes/buttons-top/index.html
raw
1<html>
2
3<head>
4 <meta charset="UTF-8">
5 <meta name="viewport" content="width=device-width, initial-scale=1, maximum-scale=1" />
6 <meta name="color-scheme" content="light dark">
7 <title>llama.cpp - chat</title>
8
9 <style>
/* Inline stylesheet for the chat page: page layout, form controls, collapsible
   settings sections, probability popovers and the "loading" shimmer. */
10 body {
11 font-family: system-ui;
12 font-size: 90%;
13 }
14
/* Column flex wrapper that fills the available height. */
15 #container {
16 margin: 0em auto;
17 display: flex;
18 flex-direction: column;
19 justify-content: space-between;
20 height: 100%;
21 }
22
/* Bordered, vertically scrollable column that grows to take the free space. */
23 main {
24 margin: 3px;
25 display: flex;
26 flex-direction: column;
27 justify-content: space-between;
28 gap: 1em;
29
30 flex-grow: 1;
31 overflow-y: auto;
32
33 border: 1px solid #ccc;
34 border-radius: 5px;
35 padding: 0.5em;
36 }
37
/* Second body rule (merges with the one above): keeps the page between
   300px and 600px wide and centres it horizontally. */
38 body {
39 max-width: 600px;
40 min-width: 300px;
41 line-height: 1.2;
42 margin: 0 auto;
43 padding: 0 0.5em;
44 }
45
/* Long words may break and hyphenate; word-wrap is the legacy alias of
   overflow-wrap and is declared alongside it. */
46 p {
47 overflow-wrap: break-word;
48 word-wrap: break-word;
49 hyphens: auto;
50 margin-top: 0.5em;
51 margin-bottom: 0.5em;
52 }
53
/* Forms inside #write stack their children vertically at full width. */
54 #write form {
55 margin: 1em 0 0 0;
56 display: flex;
57 flex-direction: column;
58 gap: 0.5em;
59 align-items: stretch;
60 }
61
/* Row whose children are pushed to the right edge. */
62 .right {
63 display: flex;
64 flex-direction: row;
65 gap: 0.5em;
66 justify-content: flex-end;
67 }
68
/* Fieldsets are used purely for grouping: no border, padding or margin. */
69 fieldset {
70 border: none;
71 padding: 0;
72 margin: 0;
73 }
74
/* Two-column grid variant. */
75 fieldset.two {
76 display: grid;
77 grid-template: "a a";
78 gap: 1em;
79 }
80
/* Three-column grid variant. */
81 fieldset.three {
82 display: grid;
83 grid-template: "a a a";
84 gap: 1em;
85 }
86
/* Collapsible section: no bottom padding here; details[open] below restores it. */
87 details {
88 border: 1px solid #aaa;
89 border-radius: 4px;
90 padding: 0.5em 0.5em 0;
91 margin-top: 0.5em;
92 }
93
/* Negative margins cancel the parent's padding so the clickable summary
   spans the full width of the details box. */
94 summary {
95 font-weight: bold;
96 margin: -0.5em -0.5em 0;
97 padding: 0.5em;
98 cursor: pointer;
99 }
100
101 details[open] {
102 padding: 0.5em;
103 }
104
/* One padded entry with a separator line underneath. */
105 .prob-set {
106 padding: 0.3em;
107 border-bottom: 1px solid #ccc;
108 }
109
/* Absolutely positioned popover; white here, black in the dark-scheme
   media query at the end of the sheet. */
110 .popover-content {
111 position: absolute;
112 background-color: white;
113 padding: 0.2em;
114 box-shadow: 0 0 10px rgba(0, 0, 0, 0.1);
115 }
116
117 textarea {
118 padding: 5px;
119 flex-grow: 1;
120 width: 100%;
121 }
122
/* Code inside pre is rendered as a dark block regardless of colour scheme. */
123 pre code {
124 display: block;
125 background-color: #222;
126 color: #ddd;
127 }
128
129 code {
130 font-family: monospace;
131 padding: 0.1em 0.3em;
132 border-radius: 3px;
133 }
134
/* Labels inside a fieldset are block-level by default ... */
135 fieldset label {
136 margin: 0.5em 0;
137 display: block;
138 }
139
/* ... except .slim labels, which stay inline with horizontal spacing. */
140 fieldset label.slim {
141 margin: 0 0.5em;
142 display: inline;
143 }
144
145 header,
146 footer {
147 text-align: center;
148 }
149
150 footer {
151 font-size: 80%;
152 color: #888;
153 }
154
/* Prompt box height depends on the mode class on an ancestor:
   shorter under .mode-chat, taller under .mode-completion. */
155 .mode-chat textarea[name=prompt] {
156 height: 4.5em;
157 }
158
159 .mode-completion textarea[name=prompt] {
160 height: 10em;
161 }
162
/* Editable regions keep their whitespace and show no focus outline. */
163 [contenteditable] {
164 display: inline-block;
165 white-space: pre-wrap;
166 outline: 0px solid transparent;
167 }
168
/* Moves the background from the left edge to the right edge. */
169 @keyframes loading-bg-wipe {
170 0% {
171 background-position: 0%;
172 }
173
174 100% {
175 background-position: 100%;
176 }
177 }
178
/* Shimmer: a half-width gradient fading from a fully transparent colour to an
   opaque one and back, swept across the element every 2 seconds. The two
   colours are custom properties so the dark scheme can override them. */
179 .loading {
180 --loading-color-1: #eeeeee00;
181 --loading-color-2: #eeeeeeff;
182 background-size: 50% 100%;
183 background-image: linear-gradient(90deg, var(--loading-color-1), var(--loading-color-2), var(--loading-color-1));
184 animation: loading-bg-wipe 2s linear infinite;
185 }
186
/* Dark colour scheme overrides for the shimmer colours and popover background. */
187 @media (prefers-color-scheme: dark) {
188 .loading {
189 --loading-color-1: #22222200;
190 --loading-color-2: #222222ff;
191 }
192
193 .popover-content {
194 background-color: black;
195 }
196 }
197 </style>
198
199 <script type="module">
200 import {
201 html, h, signal, effect, computed, render, useSignal, useEffect, useRef, Component
202 } from './index.js';
203
204 import { llama } from './completion.js';
205 import { SchemaConverter } from './json-schema-to-grammar.mjs';
// NOTE(review): presumably flags whether an image is attached to the next
// request; the code that reads it is outside this chunk — confirm.
let selected_image = false;
// NOTE(review): -1 looks like a "no server slot assigned yet" sentinel — confirm
// against the code that writes it. Declared with `let` (was `var`): the binding
// is module-scoped either way, and `let` rules out accidental redeclaration.
let slot_id = -1;

// Conversation state: the prompt text, the templates used to lay out the
// prompt and each history line, the transcript so far, and the display names.
const session = signal({
  prompt: "This is a conversation between User and Llama, a friendly chatbot. Llama is helpful, kind, honest, good at writing, and never fails to answer any requests immediately and with precision.",
  template: "{{prompt}}\n\n{{history}}\n{{char}}:",
  historyTemplate: "{{name}}: {{message}}",
  transcript: [],
  type: "chat",  // "chat" | "completion"
  char: "Llama",
  user: "User",
  image_selected: ''
});

// Default sampling / request parameters.
const params = signal({
  n_predict: 400,
  temperature: 0.7,
  repeat_last_n: 256, // 0 = disable penalty, -1 = context size
  repeat_penalty: 1.18, // 1.0 = disabled
  top_k: 40, // <= 0 to use vocab size
  top_p: 0.95, // 1.0 = disabled
  min_p: 0.05, // 0 = disabled
  typical_p: 1.0, // 1.0 = disabled
  presence_penalty: 0.0, // 0.0 = disabled
  frequency_penalty: 0.0, // 0.0 = disabled
  mirostat: 0, // 0/1/2
  mirostat_tau: 5, // target entropy
  mirostat_eta: 0.1, // learning rate
  grammar: '',
  n_probs: 0, // no completion_probabilities,
  min_keep: 0, // min probs from each sampler,
  image_data: [],
  cache_prompt: true,
  api_key: ''
});
241
242 /* START: Support for storing prompt templates and parameters in the browser's localStorage */
243
// Prefix shared by every localStorage entry this page reads or writes.
const local_storage_storageKey = "llamacpp_server_local_storage";

/**
 * Store `content`, serialized as JSON, under "<storage key>/<tag>".
 *
 * @param {string} tag - Entry name appended to the shared key prefix.
 * @param {*} content - Value to serialize with JSON.stringify.
 */
function local_storage_setDataFromObject(tag, content) {
  const entryKey = local_storage_storageKey + '/' + tag;
  const serialized = JSON.stringify(content);
  localStorage.setItem(entryKey, serialized);
}
249
/**
 * Store `content` unchanged (no JSON serialization) under "<storage key>/<tag>".
 *
 * @param {string} tag - Entry name appended to the shared key prefix.
 * @param {string} content - Text handed to localStorage.setItem as-is.
 */
function local_storage_setDataFromRawText(tag, content) {
  const entryKey = local_storage_storageKey + '/' + tag;
  localStorage.setItem(entryKey, content);
}
253
/**
 * Read the entry stored under "<storage key>/<tag>" and parse it as JSON.
 *
 * A corrupt entry (text that is not valid JSON) is treated like a missing one:
 * this function is called while the module initializes, so an uncaught
 * SyntaxError here would stop the rest of the page script from running.
 *
 * @param {string} tag - Entry name appended to the shared key prefix.
 * @returns {*} The parsed value, or null when the entry is absent, empty,
 *   or not valid JSON.
 */
function local_storage_getDataAsObject(tag) {
  const item = localStorage.getItem(local_storage_storageKey + '/' + tag);
  if (!item) {
    return null;
  }
  try {
    return JSON.parse(item);
  } catch (err) {
    // Keep the stored text untouched; just report it and fall back to "nothing saved".
    console.warn('Ignoring unparsable LocalStorage entry "' + tag + '"', err);
    return null;
  }
}
262
/**
 * Read the entry stored under "<storage key>/<tag>" without parsing it.
 *
 * @param {string} tag - Entry name appended to the shared key prefix.
 * @returns {string|null} The stored text, or null when the entry is absent
 *   or is the empty string.
 */
function local_storage_getDataAsRawText(tag) {
  const entryKey = local_storage_storageKey + '/' + tag;
  const storedText = localStorage.getItem(entryKey);
  return storedText ? storedText : null;
}
271
// Containers for user templates and settings:
// `savedUserTemplates` holds every stored template keyed by its name,
// `selectedUserTemplate` holds the one currently in use.
const savedUserTemplates = signal({});
const selectedUserTemplate = signal({ name: '', template: { session: {}, params: {} } });

// Import locally saved templates and settings, if there are any.
// They are stored together in one object of the form
// { "templatename": "templatedata" } and { "settingstemplatename": "settingsdata" }.
console.log('Importing saved templates');

let importedTemplates = local_storage_getDataAsObject('user_templates');

// Snapshot of the built-in defaults; evaluated at the call site so it sees the
// current values of the `session` and `params` signals.
const buildDefaultTemplateEntry = () => ({ session: session.value, params: params.value });

if (!importedTemplates) {
  // Nothing stored yet: start the collection with only the default template.
  console.log('Initializing LocalStorage and saving default template');

  savedUserTemplates.value = { "default": buildDefaultTemplateEntry() };
} else {
  // Stored templates exist: adopt them, but always overwrite their "default"
  // entry with the built-in defaults (with image data cleared first).
  console.log('Processing saved templates and updating default template');
  params.value = { ...params.value, image_data: [] };

  savedUserTemplates.value = importedTemplates;
  savedUserTemplates.value.default = buildDefaultTemplateEntry();
}

// Either way, persist the resulting collection.
local_storage_setDataFromObject('user_templates', savedUserTemplates.value);
305
/**
 * Point `selectedUserTemplate` at the saved template named "default".
 *
 * The signal receives a new object instead of having its current object
 * mutated in place: writing properties on the existing value does not change
 * the signal's identity, so subscribers would not be told about the switch.
 * Any other properties of the previous selection are carried over.
 */
function userTemplateResetToDefault() {
  console.log('Resetting template to default');
  selectedUserTemplate.value = {
    ...selectedUserTemplate.value,
    name: 'default',
    data: savedUserTemplates.value['default'],
  };
}
311
/**
 * Load a template's session settings and sampling parameters into the live
 * `session` and `params` signals, clearing any image selection on the way.
 *
 * Each signal is written exactly once with its final value. Writing it twice
 * (first the raw template object, then the cleaned copy) would briefly publish
 * a state that still carries the template's `image_selected` / `image_data`,
 * and would hand the template's own object to the signal.
 *
 * @param {{data: {session: object, params: object}}} t - Template to apply;
 *   it is not modified.
 */
function userTemplateApply(t) {
  session.value = { ...t.data.session, image_selected: '' };
  params.value = { ...t.data.params, image_data: [] };
}
318
/**
 * Select the "default" template, then apply whatever is selected afterwards
 * to the live session and parameters.
 */
function userTemplateResetToDefaultAndApply() {
  userTemplateResetToDefault();
  // Read the selection only after the reset so the default is what gets applied.
  const { value: templateToApply } = selectedUserTemplate;
  userTemplateApply(templateToApply);
}
323
/**
 * Restore the template kept in the 'user_templates_last' autosave slot, or
 * fall back to the default template when that slot is empty, then apply the
 * selected template to the live session and parameters.
 */
function userTemplateLoadAndApplyAutosaved() {
  const autosaved = local_storage_getDataAsObject('user_templates_last');

  if (!autosaved) {
    // Nothing was autosaved, so start from the default template.
    console.log('No autosaved template found, using default template');
    userTemplateResetToDefault();
  } else {
    console.log('Autosaved template found, restoring');
    selectedUserTemplate.value = autosaved;
  }

  // Push the selected template's data into the live state.
  console.log('Applying template');
  userTemplateApply(selectedUserTemplate.value);
}
347
348 //console.log(savedUserTemplates.value)
349 //console.log(selectedUserTemplate.value)
350
/**
 * Write the current session and parameters to the 'user_templates_last'
 * autosave slot.
 *
 * When the selected template is the default one, the snapshot is stored under
 * a newly generated "UserTemplate-<timestamp>" name and immediately loaded
 * back, so the default template itself is never saved over.
 */
function userTemplateAutosave() {
  console.log('Template Autosave...');

  const selectedName = selectedUserTemplate.value.name;

  if (selectedName != 'default') {
    // A user template is selected: autosave under its existing name.
    const snapshot = { 'name': selectedName, 'data': { 'session': session.value, 'params': params.value } };
    local_storage_setDataFromObject('user_templates_last', snapshot);
    return;
  }

  // The default template is selected: save under a fresh name instead.
  const generatedName = 'UserTemplate-' + Date.now().toString();
  const snapshot = { 'name': generatedName, 'data': { 'session': session.value, 'params': params.value } };

  console.log('Saving as ' + generatedName);

  // Store in the autosave slot ...
  local_storage_setDataFromObject('user_templates_last', snapshot);

  // ... then load it back and apply it.
  userTemplateLoadAndApplyAutosaved();
}
369
// On script start, restore the last autosaved template (or the default one)
// before any component renders.
console.log('Checking for autosaved last used template');
userTemplateLoadAndApplyAutosaved();
372
373 /* END: Support for storing prompt templates and parameters in browsers LocalStorage */
374
// last streamed payload that carried timing information (null until one arrives)
const llamaStats = signal(null);
// AbortController of the request in flight; null while idle
const controller = signal(null);

// is a completion being generated right now?
const generating = computed(() => controller.value != null);

// does the transcript already contain at least one entry?
const chatStarted = computed(() => session.value.transcript.length > 0);
383
/**
 * Replace the session transcript.
 *
 * A new session object is assigned rather than mutating the current one.
 *
 * @param {Array} transcript the new transcript entries
 */
const transcriptUpdate = (transcript) => {
  const previous = session.value;
  session.value = { ...previous, transcript };
};
390
/**
 * Minimal `{{key}}` substitution.
 *
 * Placeholders are looked up in the session, with `extraSettings` (if given)
 * taking precedence. Each substituted value is itself expanded again, but
 * only against the session: `extraSettings` are not passed to the nested
 * call.
 *
 * @param {*} str text to expand (coerced with String())
 * @param {object} [extraSettings] additional keys overriding the session
 * @returns {string} the expanded text
 */
const template = (str, extraSettings) => {
  const settings = extraSettings
    ? { ...session.value, ...extraSettings }
    : session.value;
  const expand = (_match, key) => template(settings[key]);
  return String(str).replaceAll(/\{\{(.*?)\}\}/g, expand);
};
399
/**
 * Stream a completion from the server and mirror it into the transcript.
 *
 * While the request is in flight `controller.value` holds its
 * AbortController. The controller is released in a `finally` block so that
 * it is cleared on every exit path: normal completion, the early return
 * after the multimodal alert, and errors thrown by the stream. It is only
 * cleared if it is still this run's controller, so a run that ends after
 * `stop()` followed by a new request cannot wipe the newer request's
 * controller.
 *
 * @param {string} prompt prompt text sent to the server
 * @param {object} llamaParams request parameters
 * @param {string} char speaker label stored with the generated messages
 * @throws {Error} "already running" when another request is in flight;
 *   errors raised by the stream are propagated
 */
async function runLlama(prompt, llamaParams, char) {
  const currentMessages = [];
  const history = session.value.transcript;
  if (controller.value) {
    throw new Error("already running");
  }
  const ownController = new AbortController();
  controller.value = ownController;
  try {
    for await (const chunk of llama(prompt, llamaParams, { controller: ownController, api_url: location.pathname.replace(/\/+$/, '') })) {
      const data = chunk.data;

      if (data.stop) {
        // drop trailing messages whose content ends with a newline
        while (
          currentMessages.length > 0 &&
          currentMessages[currentMessages.length - 1].content.match(/\n$/) != null
        ) {
          currentMessages.pop();
        }
        transcriptUpdate([...history, [char, currentMessages]])
        console.log("Completion finished: '", currentMessages.map(msg => msg.content).join(''), "', summary: ", data);
      } else {
        currentMessages.push(data);
        slot_id = data.slot_id;
        if (selected_image && !data.multimodal) {
          alert("The server was not compiled for multimodal or the model projector can't be loaded.");
          return;
        }
        transcriptUpdate([...history, [char, currentMessages]])
      }

      if (data.timings) {
        llamaStats.value = data;
      }
    }
  } finally {
    // release the "generating" state unless a newer run already owns it
    if (controller.value === ownController) {
      controller.value = null;
    }
  }
}
436
/**
 * Send a chat message to the server.
 *
 * Appends the user's message to the transcript, renders the prompt from the
 * session's templates and streams the reply through `runLlama`. Does nothing
 * (apart from logging) while another request is in flight.
 *
 * @param {string} msg the user's message
 */
const chat = async (msg) => {
  if (controller.value) {
    console.log('already running...');
    return;
  }

  transcriptUpdate([...session.value.transcript, ["{{user}}", msg]])

  // render one transcript entry with the history template
  const renderHistoryEntry = ([name, data]) => {
    const message = Array.isArray(data)
      ? data.map(part => part.content).join('').replace(/^\s/, '')
      : data;
    return template(session.value.historyTemplate, { name, message });
  };
  const history = session.value.transcript.flatMap(renderHistoryEntry).join("\n");

  let prompt = template(session.value.template, { message: msg, history });
  if (selected_image) {
    // with an image selected, a fixed prompt replaces the templated one
    prompt = `A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions.\nUSER:[img-10]${msg}\nASSISTANT:`;
  }

  const llamaParams = {
    ...params.value,
    slot_id: slot_id,
    stop: ["</s>", template("{{char}}:"), template("{{user}}:")],
  };
  await runLlama(prompt, llamaParams, "{{char}}");
}
470
/**
 * Run a raw completion of the session prompt.
 *
 * The prompt is added to the transcript and streamed through `runLlama`.
 * Once the run settles, the whole transcript is folded back into
 * `session.value.prompt` and the transcript is emptied. Both writes mutate
 * the current session object in place rather than assigning a new one.
 */
const runCompletion = () => {
  if (controller.value) {
    console.log('already running...');
    return;
  }

  const prompt = session.value.prompt;
  transcriptUpdate([...session.value.transcript, ["", prompt]]);

  // flatten every transcript entry to text and make it the new prompt
  const foldTranscriptIntoPrompt = () => {
    const pieces = session.value.transcript.map(([_, data]) =>
      Array.isArray(data) ? data.map(msg => msg.content).join('') : data
    );
    session.value.prompt = pieces.join('');
    session.value.transcript = [];
  };

  const llamaParams = {
    ...params.value,
    slot_id: slot_id,
    stop: [],
  };
  runLlama(prompt, llamaParams, "").finally(foldTranscriptIntoPrompt)
}
489
/**
 * Abort the request in flight, if there is one.
 *
 * Always suppresses the event's default action first.
 *
 * @param {Event} e the triggering event
 */
const stop = (e) => {
  e.preventDefault();
  const active = controller.value;
  if (!active) {
    return;
  }
  active.abort();
  controller.value = null;
}
497
/**
 * Abort any running request and empty the transcript.
 *
 * @param {Event} e the triggering event (forwarded to `stop`)
 */
const reset = (e) => {
  stop(e);
  const emptyTranscript = [];
  transcriptUpdate(emptyTranscript);
}
502
/**
 * Open the hidden file picker and attach the chosen image to the session.
 *
 * The change handler is assigned to `onchange` rather than added with
 * `addEventListener`: this function runs on every click of the upload
 * button, and adding a listener each time would stack handlers so that a
 * single selection gets read once per earlier click.
 *
 * Once the file is read, the data URL is stored in
 * `session.value.image_selected` and its base64 payload (data-URL prefix
 * removed) in `params.value.image_data` under id 10.
 *
 * @param {Event} e the click event; its default action is suppressed
 */
const uploadImage = (e) => {
  e.preventDefault();
  const fileInput = document.getElementById("fileInput");

  fileInput.onchange = (event) => {
    const selectedFile = event.target.files[0];
    if (!selectedFile) {
      return;
    }
    const reader = new FileReader();
    reader.onload = () => {
      const image_data = reader.result;
      session.value = { ...session.value, image_selected: image_data };
      params.value = {
        ...params.value, image_data: [
          { data: image_data.replace(/data:image\/[^;]+;base64,/, ''), id: 10 }]
      }
    };
    selected_image = true;
    reader.readAsDataURL(selectedFile);
  };

  fileInput.click();
}
523
/**
 * Chat input area: a textarea plus Send / Upload Image / Stop / Reset
 * buttons. Enter (without Shift) submits the message.
 */
function MessageInput() {
  const message = useSignal("")

  // stop any running request, send the typed text, then clear the box
  const sendMessage = (e) => {
    stop(e);
    chat(message.value);
    message.value = "";
  }

  const handleKeyPress = (event) => {
    const isPlainEnter = event.which === 13 && !event.shiftKey;
    if (isPlainEnter) {
      sendMessage(event);
    }
  }

  const handleInput = (e) => message.value = e.target.value

  return html`
    <form onsubmit=${sendMessage}>
      <div>
        <textarea
          className=${generating.value ? "loading" : null}
          oninput=${handleInput}
          onkeypress=${handleKeyPress}
          placeholder="Say something..."
          rows=2
          type="text"
          value="${message}"
        />
      </div>
      <div class="right">
        <button type="submit" disabled=${generating.value}>Send</button>
        <button onclick=${uploadImage}>Upload Image</button>
        <button onclick=${stop} disabled=${!generating.value}>Stop</button>
        <button onclick=${reset}>Reset</button>
      </div>
    </form>
  `
}
561
/**
 * Start / Stop / Reset buttons shown in completion mode.
 */
function CompletionControls() {
  // abort whatever is running before kicking off a new completion
  const startCompletion = (e) => {
    stop(e);
    runCompletion();
  }

  return html`
    <div>
      <button onclick=${startCompletion} type="button" disabled=${generating.value}>Start</button>
      <button onclick=${stop} disabled=${!generating.value}>Stop</button>
      <button onclick=${reset}>Reset</button>
    </div>`;
}
574
/**
 * Renders the transcript, plus the selected image if there is one.
 *
 * In completion mode the transcript is shown as editable plain text; edits
 * are written back into the session prompt. In chat mode each entry is
 * rendered through `Markdownish`, or through `Probabilities` when
 * `n_probs > 0` and the entry holds streamed messages.
 */
const ChatLog = (props) => {
  const messages = session.value.transcript;
  const container = useRef(null)

  useEffect(() => {
    // keep the view pinned to the bottom while the user is already near it
    const parent = container.current.parentElement;
    const nearBottom = parent && parent.scrollHeight <= parent.scrollTop + parent.offsetHeight + 300;
    if (nearBottom) {
      parent.scrollTo(0, parent.scrollHeight)
    }
  }, [messages])

  const isCompletionMode = session.value.type === 'completion'

  // turn one entry's payload (string or array of streamed messages) into output
  const renderMessage = (data) => {
    const isArrayMessage = Array.isArray(data)
    if (params.value.n_probs > 0 && isArrayMessage) {
      return html`<${Probabilities} data=${data} />`
    }
    const text = isArrayMessage ?
      data.map(msg => msg.content).join('').replace(/^\s+/, '') :
      data;
    if (isCompletionMode) {
      return text
    }
    return html`<${Markdownish} text=${template(text)} />`
  }

  const chatLine = ([user, data], index) => {
    const message = renderMessage(data)
    if (user) {
      return html`<p key=${index}><strong>${template(user)}:</strong> ${message}</p>`
    }
    return isCompletionMode ?
      html`<span key=${index}>${message}</span>` :
      html`<p key=${index}>${message}</p>`
  };

  // the edited text becomes the prompt; the session object is mutated in place
  const handleCompletionEdit = (e) => {
    session.value.prompt = e.target.innerText;
    session.value.transcript = [];
  }

  return html`
    <div id="chat" ref=${container} key=${messages.length}>
      <img style="width: 60%;${!session.value.image_selected ? `display: none;` : ``}" src="${session.value.image_selected}"/>
      <span contenteditable=${isCompletionMode} ref=${container} oninput=${handleCompletionEdit}>
        ${messages.flatMap(chatLine)}
      </span>
    </div>`;
};
623
/**
 * Settings form shown before a chat has started: template reset, chat vs
 * completion mode, prompt/template fields, grammar, and sampling parameters.
 *
 * Every session/params change re-runs the autosave effect.
 */
const ConfigForm = (props) => {
  // field handlers: each writes the input's value under the input's `name`
  const updateSession = (el) => session.value = { ...session.value, [el.target.name]: el.target.value }
  const updateParams = (el) => params.value = { ...params.value, [el.target.name]: el.target.value }
  const updateParamsFloat = (el) => params.value = { ...params.value, [el.target.name]: parseFloat(el.target.value) }
  const updateParamsInt = (el) => params.value = { ...params.value, [el.target.name]: Math.floor(parseFloat(el.target.value)) }
  const updateParamsBool = (el) => params.value = { ...params.value, [el.target.name]: el.target.checked }

  // useSignal (not signal) so the typed property order survives re-renders;
  // a plain signal() here is recreated empty on every render of this form.
  const grammarJsonSchemaPropOrder = useSignal('')
  const updateGrammarJsonSchemaPropOrder = (el) => grammarJsonSchemaPropOrder.value = el.target.value

  // treat the grammar field as a JSON Schema and replace it with the grammar
  // produced by SchemaConverter; failures are reported with alert()
  const convertJSONSchemaGrammar = async () => {
    try {
      let schema = JSON.parse(params.value.grammar)
      const converter = new SchemaConverter({
        prop_order: grammarJsonSchemaPropOrder.value
          .split(',')
          .reduce((acc, cur, i) => ({ ...acc, [cur.trim()]: i }), {}),
        allow_fetch: true,
      })
      schema = await converter.resolveRefs(schema, 'input')
      converter.visit(schema, '')
      params.value = {
        ...params.value,
        grammar: converter.formatGrammar(),
      }
    } catch (e) {
      alert(`Convert failed: ${e.message}`)
    }
  }

  const FloatField = ({ label, max, min, name, step, value }) => {
    return html`
      <div>
        <label for="${name}">${label}</label>
        <input type="range" id="${name}" min="${min}" max="${max}" step="${step}" name="${name}" value="${value}" oninput=${updateParamsFloat} />
        <span>${value}</span>
      </div>
    `
  };

  const IntField = ({ label, max, min, name, value }) => {
    return html`
      <div>
        <label for="${name}">${label}</label>
        <input type="range" id="${name}" min="${min}" max="${max}" name="${name}" value="${value}" oninput=${updateParamsInt} />
        <span>${value}</span>
      </div>
    `
  };

  const BoolField = ({ label, name, value }) => {
    return html`
      <div>
        <label for="${name}">${label}</label>
        <input type="checkbox" id="${name}" name="${name}" checked="${value}" onclick=${updateParamsBool} />
      </div>
    `
  };

  const userTemplateReset = (e) => {
    e.preventDefault();
    userTemplateResetToDefaultAndApply()
  }

  const UserTemplateResetButton = () => {
    if (selectedUserTemplate.value.name == 'default') {
      return html`
        <button disabled>Using default template</button>
      `
    }

    return html`
      <button onclick=${userTemplateReset}>Reset all to default</button>
    `
  };

  useEffect(() => {
    // autosave template on every change
    userTemplateAutosave()
  }, [session.value, params.value])

  const GrammarControl = () => (
    html`
      <div>
        <label for="template">Grammar</label>
        <textarea id="grammar" name="grammar" placeholder="Use gbnf or JSON Schema+convert" value="${params.value.grammar}" rows=4 oninput=${updateParams}/>
        <input type="text" name="prop-order" placeholder="order: prop1,prop2,prop3" oninput=${updateGrammarJsonSchemaPropOrder} />
        <button type="button" onclick=${convertJSONSchemaGrammar}>Convert JSON Schema</button>
      </div>
    `
  );

  const PromptControlFieldSet = () => (
    html`
      <fieldset>
        <div>
          <label htmlFor="prompt">Prompt</label>
          <textarea type="text" name="prompt" value="${session.value.prompt}" oninput=${updateSession}/>
        </div>
      </fieldset>
    `
  );

  const ChatConfigForm = () => (
    html`
      ${PromptControlFieldSet()}

      <fieldset class="two">
        <div>
          <label for="user">User name</label>
          <input type="text" name="user" value="${session.value.user}" oninput=${updateSession} />
        </div>

        <div>
          <label for="bot">Bot name</label>
          <input type="text" name="char" value="${session.value.char}" oninput=${updateSession} />
        </div>
      </fieldset>

      <fieldset>
        <div>
          <label for="template">Prompt template</label>
          <textarea id="template" name="template" value="${session.value.template}" rows=4 oninput=${updateSession}/>
        </div>

        <div>
          <label for="template">Chat history template</label>
          <textarea id="template" name="historyTemplate" value="${session.value.historyTemplate}" rows=1 oninput=${updateSession}/>
        </div>
        ${GrammarControl()}
      </fieldset>
    `
  );

  const CompletionConfigForm = () => (
    html`
      ${PromptControlFieldSet()}
      <fieldset>${GrammarControl()}</fieldset>
    `
  );

  return html`
    <form>
      <fieldset class="two">
        <${UserTemplateResetButton}/>
        <div>
          <label class="slim"><input type="radio" name="type" value="chat" checked=${session.value.type === "chat"} oninput=${updateSession} /> Chat</label>
          <label class="slim"><input type="radio" name="type" value="completion" checked=${session.value.type === "completion"} oninput=${updateSession} /> Completion</label>
        </div>
      </fieldset>

      ${session.value.type === 'chat' ? ChatConfigForm() : CompletionConfigForm()}

      <fieldset class="two">
        ${IntField({ label: "Predictions", max: 2048, min: -1, name: "n_predict", value: params.value.n_predict })}
        ${FloatField({ label: "Temperature", max: 2.0, min: 0.0, name: "temperature", step: 0.01, value: params.value.temperature })}
        ${FloatField({ label: "Penalize repeat sequence", max: 2.0, min: 0.0, name: "repeat_penalty", step: 0.01, value: params.value.repeat_penalty })}
        ${IntField({ label: "Consider N tokens for penalize", max: 2048, min: 0, name: "repeat_last_n", value: params.value.repeat_last_n })}
        ${IntField({ label: "Top-K sampling", max: 100, min: -1, name: "top_k", value: params.value.top_k })}
        ${FloatField({ label: "Top-P sampling", max: 1.0, min: 0.0, name: "top_p", step: 0.01, value: params.value.top_p })}
        ${FloatField({ label: "Min-P sampling", max: 1.0, min: 0.0, name: "min_p", step: 0.01, value: params.value.min_p })}
      </fieldset>
      <details>
        <summary>More options</summary>
        <fieldset class="two">
          ${FloatField({ label: "Typical P", max: 1.0, min: 0.0, name: "typical_p", step: 0.01, value: params.value.typical_p })}
          ${FloatField({ label: "Presence penalty", max: 1.0, min: 0.0, name: "presence_penalty", step: 0.01, value: params.value.presence_penalty })}
          ${FloatField({ label: "Frequency penalty", max: 1.0, min: 0.0, name: "frequency_penalty", step: 0.01, value: params.value.frequency_penalty })}
        </fieldset>
        <hr />
        <fieldset class="three">
          <div>
            <label><input type="radio" name="mirostat" value="0" checked=${params.value.mirostat == 0} oninput=${updateParamsInt} /> no Mirostat</label>
            <label><input type="radio" name="mirostat" value="1" checked=${params.value.mirostat == 1} oninput=${updateParamsInt} /> Mirostat v1</label>
            <label><input type="radio" name="mirostat" value="2" checked=${params.value.mirostat == 2} oninput=${updateParamsInt} /> Mirostat v2</label>
          </div>
          ${FloatField({ label: "Mirostat tau", max: 10.0, min: 0.0, name: "mirostat_tau", step: 0.01, value: params.value.mirostat_tau })}
          ${FloatField({ label: "Mirostat eta", max: 1.0, min: 0.0, name: "mirostat_eta", step: 0.01, value: params.value.mirostat_eta })}
        </fieldset>
        <fieldset>
          ${IntField({ label: "Show Probabilities", max: 10, min: 0, name: "n_probs", value: params.value.n_probs })}
        </fieldset>
        <fieldset>
          ${IntField({ label: "Min Probabilities from each Sampler", max: 10, min: 0, name: "min_keep", value: params.value.min_keep })}
        </fieldset>
        <fieldset>
          <label for="api_key">API Key</label>
          <input type="text" name="api_key" value="${params.value.api_key}" placeholder="Enter API key" oninput=${updateParams} />
        </fieldset>
      </details>
    </form>
  `
}
816
/**
 * Map a probability to a translucent colour: red at 0, green at 1.
 *
 * @param {number} p probability in [0, 1]
 * @returns {string} CSS `rgba(...)` colour with alpha 0.3
 */
const probColor = (p) => {
  const [red, green] = [1 - p, p].map((share) => Math.floor(192 * share));
  return `rgba(${red},${green},0,0.3)`;
}
822
/**
 * Render streamed messages with per-token probability popovers.
 *
 * Messages without probability data are returned as plain text. A message
 * carrying several probability entries is split into one message per entry
 * and rendered recursively, unless its first entry starts with 'byte: \',
 * in which case the raw content is returned.
 *
 * @param {{data: Array}} params component props; `data` is the message list
 */
const Probabilities = (params) => {
  const renderMessage = (msg) => {
    const entries = msg.completion_probabilities;
    if (!entries || entries.length === 0) {
      return msg.content
    }

    if (entries.length > 1) {
      // Not for byte pair
      if (entries[0].content.startsWith('byte: \\')) {
        return msg.content
      }

      const splitData = entries.map(prob => ({
        content: prob.content,
        completion_probabilities: [prob]
      }))
      return html`<${Probabilities} data=${splitData} />`
    }

    const { probs, content } = entries[0]
    const found = probs.find(p => p.tok_str === msg.content)
    const pColor = found ? probColor(found.prob) : 'transparent'

    // one row per candidate token; the row matching the entry is highlighted
    const candidateRows = probs.map((p, index) => {
      return html`
        <div
          key=${index}
          title=${`prob: ${p.prob}`}
          style=${{
            padding: '0.3em',
            backgroundColor: p.tok_str === content ? probColor(p.prob) : 'transparent'
          }}
        >
          <span>${p.tok_str}: </span>
          <span>${Math.floor(p.prob * 100)}%</span>
        </div>
      `
    })

    const popoverChildren = html`
      <div class="prob-set">
        ${candidateRows}
      </div>
    `

    return html`
      <${Popover} style=${{ backgroundColor: pColor }} popoverChildren=${popoverChildren}>
        ${msg.content.match(/\n/gim) ? html`<br />` : msg.content}
      </>
    `
  }

  return params.data.map(msg => renderMessage(msg));
}
873
/**
 * Poor man's markdown: converts a small markdown subset to HTML.
 *
 * The text is HTML-escaped first (`&`, `<`, `>`), because the result is
 * injected with `dangerouslySetInnerHTML`. Without escaping, markup present
 * in the text would be inserted as live HTML. `&` must be escaped first so
 * that the entities produced for `<` and `>` are not escaped again.
 *
 * Supported: `#`..`######` headings (all rendered as <h3>), bold, emphasis,
 * fenced and inline code, and line breaks.
 *
 * @param {{text: string}} params component props
 */
const Markdownish = (params) => {
  const md = params.text
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/^#{1,6} (.*)$/gim, '<h3>$1</h3>')
    .replace(/\*\*(.*?)\*\*/g, '<strong>$1</strong>')
    .replace(/__(.*?)__/g, '<strong>$1</strong>')
    .replace(/\*(.*?)\*/g, '<em>$1</em>')
    .replace(/_(.*?)_/g, '<em>$1</em>')
    .replace(/```.*?\n([\s\S]*?)```/g, '<pre><code>$1</code></pre>')
    .replace(/`(.*?)`/g, '<code>$1</code>')
    .replace(/\n/gim, '<br />');
  return html`<span dangerouslySetInnerHTML=${{ __html: md }} />`;
};
890
/**
 * Footer line with statistics of the latest generation: predicted and cached
 * token counts, milliseconds per token and tokens per second. Renders an
 * empty span until statistics are available.
 */
const ModelGenerationInfo = (params) => {
  const stats = llamaStats.value;
  if (!stats) {
    return html`<span/>`
  }
  const { predicted_per_token_ms, predicted_per_second } = stats.timings;
  return html`
    <span>
      ${stats.tokens_predicted} predicted, ${stats.tokens_cached} cached, ${predicted_per_token_ms.toFixed()}ms per token, ${predicted_per_second.toFixed(2)} tokens per second
    </span>
  `
}
901
/**
 * Simple popover: clicking the wrapped children toggles a floating panel
 * rendered into `#portal`, positioned just below the clicked element. A
 * mousedown outside both the panel and the trigger closes it.
 *
 * Props: `style` (applied to the trigger), `children` (trigger content),
 * `popoverChildren` (panel content).
 */
const Popover = (props) => {
  const isOpen = useSignal(false);
  const position = useSignal({ top: '0px', left: '0px' });
  const buttonRef = useRef(null);
  const popoverRef = useRef(null);

  const togglePopover = () => {
    const trigger = buttonRef.current;
    if (trigger) {
      // anchor the panel to the trigger's bottom-left corner in page coordinates
      const { bottom, left } = trigger.getBoundingClientRect();
      position.value = {
        top: `${bottom + window.scrollY}px`,
        left: `${left + window.scrollX}px`,
      };
    }
    isOpen.value = !isOpen.value;
  };

  const handleClickOutside = (event) => {
    const panel = popoverRef.current;
    if (!panel) {
      return;
    }
    if (panel.contains(event.target) || buttonRef.current.contains(event.target)) {
      return;
    }
    isOpen.value = false;
  };

  useEffect(() => {
    document.addEventListener('mousedown', handleClickOutside);
    return () => {
      document.removeEventListener('mousedown', handleClickOutside);
    };
  }, []);

  return html`
    <span style=${props.style} ref=${buttonRef} onClick=${togglePopover}>${props.children}</span>
    ${isOpen.value && html`
      <${Portal} into="#portal">
        <div
          ref=${popoverRef}
          class="popover-content"
          style=${{
            top: position.value.top,
            left: position.value.left,
          }}
        >
          ${props.popoverChildren}
        </div>
      </${Portal}>
    `}
  `;
};
951
// Source: preact-portal (https://github.com/developit/preact-portal/blob/master/src/preact-portal.js)
/**
 * Redirect rendering of descendants into the given CSS selector.
 *
 * The component itself renders nothing; its children are rendered into the
 * node designated by the `into` prop (a selector string or a node).
 */
class Portal extends Component {
  // re-render the remote layer as soon as any previous prop differs
  componentDidUpdate(props) {
    for (const name in props) {
      if (props[name] !== this.props[name]) {
        return setTimeout(this.renderLayer);
      }
    }
  }

  componentDidMount() {
    this.isMounted = true;
    this.renderLayer = this.renderLayer.bind(this);
    this.renderLayer();
  }

  componentWillUnmount() {
    // render an empty layer first, then detach the remote node
    this.renderLayer(false);
    this.isMounted = false;
    const remote = this.remote;
    if (remote && remote.parentNode) remote.parentNode.removeChild(remote);
  }

  // resolve a selector string to a node; anything else is returned as is
  findNode(node) {
    if (typeof node === 'string') {
      return document.querySelector(node);
    }
    return node;
  }

  renderLayer(show = true) {
    if (!this.isMounted) return;

    // clean up old node if moving bases:
    const target = this.props.into;
    if (target !== this.intoPointer) {
      this.intoPointer = target;
      if (this.into && this.remote) {
        this.remote = render(html`<${PortalProxy} />`, this.into, this.remote);
      }
      this.into = this.findNode(target);
    }

    this.remote = render(html`
      <${PortalProxy} context=${this.context}>
        ${show && this.props.children || null}
      </${PortalProxy}>
    `, this.into, this.remote);
  }

  render() {
    return null;
  }
}
/**
 * Higher-order component used by Portal as a conditional rendering proxy:
 * it renders its children when present and nothing otherwise, and exposes
 * the `context` prop as child context.
 */
class PortalProxy extends Component {
  getChildContext() {
    const { context } = this.props;
    return context;
  }

  render(props) {
    return props.children || null;
  }
}
1012
/**
 * Root component: header, input area (chat box or completion controls),
 * main content (transcript once a chat has started, settings form before)
 * and footer with generation statistics.
 *
 * On mount, a `?q=...` query parameter is sent as the first chat message.
 */
function App(props) {
  useEffect(() => {
    const query = new URLSearchParams(location.search).get("q");
    if (query) chat(query);
  }, []);

  const mode = session.value.type;
  const WriteArea = mode === 'chat' ? MessageInput : CompletionControls;
  const Content = chatStarted.value ? ChatLog : ConfigForm;

  return html`
    <div class="mode-${mode}">
      <header>
        <h1>llama.cpp</h1>
      </header>

      <section id="write">
        <${WriteArea} />
      </section>

      <main id="content">
        <${Content} />
      </main>

      <footer>
        <p><${ModelGenerationInfo} /></p>
        <p>Powered by <a href="https://github.com/ggml-org/llama.cpp">llama.cpp</a> and <a href="https://ggml.ai">ggml.ai</a>.</p>
      </footer>
    </div>
  `;
}
1040
// mount the application into the #container element
render(
  h(App),
  document.querySelector('#container')
);
1042 </script>
1043</head>
1044
1045<body>
1046 <div id="container">
1047 <input type="file" id="fileInput" accept="image/*" style="display: none;">
1048 </div>
1049 <div id="portal"></div>
1050</body>
1051
1052</html>